New regime

This commit is contained in:
Henry Whitney 2020-06-04 17:19:00 -04:00
parent 57fcb466a5
commit 3ec88d158b
3 changed files with 64996 additions and 65161 deletions

File diff suppressed because it is too large Load Diff

View File

@ -10,7 +10,7 @@ HTML browser: /Applications/Firefox.app
Repository directory: /Users/Henry/Documents/WACS
translationNotes path: en_tn
translationWords path: bible.en_tw.kt
translationWords path: Restructure/bible/kt
Unlocked Literal Bible path: en_ulb
# translationNotes path: gl_.*_tn
# translationWords path: gl_.*_bible.en_tw

View File

@ -48,11 +48,11 @@ my ($cv, $ULBfile, $exceptions, $missing, $output, $workFile) =
"$Bin${d}User${d}tW_work.txt",
);
my ($sourceFile, $topTwDir, $textEditor, $repoPath, $topSourceLangDir, $topOTSourceLangDir, $topNTSourceLangDir);
my ($sourceFile, $topTwDir, $textEditor, $repoPath, $topSourceLangDir, $topOTSourceLangDir, $topNTSourceLangDir, $twPath);
my (%entries, %text, %ref, %order, %pages, %listOfPages, %sourcePage, %checkPages, %foundPages, %substitutedPages,
%specifiedText, %realPage, %checkPage, %workEntries, %vsn, %StrongNum, %fullText, %ulbOrder, %bkAbr, %bkFull, %relevantSNs,
%SNsInCV, %entriesThisSN, %entriesThisPage, %pagesThisSN, %pageThisEntry, %adjust, %newRef);
%SNsInCV, %entriesThisSN, %entriesThisPage, %pagesThisSN, %pagesThisEntry, %adjust, %deleteNum, %newRef, %specifiedEntries);
my ($book, $testament);
my (@fileList);
@ -76,11 +76,11 @@ while (<DATA>) {
GetUserDefaults();
GetULBBooksToProcess();
#ReadExceptions();
#close LOG;
#open LOG, ">:utf8", "Logs${d}tW_pairs.log" or die "Logs${d}tW_pairs.log: $!";
#PairtWEntriesTotWPageAndUniqSNs();
#close LOG;
ReadExceptions();
close LOG;
open LOG, ">:utf8", "Logs${d}tW_pairs.log" or die "Logs${d}tW_pairs.log: $!";
PairtWEntriesTotWPageAndUniqSNs();
close LOG;
#open LOG, ">:utf8", "Logs${d}tWs_from_MAST.log" or die "Logs${d}tWs_from_MAST.log: $!";
#ReadLinkedSNs();
#LinkULBtoCV();
@ -110,6 +110,8 @@ sub GetUserDefaults {
} elsif ($thisLine =~ /^Repository directory: (.*)$/) {
$repoPath = $1;
#say $repoPath; die;
} elsif ($thisLine =~ /^translationWords path: (.*)$/) {
$twPath = $1;
}
}
@ -117,7 +119,7 @@ sub GetUserDefaults {
die "No text editor found" if $textEditor eq "";
die "No path to repo found" if $repoPath eq "";
($topTwDir, $topOTSourceLangDir, $topNTSourceLangDir) = ("$repoPath${d}en_tw${d}bible", "$repoPath${d}MAST_HB", "$repoPath${d}OGNT");
($topTwDir, $topOTSourceLangDir, $topNTSourceLangDir) = ("$repoPath${d}$twPath", "$repoPath${d}MAST_HB", "$repoPath${d}OGNT");
close $defaults;
}
@ -155,16 +157,26 @@ sub ReadExceptions {
while (my $line = <$file>) {
chomp $line;
my $rf;
if ($line =~ /^([^#\n][^\t\n]*)\t([^\t\n]*\t[^\t\n]*)/) {
if ($line =~ /^([^#\n][^\t\n]*)\t(\d+)\t\|\|/) {
my ($oldNew) = ($2);
$rf = $1;
#say LOG "\$line: $line, \$rf: $rf, \$oldNew: $oldNew";
($deleteNum{$rf}) .= "$oldNew√";
$specifiedText{$rf} = 1;
} elsif ($line =~ /^([^#\n][^\t\n]*)\t(\d+\t\d+)/) {
my ($oldNew) = ($2);
$rf = $1;
#say LOG "\$line: $line, \$rf: $rf, \$oldNew: $oldNew";
($adjust{$rf}) .= "$oldNew√";
$specifiedText{$rf} = 1;
}
}
foreach my $key (sort keys %adjust) {
say LOG "\$key: $key: \$adjust{$key}: $adjust{$key}"
elsif ($line =~ /^([^#\n\t][^\t\n]*)\t(\d+\t[^\t\n]*\t[^\t\n]*)/) {
my ($rf, $sn, $snippet, $page) = ($1, $2, $3, $4);
#say LOG "\$line: $line, \$rf: $rf, \$oldNew: $oldNew";
$specifiedEntries{$rf} = "$sn≈$snippet≈$page√";
$specifiedText{$rf} = 1;
}
}
close $file;
}
@ -186,45 +198,47 @@ sub PairtWEntriesTotWPageAndUniqSNs {
#say "|$shortFile|"; die;
#if ($shortFile =~ /^(kt|names)/) {
#my $fileText = read_file("$file", binmode => 'utf8');
open IN, $file or die "$!";
while (<IN>) {
if (/^# ([^\n]*)$/) {
$thisList = $1;
$thisList =~ s/[\r\n]*$//;
#say LOG "\$thisList = |$thisList|";
$thisList =~ s/ \([^\)]*\)//g;
$entriesThisPage{$shortFile} = $thisList;
my @ULBEntries = split /, /, $thisList;
foreach my $ULB_entry(@ULBEntries) {
$pageThisEntry{$ULB_entry} = $shortFile;
say LOG "\$pageThisEntry{$ULB_entry}: $pageThisEntry{$ULB_entry}"
}
my @tempArray = split /, /, $thisList;
foreach my $slice (@tempArray) {
$sourcePage{$slice} = $shortFile;
say LOG "$slice: \$sourcePage{$slice}: $sourcePage{$slice}";
}
}
if (/Strong's(.*)$/) {
my $SNs = $1;
while ($SNs =~ s/[G](\d*)//) {
push @relevantSNs, $1;
$entriesThisSN{$1} .= "$thisList, ";
$pagesThisSN{$1} .= "$shortFile, ";
}
}
@relevantSNs = uniq(@relevantSNs);
foreach (@relevantSNs) {
$relevantSNs{$_} = "$_";
my $fileText = read_file("$file", binmode => 'utf8');
$fileText =~ s/$/\n/;
#say LOG "=====\n$file:\n$fileText";
if ($fileText =~ /## Forms Found in the English ULB:\n\n([^\n]*)\n/s) {
$thisList = $1;
say LOG "$file: $thisList";
$thisList =~ s/[\r\n]+$//;
my @ULBEntries = split /, /, $thisList;
my @tempArray = split /, /, $thisList;
foreach my $ULB_entry(@ULBEntries) {
if ($pagesThisEntry{$ULB_entry} =~ /^.+$/) {
say LOG "*!!*"
}
$pagesThisEntry{$ULB_entry} .= "$shortFile, ";
$sourcePage{$ULB_entry} = $shortFile;
say LOG "\$pagesThisEntry{$ULB_entry}: $pagesThisEntry{$ULB_entry}, \$sourcePage{$ULB_entry}: $sourcePage{$ULB_entry}, \$shortFile: $shortFile"
}
close IN;
}
if ($fileText =~ /Strong's([^\n]*)\n/) {
my $SNs = $1;
while ($SNs =~ s/([GH]\d*)//) {
my $thisSN = $1;
say LOG "! $shortFile ! $thisSN !";
push @relevantSNs, $thisSN;
$entriesThisSN{$thisSN} .= "$thisList, ";
$pagesThisSN{$thisSN} .= "$shortFile, ";
}
}
}
@relevantSNs = uniq(@relevantSNs);
foreach (@relevantSNs) {
$relevantSNs{$_} = "$_";
}
say "";
#say LOG "====";
#say LOG "====";
foreach my $thisSN (sort keys %entriesThisSN) {
say LOG "tW entries for $thisSN: $entriesThisSN{$thisSN}"
say LOG "tW entries for $thisSN: $entriesThisSN{$thisSN}, \$pagesThisSN{$1}: $pagesThisSN{$1}"
}
}
@ -353,26 +367,26 @@ sub LinkSNsToULBtextViaEntries {
if ($thisNum =~ /(\d+)\[(.*?)\]/) {
($thisNum) = ($1);
my ($forced_entry_for_display, $forced_entry_for_page, $forced_entry_for_search) = ($2, $2, $2);
unless (exists $pageThisEntry{$forced_entry_for_page}) {
unless (exists $pagesThisEntry{$forced_entry_for_page}) {
my $try = lc $forced_entry_for_page;
if (exists $pageThisEntry{$try}) {
if (exists $pagesThisEntry{$try}) {
$forced_entry_for_page = lc $forced_entry_for_page
}
else {
say "!!!\tNo \$pageThisEntry{$try} found for $try\n\$thisNum: $thisNum\t\$forced_entry_for_page: $forced_entry_for_page\n$ref{$thisRef}\t$thisNum\t$forced_entry_for_page";
say "!!!\tNo \$pagesThisEntry{$try} found for $try\n\$thisNum: $thisNum\t\$forced_entry_for_page: $forced_entry_for_page\n$ref{$thisRef}\t$thisNum\t$forced_entry_for_page";
#die
}
}
say LOG
"*0* \$forced_entry_for_display: $forced_entry_for_display, \$pageThisEntry{$forced_entry_for_page}: $pageThisEntry{$forced_entry_for_page}";
"*0* \$forced_entry_for_display: $forced_entry_for_display, \$pagesThisEntry{$forced_entry_for_page}: $pagesThisEntry{$forced_entry_for_page}";
#while ($forced_entry_for_display =~ s/([^ \]]*?)√([^ \]]*?)/$1 $2/) {}
$forced_entry_for_search = lc $forced_entry_for_display;
#while ($forced_entry_for_search =~ s/ ... /\.\*\?/) {}
say LOG
"*A* \$thisNum: >>$thisNum<< \$forced_entry_for_display: >$forced_entry_for_display<,
\$forced_entry_for_search: >>$forced_entry_for_search<<
\$pageThisEntry{$forced_entry_for_page}: $pageThisEntry{$forced_entry_for_page}";
$outString .= "[$forced_entry_for_display]($pageThisEntry{$forced_entry_for_page})\n";
\$pagesThisEntry{$forced_entry_for_page}: $pagesThisEntry{$forced_entry_for_page}";
$outString .= "[$forced_entry_for_display]($pagesThisEntry{$forced_entry_for_page})\n";
say LOG "\t\t$outString:\n$outString";
if ($forced_entry_for_search =~ /([^,]*) \.\.\. ([^,]*) \.\.\. ([^,]*)/) {
my ($first, $second, $third) = ($1, $2, $3);
@ -416,13 +430,13 @@ sub LinkSNsToULBtextViaEntries {
print LOG "\$testEntry: |$testEntry| ";
if ($testEntry =~ /\(\.\*\?\)/ && $text{$thisCV} =~ s/\b($testEntry)\b/$3/i) {
say LOG "\n===\n$thisNum |$testEntry| is found in first test.\n===";
$outString .= "[$entry]($pageThisEntry{$entry})\n";
$outString .= "[$entry]($pagesThisEntry{$entry})\n";
say LOG $outString . "\n===" . $text{$thisCV};
$found = 1;
goto Breakout;
} elsif ($text{$thisCV} =~ s/\b($testEntry)[^\w']//i || $text{$thisCV} =~ s/\b($testEntry)["']//i || $text{$thisCV} =~ s/["']($testEntry)\b//i) {
say LOG "\n===\n$thisNum |$testEntry| is found in second test.\n===";
$outString .= "[$entry]($pageThisEntry{$entry})\n";
$outString .= "[$entry]($pagesThisEntry{$entry})\n";
say LOG $outString . "\n" . $text{$thisCV};
$found = 1;
goto Breakout;
@ -473,7 +487,7 @@ sub Adjust {
my $adj = $1;
say LOG "*3*\t\$adjustment: $adjustment, \$adj: $adj";
if ($adj =~ /([^\t]*)\t([^\t]*)/) {
$snsNew = "[$1]($pageThisEntry{$2})"
$snsNew = "[$1]($pagesThisEntry{$2})"
} else {
$snsNew .= "$adj "
}