New regime

This commit is contained in:
Henry Whitney 2020-06-04 17:19:00 -04:00
parent 57fcb466a5
commit 3ec88d158b
3 changed files with 64996 additions and 65161 deletions

File diff suppressed because it is too large Load Diff

View File

@ -10,7 +10,7 @@ HTML browser: /Applications/Firefox.app
Repository directory: /Users/Henry/Documents/WACS Repository directory: /Users/Henry/Documents/WACS
translationNotes path: en_tn translationNotes path: en_tn
translationWords path: bible.en_tw.kt translationWords path: Restructure/bible/kt
Unlocked Literal Bible path: en_ulb Unlocked Literal Bible path: en_ulb
# translationNotes path: gl_.*_tn # translationNotes path: gl_.*_tn
# translationWords path: gl_.*_bible.en_tw # translationWords path: gl_.*_bible.en_tw

View File

@ -48,11 +48,11 @@ my ($cv, $ULBfile, $exceptions, $missing, $output, $workFile) =
"$Bin${d}User${d}tW_work.txt", "$Bin${d}User${d}tW_work.txt",
); );
my ($sourceFile, $topTwDir, $textEditor, $repoPath, $topSourceLangDir, $topOTSourceLangDir, $topNTSourceLangDir); my ($sourceFile, $topTwDir, $textEditor, $repoPath, $topSourceLangDir, $topOTSourceLangDir, $topNTSourceLangDir, $twPath);
my (%entries, %text, %ref, %order, %pages, %listOfPages, %sourcePage, %checkPages, %foundPages, %substitutedPages, my (%entries, %text, %ref, %order, %pages, %listOfPages, %sourcePage, %checkPages, %foundPages, %substitutedPages,
%specifiedText, %realPage, %checkPage, %workEntries, %vsn, %StrongNum, %fullText, %ulbOrder, %bkAbr, %bkFull, %relevantSNs, %specifiedText, %realPage, %checkPage, %workEntries, %vsn, %StrongNum, %fullText, %ulbOrder, %bkAbr, %bkFull, %relevantSNs,
%SNsInCV, %entriesThisSN, %entriesThisPage, %pagesThisSN, %pageThisEntry, %adjust, %newRef); %SNsInCV, %entriesThisSN, %entriesThisPage, %pagesThisSN, %pagesThisEntry, %adjust, %deleteNum, %newRef, %specifiedEntries);
my ($book, $testament); my ($book, $testament);
my (@fileList); my (@fileList);
@ -76,11 +76,11 @@ while (<DATA>) {
GetUserDefaults(); GetUserDefaults();
GetULBBooksToProcess(); GetULBBooksToProcess();
#ReadExceptions(); ReadExceptions();
#close LOG; close LOG;
#open LOG, ">:utf8", "Logs${d}tW_pairs.log" or die "Logs${d}tW_pairs.log: $!"; open LOG, ">:utf8", "Logs${d}tW_pairs.log" or die "Logs${d}tW_pairs.log: $!";
#PairtWEntriesTotWPageAndUniqSNs(); PairtWEntriesTotWPageAndUniqSNs();
#close LOG; close LOG;
#open LOG, ">:utf8", "Logs${d}tWs_from_MAST.log" or die "Logs${d}tWs_from_MAST.log: $!"; #open LOG, ">:utf8", "Logs${d}tWs_from_MAST.log" or die "Logs${d}tWs_from_MAST.log: $!";
#ReadLinkedSNs(); #ReadLinkedSNs();
#LinkULBtoCV(); #LinkULBtoCV();
@ -110,6 +110,8 @@ sub GetUserDefaults {
} elsif ($thisLine =~ /^Repository directory: (.*)$/) { } elsif ($thisLine =~ /^Repository directory: (.*)$/) {
$repoPath = $1; $repoPath = $1;
#say $repoPath; die; #say $repoPath; die;
} elsif ($thisLine =~ /^translationWords path: (.*)$/) {
$twPath = $1;
} }
} }
@ -117,7 +119,7 @@ sub GetUserDefaults {
die "No text editor found" if $textEditor eq ""; die "No text editor found" if $textEditor eq "";
die "No path to repo found" if $repoPath eq ""; die "No path to repo found" if $repoPath eq "";
($topTwDir, $topOTSourceLangDir, $topNTSourceLangDir) = ("$repoPath${d}en_tw${d}bible", "$repoPath${d}MAST_HB", "$repoPath${d}OGNT"); ($topTwDir, $topOTSourceLangDir, $topNTSourceLangDir) = ("$repoPath${d}$twPath", "$repoPath${d}MAST_HB", "$repoPath${d}OGNT");
close $defaults; close $defaults;
} }
@ -155,16 +157,26 @@ sub ReadExceptions {
while (my $line = <$file>) { while (my $line = <$file>) {
chomp $line; chomp $line;
my $rf; my $rf;
if ($line =~ /^([^#\n][^\t\n]*)\t([^\t\n]*\t[^\t\n]*)/) { if ($line =~ /^([^#\n][^\t\n]*)\t(\d+)\t\|\|/) {
my ($oldNew) = ($2);
$rf = $1;
#say LOG "\$line: $line, \$rf: $rf, \$oldNew: $oldNew";
($deleteNum{$rf}) .= "$oldNew√";
$specifiedText{$rf} = 1;
} elsif ($line =~ /^([^#\n][^\t\n]*)\t(\d+\t\d+)/) {
my ($oldNew) = ($2); my ($oldNew) = ($2);
$rf = $1; $rf = $1;
#say LOG "\$line: $line, \$rf: $rf, \$oldNew: $oldNew"; #say LOG "\$line: $line, \$rf: $rf, \$oldNew: $oldNew";
($adjust{$rf}) .= "$oldNew√"; ($adjust{$rf}) .= "$oldNew√";
$specifiedText{$rf} = 1; $specifiedText{$rf} = 1;
} }
elsif ($line =~ /^([^#\n\t][^\t\n]*)\t(\d+\t[^\t\n]*\t[^\t\n]*)/) {
my ($rf, $sn, $snippet, $page) = ($1, $2, $3, $4);
#say LOG "\$line: $line, \$rf: $rf, \$oldNew: $oldNew";
$specifiedEntries{$rf} = "$sn≈$snippet≈$page√";
$specifiedText{$rf} = 1;
} }
foreach my $key (sort keys %adjust) {
say LOG "\$key: $key: \$adjust{$key}: $adjust{$key}"
} }
close $file; close $file;
} }
@ -186,45 +198,47 @@ sub PairtWEntriesTotWPageAndUniqSNs {
#say "|$shortFile|"; die; #say "|$shortFile|"; die;
#if ($shortFile =~ /^(kt|names)/) { #if ($shortFile =~ /^(kt|names)/) {
#my $fileText = read_file("$file", binmode => 'utf8'); #my $fileText = read_file("$file", binmode => 'utf8');
open IN, $file or die "$!"; my $fileText = read_file("$file", binmode => 'utf8');
while (<IN>) { $fileText =~ s/$/\n/;
if (/^# ([^\n]*)$/) { #say LOG "=====\n$file:\n$fileText";
if ($fileText =~ /## Forms Found in the English ULB:\n\n([^\n]*)\n/s) {
$thisList = $1; $thisList = $1;
$thisList =~ s/[\r\n]*$//; say LOG "$file: $thisList";
#say LOG "\$thisList = |$thisList|"; $thisList =~ s/[\r\n]+$//;
$thisList =~ s/ \([^\)]*\)//g;
$entriesThisPage{$shortFile} = $thisList;
my @ULBEntries = split /, /, $thisList; my @ULBEntries = split /, /, $thisList;
foreach my $ULB_entry(@ULBEntries) {
$pageThisEntry{$ULB_entry} = $shortFile;
say LOG "\$pageThisEntry{$ULB_entry}: $pageThisEntry{$ULB_entry}"
}
my @tempArray = split /, /, $thisList; my @tempArray = split /, /, $thisList;
foreach my $slice (@tempArray) { foreach my $ULB_entry(@ULBEntries) {
$sourcePage{$slice} = $shortFile; if ($pagesThisEntry{$ULB_entry} =~ /^.+$/) {
say LOG "$slice: \$sourcePage{$slice}: $sourcePage{$slice}"; say LOG "*!!*"
}
$pagesThisEntry{$ULB_entry} .= "$shortFile, ";
$sourcePage{$ULB_entry} = $shortFile;
say LOG "\$pagesThisEntry{$ULB_entry}: $pagesThisEntry{$ULB_entry}, \$sourcePage{$ULB_entry}: $sourcePage{$ULB_entry}, \$shortFile: $shortFile"
} }
} }
if (/Strong's(.*)$/) { if ($fileText =~ /Strong's([^\n]*)\n/) {
my $SNs = $1; my $SNs = $1;
while ($SNs =~ s/[G](\d*)//) { while ($SNs =~ s/([GH]\d*)//) {
push @relevantSNs, $1; my $thisSN = $1;
$entriesThisSN{$1} .= "$thisList, "; say LOG "! $shortFile ! $thisSN !";
$pagesThisSN{$1} .= "$shortFile, "; push @relevantSNs, $thisSN;
$entriesThisSN{$thisSN} .= "$thisList, ";
$pagesThisSN{$thisSN} .= "$shortFile, ";
} }
} }
}
@relevantSNs = uniq(@relevantSNs); @relevantSNs = uniq(@relevantSNs);
foreach (@relevantSNs) { foreach (@relevantSNs) {
$relevantSNs{$_} = "$_"; $relevantSNs{$_} = "$_";
} }
}
close IN;
}
say ""; say "";
#say LOG "===="; #say LOG "====";
#say LOG "===="; #say LOG "====";
foreach my $thisSN (sort keys %entriesThisSN) { foreach my $thisSN (sort keys %entriesThisSN) {
say LOG "tW entries for $thisSN: $entriesThisSN{$thisSN}" say LOG "tW entries for $thisSN: $entriesThisSN{$thisSN}, \$pagesThisSN{$1}: $pagesThisSN{$1}"
} }
} }
@ -353,26 +367,26 @@ sub LinkSNsToULBtextViaEntries {
if ($thisNum =~ /(\d+)\[(.*?)\]/) { if ($thisNum =~ /(\d+)\[(.*?)\]/) {
($thisNum) = ($1); ($thisNum) = ($1);
my ($forced_entry_for_display, $forced_entry_for_page, $forced_entry_for_search) = ($2, $2, $2); my ($forced_entry_for_display, $forced_entry_for_page, $forced_entry_for_search) = ($2, $2, $2);
unless (exists $pageThisEntry{$forced_entry_for_page}) { unless (exists $pagesThisEntry{$forced_entry_for_page}) {
my $try = lc $forced_entry_for_page; my $try = lc $forced_entry_for_page;
if (exists $pageThisEntry{$try}) { if (exists $pagesThisEntry{$try}) {
$forced_entry_for_page = lc $forced_entry_for_page $forced_entry_for_page = lc $forced_entry_for_page
} }
else { else {
say "!!!\tNo \$pageThisEntry{$try} found for $try\n\$thisNum: $thisNum\t\$forced_entry_for_page: $forced_entry_for_page\n$ref{$thisRef}\t$thisNum\t$forced_entry_for_page"; say "!!!\tNo \$pagesThisEntry{$try} found for $try\n\$thisNum: $thisNum\t\$forced_entry_for_page: $forced_entry_for_page\n$ref{$thisRef}\t$thisNum\t$forced_entry_for_page";
#die #die
} }
} }
say LOG say LOG
"*0* \$forced_entry_for_display: $forced_entry_for_display, \$pageThisEntry{$forced_entry_for_page}: $pageThisEntry{$forced_entry_for_page}"; "*0* \$forced_entry_for_display: $forced_entry_for_display, \$pagesThisEntry{$forced_entry_for_page}: $pagesThisEntry{$forced_entry_for_page}";
#while ($forced_entry_for_display =~ s/([^ \]]*?)√([^ \]]*?)/$1 $2/) {} #while ($forced_entry_for_display =~ s/([^ \]]*?)√([^ \]]*?)/$1 $2/) {}
$forced_entry_for_search = lc $forced_entry_for_display; $forced_entry_for_search = lc $forced_entry_for_display;
#while ($forced_entry_for_search =~ s/ ... /\.\*\?/) {} #while ($forced_entry_for_search =~ s/ ... /\.\*\?/) {}
say LOG say LOG
"*A* \$thisNum: >>$thisNum<< \$forced_entry_for_display: >$forced_entry_for_display<, "*A* \$thisNum: >>$thisNum<< \$forced_entry_for_display: >$forced_entry_for_display<,
\$forced_entry_for_search: >>$forced_entry_for_search<< \$forced_entry_for_search: >>$forced_entry_for_search<<
\$pageThisEntry{$forced_entry_for_page}: $pageThisEntry{$forced_entry_for_page}"; \$pagesThisEntry{$forced_entry_for_page}: $pagesThisEntry{$forced_entry_for_page}";
$outString .= "[$forced_entry_for_display]($pageThisEntry{$forced_entry_for_page})\n"; $outString .= "[$forced_entry_for_display]($pagesThisEntry{$forced_entry_for_page})\n";
say LOG "\t\t$outString:\n$outString"; say LOG "\t\t$outString:\n$outString";
if ($forced_entry_for_search =~ /([^,]*) \.\.\. ([^,]*) \.\.\. ([^,]*)/) { if ($forced_entry_for_search =~ /([^,]*) \.\.\. ([^,]*) \.\.\. ([^,]*)/) {
my ($first, $second, $third) = ($1, $2, $3); my ($first, $second, $third) = ($1, $2, $3);
@ -416,13 +430,13 @@ sub LinkSNsToULBtextViaEntries {
print LOG "\$testEntry: |$testEntry| "; print LOG "\$testEntry: |$testEntry| ";
if ($testEntry =~ /\(\.\*\?\)/ && $text{$thisCV} =~ s/\b($testEntry)\b/$3/i) { if ($testEntry =~ /\(\.\*\?\)/ && $text{$thisCV} =~ s/\b($testEntry)\b/$3/i) {
say LOG "\n===\n$thisNum |$testEntry| is found in first test.\n==="; say LOG "\n===\n$thisNum |$testEntry| is found in first test.\n===";
$outString .= "[$entry]($pageThisEntry{$entry})\n"; $outString .= "[$entry]($pagesThisEntry{$entry})\n";
say LOG $outString . "\n===" . $text{$thisCV}; say LOG $outString . "\n===" . $text{$thisCV};
$found = 1; $found = 1;
goto Breakout; goto Breakout;
} elsif ($text{$thisCV} =~ s/\b($testEntry)[^\w']//i || $text{$thisCV} =~ s/\b($testEntry)["']//i || $text{$thisCV} =~ s/["']($testEntry)\b//i) { } elsif ($text{$thisCV} =~ s/\b($testEntry)[^\w']//i || $text{$thisCV} =~ s/\b($testEntry)["']//i || $text{$thisCV} =~ s/["']($testEntry)\b//i) {
say LOG "\n===\n$thisNum |$testEntry| is found in second test.\n==="; say LOG "\n===\n$thisNum |$testEntry| is found in second test.\n===";
$outString .= "[$entry]($pageThisEntry{$entry})\n"; $outString .= "[$entry]($pagesThisEntry{$entry})\n";
say LOG $outString . "\n" . $text{$thisCV}; say LOG $outString . "\n" . $text{$thisCV};
$found = 1; $found = 1;
goto Breakout; goto Breakout;
@ -473,7 +487,7 @@ sub Adjust {
my $adj = $1; my $adj = $1;
say LOG "*3*\t\$adjustment: $adjustment, \$adj: $adj"; say LOG "*3*\t\$adjustment: $adjustment, \$adj: $adj";
if ($adj =~ /([^\t]*)\t([^\t]*)/) { if ($adj =~ /([^\t]*)\t([^\t]*)/) {
$snsNew = "[$1]($pageThisEntry{$2})" $snsNew = "[$1]($pagesThisEntry{$2})"
} else { } else {
$snsNew .= "$adj " $snsNew .= "$adj "
} }