From fbe92310d7c4202a83c403e9901493eb729cf71e Mon Sep 17 00:00:00 2001 From: Henry Whitney Date: Thu, 7 Nov 2019 14:34:16 -0500 Subject: [PATCH] PDF Isaiah 1-15 --- .../Exceptions_tWs_from_MAST_HB.txt | 21 +++---- .../Output/Entries_not_handled.txt | 13 ----- .../FilesForUpdates/get_strongs_gist.pl | 4 ++ .../FilesForUpdates/tWs.from.MAST_HB.pl | 34 +++++++---- .../FilesForUpdates/tWs.from.MAST_NT.pl | 57 ++++++++++++------- 5 files changed, 73 insertions(+), 56 deletions(-) diff --git a/MAST_tW_PDF_Updater/FilesForUpdates/Exceptions/Exceptions_tWs_from_MAST_HB.txt b/MAST_tW_PDF_Updater/FilesForUpdates/Exceptions/Exceptions_tWs_from_MAST_HB.txt index 6003ed7..b912919 100644 --- a/MAST_tW_PDF_Updater/FilesForUpdates/Exceptions/Exceptions_tWs_from_MAST_HB.txt +++ b/MAST_tW_PDF_Updater/FilesForUpdates/Exceptions/Exceptions_tWs_from_MAST_HB.txt @@ -21042,7 +21042,7 @@ Isaiah 13:17 5782 || Isaiah 13:18 990 || Isaiah 13:18 5869 || Isaiah 13:18 7198 || -Isaiah 13:19 8597 +Isaiah 13:19 8597 splendor Isaiah 13:20 7931 || Isaiah 13:21 1323 || Isaiah 13:21 7931 || @@ -21091,7 +21091,6 @@ Isaiah 14:21 3559 || Isaiah 14:21 5892 || Isaiah 14:21 6440 || Isaiah 14:22 5209 descendant -Isaiah 14:22 5220 Isaiah 14:22 6635 || Isaiah 14:22 7605 || Isaiah 14:23 98 || @@ -21106,28 +21105,24 @@ Isaiah 14:27 6635 || Isaiah 14:27 7725 || Isaiah 14:29 3318 || Isaiah 14:29 5221 || -Isaiah 14:29 6848 -Isaiah 14:29 8314 +Isaiah 14:29 6848 adder +Isaiah 14:29 8314 serpent Isaiah 14:31 935 || -Isaiah 14:31 3213 Isaiah 14:31 5892 || Isaiah 14:32 6030 || Isaiah 15:2 1116 || -Isaiah 15:2 3213 +Isaiah 15:2 3213 wails Isaiah 15:2 5927 || Isaiah 15:2 7144 || -Isaiah 15:3 1065 Isaiah 15:3 1406 || -Isaiah 15:3 2296 -Isaiah 15:3 3213 Isaiah 15:3 3381 || +Isaiah 15:3 2296 || Isaiah 15:4 2502 || -Isaiah 15:4 3415 +Isaiah 15:4 3415 tremble Isaiah 15:4 5315 || -Isaiah 15:4 7321 +Isaiah 15:4 7321 cry out Isaiah 15:5 1870 || -Isaiah 15:5 2201 -Isaiah 15:5 5697 +Isaiah 15:5 5697 || Isaiah 15:5 5782 || Isaiah 15:5 5927 || Isaiah 15:5 7992 || diff --git a/MAST_tW_PDF_Updater/FilesForUpdates/Output/Entries_not_handled.txt b/MAST_tW_PDF_Updater/FilesForUpdates/Output/Entries_not_handled.txt index b68f064..638aafc 100644 --- a/MAST_tW_PDF_Updater/FilesForUpdates/Output/Entries_not_handled.txt +++ b/MAST_tW_PDF_Updater/FilesForUpdates/Output/Entries_not_handled.txt @@ -1,14 +1,3 @@ -Isaiah 13:19 8597 -Isaiah 14:22 5220 -Isaiah 14:31 3213 -Isaiah 15:2 3213 -Isaiah 15:3 2296 -Isaiah 15:3 3213 -Isaiah 15:3 1065 -Isaiah 15:4 7321 -Isaiah 15:4 3415 -Isaiah 15:5 5697 -Isaiah 15:5 2201 Isaiah 16:1 4057 Isaiah 16:3 5080 Isaiah 16:4 5080 @@ -66,14 +55,12 @@ Isaiah 22:21 4475 Isaiah 22:23 1 Isaiah 22:25 6635 Isaiah 22:25 4853 -Isaiah 23:1 3213 Isaiah 23:1 7703 Isaiah 23:4 4581 Isaiah 23:4 1330 Isaiah 23:9 6635 Isaiah 23:11 6680 Isaiah 23:13 6728 -Isaiah 23:14 3213 Isaiah 23:17 8141 Isaiah 23:17 868 Isaiah 23:17 127 diff --git a/MAST_tW_PDF_Updater/FilesForUpdates/get_strongs_gist.pl b/MAST_tW_PDF_Updater/FilesForUpdates/get_strongs_gist.pl index abddda0..0e80817 100644 --- a/MAST_tW_PDF_Updater/FilesForUpdates/get_strongs_gist.pl +++ b/MAST_tW_PDF_Updater/FilesForUpdates/get_strongs_gist.pl @@ -29,6 +29,10 @@ my $fileText = read_file("$Bin${d}Temp${d}Strongs_raw.html", binmode => 'utf8'); $title = $1; } + my $short_title = $title; + $short_title =~ s/^[^:]*: //; + + $html_tof =~ s/<\/head>/\t$short_title<\/title>\n$&/; $html_tof =~ s/<body>/$&\n<p>$title<\/p>/; if ($language eq "greek") { diff --git a/MAST_tW_PDF_Updater/FilesForUpdates/tWs.from.MAST_HB.pl b/MAST_tW_PDF_Updater/FilesForUpdates/tWs.from.MAST_HB.pl index 0673ad8..cbe94f4 100644 --- a/MAST_tW_PDF_Updater/FilesForUpdates/tWs.from.MAST_HB.pl +++ b/MAST_tW_PDF_Updater/FilesForUpdates/tWs.from.MAST_HB.pl @@ -341,12 +341,22 @@ sub LinkSNsToULBtextViaEntries { my ($found, $specPage); if ($thisNum =~ /(\d+)\[(.*?)\]/) { ($thisNum) = ($1); - my ($forced_entry_for_display, $forced_entry_for_page, $forced_entry_for_search) = ($2, lc $2, lc $2); + my ($forced_entry_for_display, $forced_entry_for_page, $forced_entry_for_search) = ($2, $2, $2); + unless (exists $pageThisEntry{$forced_entry_for_page}) { + my $try = lc $forced_entry_for_page; + if (exists $pageThisEntry{$try}) { + $forced_entry_for_page = lc $forced_entry_for_page + } + else { + say "\$thisNum: $thisNum\t\$forced_entry_for_page: $forced_entry_for_page"; + die + } + } say LOG "*0* \$forced_entry_for_display: $forced_entry_for_display, \$pageThisEntry{$forced_entry_for_page}: $pageThisEntry{$forced_entry_for_page}"; #while ($forced_entry_for_display =~ s/([^ \]]*?)√([^ \]]*?)/$1 $2/) {} $forced_entry_for_search = lc $forced_entry_for_display; - while ($forced_entry_for_search =~ s/ ... /\.\*\?/) {} + #while ($forced_entry_for_search =~ s/ ... /\.\*\?/) {} say LOG "*A* \$thisNum: >>$thisNum<< \$forced_entry_for_display: >$forced_entry_for_display<, \$forced_entry_for_search: >>$forced_entry_for_search<< @@ -355,15 +365,15 @@ sub LinkSNsToULBtextViaEntries { say LOG "\t\t$outString:\n$outString"; if ($forced_entry_for_search =~ /([^,]*) \.\.\. ([^,]*) \.\.\. ([^,]*)/) { my ($first, $second, $third) = ($1, $2, $3); - $text{$thisCV} =~ s/$first(.*?)$second(.*?)$third/$1 $2/; + $text{$thisCV} =~ s/$first(.*?)$second(.*?)$third/$1 $2/i; say LOG "\t*\t$text{$thisCV}"; } elsif ($forced_entry_for_search =~ /([^,]*) \.\.\. ([^,]*)/) { my ($first, $second) = ($1, $2); - $text{$thisCV} =~ s/$first(.*?)$second/$1/; + $text{$thisCV} =~ s/$first(.*?)$second/$1/i; say LOG "\t**\t$text{$thisCV}"; } else { #say "\$text{$thisCV}:\n$text{$thisCV}\n\$forced_entry_for_search: |$forced_entry_for_search|"; - $text{$thisCV} =~ s/$forced_entry_for_search//; + $text{$thisCV} =~ s/$forced_entry_for_search//i; say LOG "\t***\t$text{$thisCV}"; } next; @@ -380,15 +390,14 @@ sub LinkSNsToULBtextViaEntries { $workEntries{$thisNum} = $entriesThisSN{$thisNum}; } $workEntries{$thisNum} =~ s/, $//; - say LOG "**\t\$thisNum: >$thisNum<\t\$workEntries{$thisNum}: >$workEntries{$thisNum}<"; + say LOG "*D*\t\$thisNum: >$thisNum<\t\$workEntries{$thisNum}: >$workEntries{$thisNum}<"; my @beforeArray = split /, /, $workEntries{$thisNum}; my @sortedArray = reverse sort { substr($a,0,1) <=> substr($b,0,1) || length($a) <=> length($b) || $a <=> $b } @beforeArray; $" = "\n\t"; - say LOG "\@sortedArray:\n@sortedArray"; - #say LOG "\@sortedArray: @sortedArray\n\$outString: $outString\n\$text{$ref{$thisRef}}: $text{$ref{$thisRef}}"; + say LOG "*E*\t\@sortedArray: @sortedArray\n\$outString: $outString\n\$text{$ref{$thisRef}}: $text{$ref{$thisRef}}"; foreach my $entry (@sortedArray) { my $testEntry = $entry; #print LOG "\$entry: $entry. Becomes "; @@ -581,17 +590,22 @@ sub ProperOrderOutString { my ($ulb, $fileLoc) = ($2, $3); $ulb =~ s/ \.\.\. /.*?/g; say LOG "\t>>\t\$ulb: $ulb, \$fileLoc: $fileLoc, \$fullText{$thisCV}\n$fullText{$thisCV}"; - if ($ulb =~ /^(.*)\.\.\.(.*)$/) { + if ($ulb =~ /^(.*)\.\*\?(.*)$/) { +# if ($ulb =~ /^(.*)\.\.\.(.*)$/) { + say LOG "\t>>\tGap found"; my ($found1, $found2, $blank1, $blank2) = ($1, $2, "", ""); while (length $blank1 < length $found1) {$blank1 .= " "} while (length $blank2 < length $found2) {$blank2 .= " "} - if ($fullText{$thisCV} =~ s/^(.*?)\b$found1\b(.*)\b$found1\b(.*)$/$1$blank1$2$blank2$3/i) { + #say LOG "\t\t\$found1: $found1\t\$found2: $found2"; + say LOG "Looking for\ns/^(.*?)$found1(.*?)$found2(.*)\$\nin\n$fullText{$thisCV}"; + if ($fullText{$thisCV} =~ s/^(.*?)\b$found1\b(.*?)\b$found2\b(.*)$/$1$blank1$2$blank2$3/i) { say LOG "\t>>>\t$fullText{$thisCV}"; my ($order) = (length $1); $orderedSet{$order} = $thisSet; say LOG "\t\t\t\$order: $order\t \$orderedSet{$order}: $orderedSet{$order}" } } else { + say LOG "\t>>\tGap not found"; my $blank = ""; while (length $blank < length $ulb) {$blank .= " "} if ($fullText{$thisCV} =~ s/^(.*?)\b$ulb[^\w'](.*)$/$1$blank$2/i) { diff --git a/MAST_tW_PDF_Updater/FilesForUpdates/tWs.from.MAST_NT.pl b/MAST_tW_PDF_Updater/FilesForUpdates/tWs.from.MAST_NT.pl index 846a3f9..1125146 100644 --- a/MAST_tW_PDF_Updater/FilesForUpdates/tWs.from.MAST_NT.pl +++ b/MAST_tW_PDF_Updater/FilesForUpdates/tWs.from.MAST_NT.pl @@ -309,14 +309,15 @@ sub LinkSNsToULBtextViaEntries { $SNsInCV{$thisCV} =~ s/^ +(.*)/$1/; $SNsInCV{$thisCV} =~ s/(.*) +$/$1/; $SNsInCV{$thisCV} =~ s/ {2,}/ /g; + $SNsInCV{$thisCV} =~ s/^√+//; + $SNsInCV{$thisCV} =~ s/√{2,}/√/g; $SNsInCV{$thisCV} =~ s/√ /√/g; $SNsInCV{$thisCV} =~ s/√$//; - say LOG "*7*\t\$SNsInCV{$thisCV}: >$SNsInCV{$thisCV}<"; + say LOG "**\t\$SNsInCV{$thisCV}: >$SNsInCV{$thisCV}<"; } #say LOG "<>\t<>\t\$SNsInCV{$thisCV}: |$SNsInCV{$thisCV}|"; - $SNsInCV{$thisCV} =~ s/ \.\.\. /.*?/g; say LOG "*7*\t\$SNsInCV{$thisCV}:$SNsInCV{$thisCV}"; - while ($SNsInCV{$thisCV} =~ s/(\[[^\]]*?) ([^\]]*?\] )/$1√$2/) {}; +# while ($SNsInCV{$thisCV} =~ s/(\[[^\]]*?) ([^\]]*?\] )/$1√$2/) {}; say LOG "*8*\t\$SNsInCV{$thisCV}:$SNsInCV{$thisCV}"; my @tempArray = split /√/, $SNsInCV{$thisCV}; my %alreadyUsed; @@ -331,31 +332,43 @@ sub LinkSNsToULBtextViaEntries { $" = "|\n"; say LOG "*9*\t\$SNsInCV{$thisCV}:$SNsInCV{$thisCV}\n\@regArray: >@regArray<"; foreach my $thisNum (@regArray) { + say LOG "\$thisNum: $thisNum"; my ($found, $specPage); if ($thisNum =~ /(\d+)\[(.*?)\]/) { ($thisNum) = ($1); - my ($forced_entry_for_display) = ($2); - say LOG "\$forced_entry_for_display: $forced_entry_for_display, \$pageThisEntry{$forced_entry_for_display}: $pageThisEntry{$forced_entry_for_display}"; - - while ($forced_entry_for_display =~ s/([^ \]]*?)√([^ \]]*?)/$1 $2/) {} - my $forced_entry_for_search = $forced_entry_for_display; - $pageThisEntry{$forced_entry_for_search} = $pageThisEntry{$forced_entry_for_display}; - while ($forced_entry_for_display =~ s/\.\*\?/ ... /) {} - say LOG "*A*\t\$thisNum: >>$thisNum<<\t\$forced_entry_for_display: >$forced_entry_for_display<, - \$forced_entry_for_search: >>$forced_entry_for_search<<\n\t\$pageThisEntry{$forced_entry_for_display}: $pageThisEntry{$forced_entry_for_display}"; - $outString .= "[$forced_entry_for_display]($pageThisEntry{$forced_entry_for_display})\n"; + my ($forced_entry_for_display, $forced_entry_for_page, $forced_entry_for_search) = ($2, $2, $2); + unless (exists $pageThisEntry{$forced_entry_for_page}) { + my $try = lc $forced_entry_for_page; + if (exists $pageThisEntry{$try}) { + $forced_entry_for_page = lc $forced_entry_for_page + } + else { + say "\$thisNum: $thisNum\t\$forced_entry_for_page: $forced_entry_for_page"; + die + } + } + say LOG +"*0* \$forced_entry_for_display: $forced_entry_for_display, \$pageThisEntry{$forced_entry_for_page}: $pageThisEntry{$forced_entry_for_page}"; + #while ($forced_entry_for_display =~ s/([^ \]]*?)√([^ \]]*?)/$1 $2/) {} + $forced_entry_for_search = lc $forced_entry_for_display; + #while ($forced_entry_for_search =~ s/ ... /\.\*\?/) {} + say LOG +"*A* \$thisNum: >>$thisNum<< \$forced_entry_for_display: >$forced_entry_for_display<, + \$forced_entry_for_search: >>$forced_entry_for_search<< + \$pageThisEntry{$forced_entry_for_page}: $pageThisEntry{$forced_entry_for_page}"; + $outString .= "[$forced_entry_for_display]($pageThisEntry{$forced_entry_for_page})\n"; say LOG "\t\t$outString:\n$outString"; if ($forced_entry_for_search =~ /([^,]*) \.\.\. ([^,]*) \.\.\. ([^,]*)/) { my ($first, $second, $third) = ($1, $2, $3); - $text{$thisCV} =~ s/$first(.*?)$second(.*?)$third/$1 $2/; + $text{$thisCV} =~ s/$first(.*?)$second(.*?)$third/$1 $2/i; say LOG "\t*\t$text{$thisCV}"; } elsif ($forced_entry_for_search =~ /([^,]*) \.\.\. ([^,]*)/) { my ($first, $second) = ($1, $2); - $text{$thisCV} =~ s/$first(.*?)$second/$1/; + $text{$thisCV} =~ s/$first(.*?)$second/$1/i; say LOG "\t**\t$text{$thisCV}"; } else { #say "\$text{$thisCV}:\n$text{$thisCV}\n\$forced_entry_for_search: |$forced_entry_for_search|"; - $text{$thisCV} =~ s/$forced_entry_for_search//; + $text{$thisCV} =~ s/$forced_entry_for_search//i; say LOG "\t***\t$text{$thisCV}"; } next; @@ -365,6 +378,7 @@ sub LinkSNsToULBtextViaEntries { } else { say LOG "*C*\t\$thisNum: $thisNum"; } + say LOG "\t\$specPage: $specPage"; if ($specPage) { $workEntries{$thisNum} = $entriesThisPage{$specPage}; } else { @@ -387,12 +401,12 @@ sub LinkSNsToULBtextViaEntries { if ($testEntry =~ /\(\.\*\?\)/ && $text{$thisCV} =~ s/\b($testEntry)\b/$3/i) { say LOG "\n===\n$thisNum |$testEntry| is found in first test.\n==="; $outString .= "[$entry]($pageThisEntry{$entry})\n"; - say LOG $outString . "\n==" . $text{$thisCV}; + say LOG $outString . "\n===" . $text{$thisCV}; $found = 1; goto Breakout; - } elsif ($text{$thisCV} =~ s/\b($testEntry)\b//i) { + } elsif ($text{$thisCV} =~ s/\b($testEntry)[^\w']//i) { say LOG "\n===\n$thisNum |$testEntry| is found in second test.\n==="; - $outString .= "[$entry]($pageThisEntry{$entry})\n" unless $outString =~ /\[$entry\]\($pageThisEntry{$entry}\)/; + $outString .= "[$entry]($pageThisEntry{$entry})\n"; say LOG $outString . "\n" . $text{$thisCV}; $found = 1; goto Breakout; @@ -401,7 +415,10 @@ sub LinkSNsToULBtextViaEntries { } } Breakout: - say MISSING "$thisCV $thisNum" unless ($found); + unless ($found) { + say MISSING "$thisCV $thisNum"; + say LOG "\$thisCV: $thisCV\t\$thisNum: $thisNum" + } next if $found; } say LOG "*F*\t\$outString: $outString";