From d8df85e0f1bae3bb1b807cdeb6f173b60952de6f Mon Sep 17 00:00:00 2001
From: Henry Whitney
Date: Fri, 12 Jun 2020 13:52:15 -0400
Subject: [PATCH] New version of Mine.URL.Strong.Verse script

---
 .../Mine.URL.Strong.Verse.5.pl | 511 ++++++++++++++++++
 1 file changed, 511 insertions(+)
 create mode 100644 MAST_tW_PDF_Updater/FilesForUpdates/Mine.URL.Strong.Verse.5.pl

diff --git a/MAST_tW_PDF_Updater/FilesForUpdates/Mine.URL.Strong.Verse.5.pl b/MAST_tW_PDF_Updater/FilesForUpdates/Mine.URL.Strong.Verse.5.pl
new file mode 100644
index 0000000..e634892
--- /dev/null
+++ b/MAST_tW_PDF_Updater/FilesForUpdates/Mine.URL.Strong.Verse.5.pl
@@ -0,0 +1,511 @@
+# Routine to take missing.log entries and link to UGNT and ULB.KJV.Strongs
+# Same as version 2 but in Linux outputs all html files to temp dir and then opens
+# Same as version 3 but good for both OT and NT
+
+use 5.12.0;
+use File::Slurp;
+# Autoflush STDOUT so prompts appear before the script blocks on input.
+# (The previous value "\n" numifies to 0, which left buffering unchanged.)
+$| = 1;
+use utf8;
+#use open IN => ":utf8", OUT => ":utf8";
+use open IO => ":utf8";
+binmode(STDOUT, ":utf8");
+use File::Find ;
+use FindBin '$Bin';
+use Cwd ;
+
+# $d is the directory separator: backslash by default, "/" on darwin/linux.
+my ($workDir, $d) = ($Bin, "\\");
+# NOTE(review): this initialiser looks copy-pasted from the line above. It
+# seeds $language with $Bin and $textEditor with "\\", so the later
+# "No text editor found" check in GetUserDefaults can never fire. Left as
+# found because removing it would make configurations without a
+# "Text editor:" line start dying -- confirm the intent before changing.
+my ($language, $textEditor, $repoPath, $browser, $blbRef, $intrln_ref, $tN_page, $tW_list, $html_wrap) = ($Bin, "\\");
+# Book name => abbreviation, and book name => book number, from __DATA__.
+my (%abbr, %bkno);
+
+if ($^O eq "darwin" || $^O eq "linux") {$d = "/"}
+
+open LOG, ">:utf8", "$Bin${d}Logs${d}mine_log.log" or die ("$!:\n$Bin${d}Logs${d}mine_log.log");
+
+#===
+
+open OUT3, ">", "strongs.sh" or die "$!";
+
+# Load the book table from __DATA__. Each row is "number abbreviation name";
+# the name may itself contain spaces ("1 Samuel"), so only the first two
+# separators are treated as field boundaries. Tabs or spaces are accepted.
+while (<DATA>) {
+    chomp;
+    #say LOG $_;
+    if (/^(\d+)[ \t]+(\S+)[ \t]+(.+?)\s*$/) {
+        my ($number, $abbreviation, $name) = ($1, $2, $3);
+        ($bkno{$name}, $abbr{$name}) = ($number, $abbreviation);
+        # say LOG "\t \$1: $1, \$2: $2, \$3: $3, \$bkno{$3}: $bkno{$3}, \$abbr{$3}: $abbr{$3}";
+        # Books numbered below 40 are Old Testament (Hebrew); the rest are
+        # New Testament (Greek). ParseLine later re-derives $language from
+        # the Strong's number prefix.
+        if ($bkno{$name} < 40) {
+            $language = "hebrew"
+        } else {
+            $language = "greek"
+        }
+    # NOTE(review): source text appears to be missing between "elsif (/^" and
+    # ':utf8", "$results_file"' on the next line (the remainder of this loop
+    # and whatever precedes the open of OUT). The damaged line is reproduced
+    # exactly as found; restore it from the original script before running.
+    } elsif (/^:utf8", "$results_file" or die;
+
+# NOTE(review): this literal contains only blank lines as found; any markup
+# it originally carried is not present in this copy.
+say OUT "
+
+
+
+
+
+";
+
+# Main pipeline: read the entry, locate the verse, then scan the tW pages.
+ParseLine();
+FindVerse($ref);
+ChecktWPages($word);
+Finish();
+
+say OUT "
+
+";
+close OUT;
+
+say LOG "\$tW_list\$tN_page: $tW_list$tN_page, \$language: $language, \$strong: $strong";
+# Write the follow-up shell script: fetch the Strong's page, reduce it, then
+# open the generated HTML files in the browser and the notes in the editor.
+say OUT3 "curl https://biblehub.com/$language/$strong.htm > $Bin/Temp/Strongs_raw.html
+perl get_strongs_gist.pl
+$browser $Bin/Temp/This_interlinear.html $Bin/Temp/This_verse.html $Bin/Temp/Strongs.html $results_file &
+$textEditor --new-window $tW_list$tN_page &";
+
+close OUT3;
+
+#====
+
+close LOG;
+print "\n\tDone.\n\n";
+
+# GetUserDefaults
+# Reads "$Bin/User/$udf" and fills the file-level $textEditor, $repoPath and
+# $browser from its "Text editor:", "Repository directory:" and
+# "HTML browser:" lines. On Windows the editor and browser values are wrapped
+# in double quotes; on darwin both are prefixed with "open -a ".
+# Dies if the file cannot be opened or if the editor or repository path is
+# empty.
+sub GetUserDefaults {
+    my $is_windows = ($^O eq "MSWin32" || $^O eq "MSWin64");
+
+    open (my $defaults, "<:utf8", "$Bin${d}User${d}$udf") or die "$Bin${d}User${d}$udf:\n$!";
+
+    while (my $thisLine = <$defaults>) {
+        chomp $thisLine;
+        if ($thisLine =~ /^Text editor: (.*)$/) {
+            $textEditor = $1;
+            $textEditor = "\"$textEditor\"" if $is_windows;
+        } elsif ($thisLine =~ /^Repository directory: (.*)$/) {
+            $repoPath = $1;
+        } elsif ($thisLine =~ /^HTML browser: (.*)$/) {
+            $browser = $1;
+            # Quote the browser itself; this branch used to re-quote
+            # $textEditor, leaving it double-quoted twice.
+            $browser = "\"$browser\"" if $is_windows;
+        }
+    }
+
+    #say LOG "\$textEditor: $textEditor\n\$repoPath: $repoPath\n\$browser: $browser";
+    die "No text editor found" if $textEditor eq "";
+    die "No path to repo found" if $repoPath eq "";
+
+    if ($^O eq "darwin") {$textEditor = "open -a $textEditor"; $browser = "open -a $browser"}
+
+    close $defaults;
+}
+
+# ParseLine
+# Obtains one entry of the form "<Book> <ch>:<vs><TAB><G|H><digits>" -- from
+# STDIN on linux/darwin, otherwise from Temp/temp.tmp -- and derives the
+# file-level state used by the rest of the script: $ref, $book_name, $ch, $vs,
+# $sn, $strong, $language, $blbRef, $intrln_ref, $tN_page and $xmlFile.
+# Exits 0 on an empty entry; dies if the entry does not match or the data
+# file cannot be opened.
+sub ParseLine {
+    say LOG "ParseLine";
+    my ($sch, $svs);
+    if ($^O eq "linux" || $^O eq "darwin") {
+        say "\nEnter line from Entries_not_handled.txt:\n";
+        $missingLine = <STDIN>;
+        chomp $missingLine;                  # Get rid of newline character at the end
+        exit 0 if ($missingLine eq "");      # If empty string, exit.
+        #$missingLine = "Zephaniah 1:2 5486";
+    } else {
+        $missingLine = read_file ("Temp${d}temp.tmp");
+    }
+    say "\n\n";
+    if ($missingLine =~ /^(([^:]*) (\d+):(\d+))\t([GH]\d+)/) {
+        ($ref, $book_name, $ch, $vs, $sn, $xfAbr) = ($1, $2, $3, $4, $5, $bkAbr{$1});
+        ($sch, $svs, $strong) = ($ch, $vs, $sn);
+        if ($sn =~ /([GH])(\d+)/) {
+            my $langIndicator = $1;
+            # Compare, don't assign: "G" numbers are Greek, "H" are Hebrew.
+            $language = ($langIndicator eq "G") ? "Greek" : "Hebrew";
+        }
+
+        say LOG "\$book_name: $book_name, \$bkno{$book_name}: $bkno{$book_name}";
+        $html_wrap =~ s/()(<\/title>)/$1$book_name $ch:$vs$2/;
+        # Capture <body> so the heading is inserted after the tag instead of
+        # replacing it.
+        $html_wrap =~ s/(<body>)/$1<h1>$book_name $ch:$vs<\/h1>/;
+        # NOTE(review): the branch for books numbered below 40 is empty as
+        # found, so $dataFile and $exceptions_file are only assigned here for
+        # the other books -- confirm where they are set otherwise.
+        if ($bkno{$book_name} < 40) {
+        } else {
+            $dataFile = "$Bin${d}User${d}tW_work.txt";
+            $exceptions_file = "$Bin${d}Exceptions${d}Exceptions_tWs_from_MAST.txt";
+        }
+        if ($^O eq "darwin") {
+            say "curl https://biblehub.com/$language/$strong.htm > $Bin/Temp/Strongs_raw.html";
+            system "curl https://biblehub.com/$language/$strong.htm > $Bin/Temp/Strongs_raw.html";
+            say "perl $Bin${d}get_strongs_gist.pl";
+            system "perl $Bin${d}get_strongs_gist.pl";
+        }
+        if ($^O eq "darwin" || $^O eq "linux") {
+            # Record the entry, language and browser for later use.
+            my $work_path = "$Bin${d}Temp${d}Workdata.tmp";
+            open my $work_fh, ">", $work_path or die "$!";
+            print {$work_fh} "$missingLine\n$language\n${browser}<>";
+            close $work_fh;
+        }
+        say LOG "\$exceptions_file: $exceptions_file";
+        if ($^O eq "MSWin32" || $^O eq "linux") {
+            my $bbkk = $book_name;
+            $bbkk =~ s/ /_/g;
+            $bbkk = lc $bbkk;
+            $blbRef = "https://www.biblehub.com/$bbkk/$ch-$vs.htm";
+            $intrln_ref = "https://biblehub.com/text/$bbkk/$ch-$vs.htm"
+        } else {
+            $blbRef = "https://www.blueletterbible.org/net/$abbr{$book_name}/$ch/$vs";
+        }
+        say LOG "\$blbRef: $blbRef";
+        say LOG "xxx\n\$missingLine: $missingLine, \$ref: $ref, \$book_name: $book_name, \$ch: $ch, \$vs: $vs\n\n\$dataFile: $dataFile";
+        open (my $file, "<:utf8", "$dataFile") or die "$dataFile:\n$!";
+
+        while (my $line = <$file>) {
+            chomp $line;
+            # Strip a trailing CR (CRLF files) from the line itself, before
+            # matching, so the last field does not carry it.
+            $line =~ s/[\r\n]+$//;
+            say LOG "\$line: $line";
+            if ($line =~ /^([^#][^\t]*)\t([^\t]*)\t([^\t]*)\t([^\t]*)$/) {
+                my ($fullBk, $wabbr, $numBk, $abr, $lbsBk) = ($2, $3, $4, $4, $4);
+                say LOG "\$fullBk: $fullBk, \$wabbr: $wabbr, \$numBk: $numBk, \$abr: $abr, \$lbsBk: $lbsBk";
+                if ($fullBk eq $book_name) {
+                    say LOG "\$fullBk: |$fullBk|, \$wa{$fullBk}: |$wa{$fullBk}|, \$numBk: $numBk, \$abr: $abr, \$book_name: |$book_name|, \$lbsBk: $lbsBk";
+                    say LOG "logos4:TextComparison;ref=BibleNIV.$lbsBk${sch}.$svs;res=esv,niv2011,niv,nasb95,nrsv,gs-netbible,nlt,leb,kjv1900";
+                    ($wa{$fullBk}) = ($wabbr);
+                    $abbr = lc $numBk;
+                    say LOG "\$abbr: $abbr";
+                    # Chapter and verse are zero-padded to 2 digits (3 for
+                    # Psalms) to build the tN page path.
+                    my $mxl = 2;
+                    say LOG "The name of the current book is \$book_name: $book_name.";
+                    if ($book_name eq "Psalms") {$mxl = 3}
+                    while (length($ch) < $mxl) {$ch = "0$ch"}
+                    while (length($vs) < $mxl) {$vs = "0$vs"}
+                    $xmlFile = "$repoPath/MAST_HB/${numBk}.xml";
+                    $tN_page = "$repoPath/en_tn/$wa{$fullBk}/$ch/${vs}.md";
+                    say LOG "\n\$wa{$fullBk}/\$ch/\$vs: $wa{$fullBk}/$ch/$vs\nTo system:\n$textEditor $tN_page\n";
+                    if ($^O eq "darwin") {system "$textEditor $tN_page"}
+                    elsif ($^O eq "linux") {
+                        #system "$textEditor --new-window $tN_page &";
+                    }
+                    elsif ($^O eq "MSWin32" || $^O eq "MSWin64") {system "START \"\" $tN_page"}
+                    #say "open -a /Applications/Logos.app \"logos4:TextComparison;ref=BibleBHS.$lbsBk${sch}.$svs;res=esv,niv2011,niv,nasb95,nrsv,gs-netbible,nlt,leb,kjv1900\"";
+                    if ($^O eq "darwin") {
+                        # List-form system: run the command once, without a
+                        # shell (the old code ran it in backticks and then
+                        # passed its output to system).
+                        system("open", "-a", "/Applications/Logos.app",
+                            "logos4:TextComparison;ref=BibleNIV.$lbsBk${sch}.$svs;res=esv,niv2011,niv,nasb95,nrsv,gs-netbible,nlt,leb,kjv1900");
+                        say "Opening $browser $Bin/Temp/Strongs.html";
+                        system "$browser $Bin/Temp/Strongs.html";
+                    }
+                }
+            }
+        }
+        close $file;
+    } else {
+        die "\n\tInput unreadable.\n"
+    }
+}
+
+
+# FindVerse
+# Looks up the file-level $ref in $txtFile, finds the word tagged with the
+# Strong's number $sn, stores it in the file-level $word and writes the
+# highlighted verse to OUT. If the number is not in the verse, writes a
+# "not found" paragraph plus every tW file that mentions the number.
+# Called as FindVerse($ref) but reads the file-level $ref, not @_.
+# Returns $word; dies (after opening the lexicon page on darwin/Windows)
+# when no word was found.
+sub FindVerse {
+
+    my $fileText = read_file("$txtFile", binmode => 'utf8');
+    my $lexicon_url = "https://www.blueletterbible.org/lang/lexicon/lexicon.cfm?strongs=$sn";
+
+    if ($fileText =~ /$ref([^\n]*\n)/) {
+        $thisULB = $1;
+    }
+
+    say LOG "FindVerse |$sn|.";
+
+    if ($flag && $fileText =~ /($ref\t[^\n]*\n)[^\n]*\n/) {
+        say LOG "($ref\t[^\n]*\n)[^\n]*\n";
+        exit 0;
+    } else {
+        if ($fileText =~ /$ref(\t[^\n]*\n )(([^\n]*<)$sn(.?>[^\n]*))/) {
+            my ($fore, $allNasb, $precon, $aft) = ($1, $2, $3, $4);
+            say LOG "\$ref: $ref\n\$fore: $fore\n$allNasb\n\$precon:\n$precon\n\$sn: $sn\n\$aft: $aft\n\$thisULB: $thisULB";
+            my $preprecon;
+            # $precon ends with "<" (the opening of the matched tag); the
+            # word immediately before it is the one being looked up.
+            if ($precon =~ /^(.*([,>\w\'\"\- —;] |[";\.\?\!]))([\w\'\-]+) (<[^<>]*> ){0,}<$/) {
+                ($preprecon, $word) = ($1, $3);
+                say LOG "\$&: $&\n\$preprecon: $preprecon\n\$word: $word";
+            } elsif ($precon =~ /^(\w+) <$/) {
+                ($preprecon, $word) = ("", $1);
+            } elsif ($precon =~ /^(.*)\b(\w+)\b <$/) {
+                ($preprecon, $word) = ($1, $2);
+            }
+            # Escape the angle brackets of the Strong's tags so they show
+            # up as text in the HTML report.
+            $preprecon =~ s/</&lt;/g;
+            $preprecon =~ s/>/&gt;/g;
+            $aft =~ s/</&lt;/g;
+            $aft =~ s/>/&gt;/g;
+            say OUT "<p>$ref$fore</p>\n<p>$preprecon<span style=\"color:red\">$word</span> &lt;<span style=\"color:red\">$sn</span>$aft</p>\n";
+        } elsif ($fileText =~ /$ref[^\n]*\n[^\n]*\n/) {
+            my $display = $&;
+            # Escape first, then add the <br /> markup.
+            $display =~ s/</&lt;/g;
+            $display =~ s/>/&gt;/g;
+            $display =~ s/\n/<br \/> /g;
+            say OUT "<p>The Strong's code &lt;&lt;<span style=\"color:red\">$sn</span>&gt;&gt; is not found in OGNT or MAST-HB $ref.<br /><br />$display</p>";
+            #system ("bbfind -g \"${ref}\\t[^\\n]*\\n[^\\n]*\" 'data${d}ULB_NASB_Strongs.txt'") or die "$!";
+
+            my $dump;
+            $dump = "<p></p><p>";
+            foreach my $file ( @tWfiles ) {
+                my $fileText = read_file("$file", binmode => 'utf8');
+                # While finds entries and Strong's numbers
+                while ($fileText =~ /($sn)[^\d]/g) {
+                    my $found = $1;
+                    #system `clear`;
+                    my $abb = $file;
+                    $abb =~ s/\.md$//;
+                    $dump .= "$abb: <span style=color:red>$found</span><br />\n";
+                }
+            }
+
+            say $sn;
+
+            # Run the browser once; the URL is quoted because it contains "?".
+            if ($^O eq "darwin") {system "$browser '$lexicon_url'"}
+
+            $strong =~ s/^[HG]//;
+            say OUT "$dump</p><p>$sn</p>";
+
+            #system ("find $topDir -name \"*.md\" -exec grep -H --color \"$strong\[, \\n\\r\]\" {} \\;");
+            #system ("find $topDir -name \"*.md\" -exec grep -H --color \"$strong\$\" {} \\;");
+        }
+    }
+    if ($word eq "") {
+        say "There is no \$word in $ref\n$thisULB";
+        if ($^O eq "darwin") {
+            system "$browser '$lexicon_url'";
+        }
+        if ($^O eq "linux") {
+            #system "chromium https://www.blueletterbible.org/lang/lexicon/lexicon.cfm?strongs=H$strong &";
+            #system "xdg-open https://www.blueletterbible.org/lang/lexicon/lexicon.cfm?strongs=$sn &";
+        }
+        if ($^O eq "MSWin32" || $^O eq "MSWin64" ) {
+            system "START \"\" https://www.blueletterbible.org/lang/lexicon/lexicon.cfm?strongs=$sn";
+        }
+        die
+    }
+    return $word;
+}
+
+# ChecktWPages
+# Scans every file in @tWfiles for the Strong's number $sn or a leading
+# "# " heading containing $word. Writes one "<name><br />" line to OUT per
+# hit and records each matching file in %tW_file.
+# Called as ChecktWPages($word) but reads the file-level $word, not @_.
+# Dies if $word is empty.
+sub ChecktWPages{
+
+    say LOG "<<$word>>";
+    if ($word eq "") {
+        say "\nThere is no \$word for \$strong = $sn in $ref\n$thisULB\n";
+        die
+    }
+    say OUT "<p>";
+    foreach my $file ( @tWfiles ) {
+        my $fileText = read_file("$file", binmode => 'utf8');
+        my $foundHere;
+        # While finds entries and Strong's numbers
+        while ($fileText =~ /($sn)[^\d]|^(# [^\n]*\b$word\b)/g) {
+            $foundHere = 1;
+            #system `clear`;
+            my $abb = $file;
+            $abb =~ s/\.md$//;
+            say OUT "$abb<br />\n";
+        }
+        if ($foundHere) {unless (exists $tW_file{$file}) {$tW_file{$file} = $file}}
+    }
+    say OUT "</p>";
+}
+
+# Finish
+# Writes two link lists to OUT -- tW files whose text contains $sn, and tW
+# files with a "# " heading containing $word -- recording each in %tW_file,
+# then performs the per-platform follow-up: opening files on darwin and
+# Windows; on linux, downloading and reducing the interlinear and verse pages
+# and appending the tW files to $tW_list.
+sub Finish {
+    say "\nLooking for $strong.";
+    # find $topDir -name "*.md" -exec grep -H '($strong[^\d]|$strong$)' {}
+    #system ("find $topDir -name \"*.md\" -exec grep -H --color \"$strong\[, \\n\\r\]\" {} \\;");
+
+    say OUT "<p>";
+    foreach my $file ( @tWfiles ) {
+        # Note: $file aliases the @tWfiles element, so on Windows the stored
+        # paths are converted to backslashes in place.
+        if ($^O eq "MSWin32" || $^O eq "MSWin64") {$file =~ s/\//\\/g}
+
+        my $fileText = read_file("$file", binmode => 'utf8') or die "$!";
+        # While finds entries and Strong's numbers
+        # ${sn} makes it explicit that "[^\d]" is a character class, not a
+        # subscript.
+        while ($fileText =~ /([^\n]*)(${sn}[^\d])([^\n]*)/g) {
+            my ($pre, $found, $post) = ($1, $2, $3);
+            my $abb = $file;
+            #$abb =~ s/.md$//;
+            say OUT "<b><a href=\"$abb\">$abb</a></b>: $pre<span style=\"color:red\">$found</span>$post<br />\n";
+            unless (exists $tW_file{$file}) {$tW_file{$file} = $file}
+        }
+    }
+    say OUT "</p>";
+    say "\nLooking for $word.";
+    say OUT "<p>";
+    foreach my $file ( @tWfiles ) {
+        my $fileText = read_file("$file", binmode => 'utf8');
+        # Only the first heading match per file is reported.
+        if ($fileText =~ /#{1,1} ([^\n]*)(\b$word\b)([^\n]*)/g) {
+            my ($pre, $found, $post) = ($1, $2, $3);
+            my $abb = $file;
+            #$abb =~ s/.md$//;
+            say OUT "<b><a href=\"$abb\">$abb</a></b>: $pre<span style=\"color:red\">$found</span>$post<br />\n";
+            unless (exists $tW_file{$file}) {$tW_file{$file} = $file}
+        }
+    }
+    say OUT "</p>";
+    # Space-separated list of every matching tW file, in sorted order.
+    my $tW_files;
+    foreach my $key (sort keys %tW_file) {
+        $tW_files .= "$key ";
+    }
+    say "\nOpening .md files.";
+    if ($^O eq "darwin") {
+        #system `$browser https://www.blueletterbible.org/lang/lexicon/lexicon.cfm?strongs=$strong`;
+        system "perl $Bin/get_strongs_gist.pl";
+        # Plain system: run each command once (backticks plus system ran the
+        # command and then tried to execute its output).
+        system "$textEditor $tW_files";
+        system "$textEditor $exceptions_file";
+    }
+    if ($^O eq "linux") {
+        say "curl $intrln_ref > $Bin/Temp/This_interlinear.html";
+        system "curl $intrln_ref > $Bin/Temp/This_interlinear.html";
+        Reduce_Interlinear();
+        #system "$browser $Bin/Temp/This_interlinear.html &";
+        system "sleep 1s";
+        say "curl $blbRef > $Bin/Temp/This_verse.html";
+        system "curl $blbRef > $Bin/Temp/This_verse.html";
+        Reduce_This_Verse();
+        my @tWs = split / /,$tW_files;
+        foreach (@tWs) {
+            say "Opening $_";
+            #system "$textEditor $_ &";
+            #say OUT3 "$textEditor $_ &";
+            $tW_list .= "$_ ";
+        }
+        #say "Opening $results_file";
+        #system "$browser $results_file &";
+        #system "sleep 1s";
+        say "Opening $Bin/Temp/This_verse.html";
+        #system "$browser $Bin/Temp/This_verse.html &";
+        #say OUT3 "$browser $Bin/Temp/This_verse.html &";
+        system "sleep 1s";
+        #say "Opening $Bin/Temp/Strongs.html";
+        #system "$textEditor $Bin/Temp/Strongs.html &";
+    }
+    if ($^O eq "MSWin32" || $^O eq "MSWin64" ) {
+        say "Opening $blbRef";
+        system "START \"\" $blbRef";
+        say "Opening https://www.blueletterbible.org/lang/lexicon/lexicon.cfm?strongs=$strong";
+        system "START \"\" https://www.blueletterbible.org/lang/lexicon/lexicon.cfm?strongs=$strong";
+        my @tWs = split / /,$tW_files;
+        foreach (@tWs) {
+            say "Opening $_";
+            system "START \"\" $_"
+        }
+    }
+
+}
+
+# Reduce_This_Verse
+# Rewrites Temp/This_verse.html in place, keeping only its first
+# <div id="par">...</div> fragment wrapped in $html_wrap. Dies if the file
+# cannot be read or written, or if the fragment is not found.
+# Uses a lexical handle so the global OUT handle (the results file) stays
+# open for the caller.
+sub Reduce_This_Verse {
+    my $path = "$Bin/Temp/This_verse.html";
+    my $fileText = read_file($path, binmode => 'utf8') or die "$!: $Bin/Temp/This_verse.html";
+    if ($fileText =~ /(<div id="par">.*?<\/div>)/) {
+        $fileText = "$html_wrap$1</body></html>";
+        open my $out, ">:utf8", $path or die "$path: $!";
+        say {$out} $fileText;
+        close $out;
+    } else {
+        die "No <div id=\"par\"> fragment found in $path";
+    }
+
+}
+
+# Reduce_Interlinear
+# Rewrites Temp/This_interlinear.html in place, keeping only its first
+# <table ... class="maintext">...</table> fragment wrapped in $html_wrap.
+# Dies if the file cannot be read or written, or if the fragment is not
+# found. Uses a lexical handle so the global OUT handle stays open.
+sub Reduce_Interlinear {
+    my $path = "$Bin/Temp/This_interlinear.html";
+    my $fileText = read_file($path, binmode => 'utf8') or die "$!: $Bin/Temp/This_interlinear.html";
+    if ($fileText =~ /(<table [^<>]*? class="maintext">.*?<\/table>)/) {
+        $fileText = "$html_wrap$1</body></html>";
+        open my $out, ">:utf8", $path or die "$path: $!";
+        say {$out} $fileText;
+        close $out;
+    } else {
+        die "No <table class=\"maintext\"> fragment found in $path";
+    }
+
+}
+
+__DATA__
+01	gen	Genesis
+02	exo	Exodus
+03	lev	Leviticus
+04	num	Numbers
+05	deu	Deuteronomy
+06	jos	Joshua
+07	jdg	Judges
+08	rut	Ruth
+09	1sa	1 Samuel
+10	2sa	2 Samuel
+11	1ki	1 Kings
+12	2ki	2 Kings
+13	1ch	1 Chronicles
+14	2ch	2 Chronicles
+15	ezr	Ezra
+16	neh	Nehemiah
+17	est	Esther
+18	job	Job
+19	psa	Psalms
+20	pro	Proverbs
+21	ecc	Ecclesiastes
+22	sng	Song of Solomon
+23	isa	Isaiah
+24	jer	Jeremiah
+25	lam	Lamentations
+26	ezk	Ezekiel
+27	dan	Daniel
+28	hos	Hosea
+29	jol	Joel
+30	amo	Amos
+31	oba	Obadiah
+32	jon	Jonah
+33	mic	Micah
+34	nam	Nahum
+35	hab	Habakkuk
+36	zep	Zephaniah
+37	hag	Haggai
+38	zec	Zechariah
+39	mal	Malachi
+41	mat	Matthew
+42	mrk	Mark
+43	luk	Luke
+44	jhn	John
+45	act	Acts
+46	rom	Romans
+47	1co	1 Corinthians
+48	2co	2 Corinthians
+49	gal	Galatians
+50	eph	Ephesians
+51	php	Philippians
+52	col	Colossians
+53	1th	1 Thessalonians
+54	2th	2 Thessalonians
+55	1ti	1 Timothy
+56	2ti	2 Timothy
+57	tit	Titus
+58	phm	Philemon
+59	heb	Hebrews
+60	jas	James
+61	1pe	1 Peter
+62	2pe	2 Peter
+63	1jn	1 John
+64	2jn	2 John
+65	3jn	3 John
+66 jud Jude +67 rev Revelation +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "//www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"><html xmlns="//www.w3.org/1999/xhtml"><head><meta charset="UTF-8"/><title>