diff --git a/Tagged_OGNT/Auto-tagged/57-TIT.xml b/Tagged_OGNT/Auto-tagged/57-TIT.xml index 67d841c..54edaef 100644 --- a/Tagged_OGNT/Auto-tagged/57-TIT.xml +++ b/Tagged_OGNT/Auto-tagged/57-TIT.xml @@ -24,7 +24,6 @@ - ἐπ᾽ ἐλπίδι ζωῆς αἰωνίου ἣν ἐπηγγείλατο ὁ ἀψευδὴς Θεὸς πρὸ χρόνων αἰωνίων @@ -42,7 +41,6 @@ - ἐφανέρωσεν δὲ καιροῖς ἰδίοις τὸν λόγον αὐτοῦ ἐν κηρύγματι ὃ ἐπιστεύθην ἐγὼ κατ᾽ ἐπιταγὴν τοῦ Σωτῆρος ἡμῶν Θεοῦ @@ -66,7 +64,6 @@ - Τίτῳ γνησίῳ τέκνῳ κατὰ κοινὴν πίστιν Χάρις καὶ εἰρήνη ἀπὸ Θεοῦ Πατρὸς καὶ Χριστοῦ Ἰησοῦ τοῦ Σωτῆρος ἡμῶν @@ -87,7 +84,6 @@ - Τούτου χάριν ἀπέλιπόν σε ἐν Κρήτῃ ἵνα τὰ λείποντα ἐπιδιορθώσῃ καὶ καταστήσῃς κατὰ πόλιν πρεσβυτέρους ὡς ἐγώ σοι διεταξάμην @@ -112,7 +108,6 @@ - εἴ τίς ἐστιν ἀνέγκλητος μιᾶς γυναικὸς ἀνήρ τέκνα ἔχων πιστά μὴ ἐν κατηγορίᾳ ἀσωτίας ἢ ἀνυπότακτα @@ -134,7 +129,6 @@ - Δεῖ γὰρ τὸν ἐπίσκοπον ἀνέγκλητον εἶναι ὡς Θεοῦ οἰκονόμον μὴ αὐθάδη μὴ ὀργίλον μὴ πάροινον μὴ πλήκτην μὴ αἰσχροκερδῆ @@ -159,7 +153,6 @@ - ἀλλὰ φιλόξενον φιλάγαθον σώφρονα δίκαιον ὅσιον ἐγκρατῆ @@ -172,7 +165,6 @@ self-controlled - ἀντεχόμενον τοῦ κατὰ τὴν διδαχὴν πιστοῦ λόγου ἵνα δυνατὸς ᾖ καὶ παρακαλεῖν ἐν τῇ διδασκαλίᾳ τῇ ὑγιαινούσῃ καὶ τοὺς ἀντιλέγοντας ἐλέγχειν @@ -199,7 +191,6 @@ - Εἰσὶν γὰρ πολλοὶ καὶ ἀνυπότακτοι ματαιολόγοι καὶ φρεναπάται μάλιστα οἱ ἐκ τῆς περιτομῆς @@ -216,7 +207,6 @@ - οὓς δεῖ ἐπιστομίζειν οἵτινες ὅλους οἴκους ἀνατρέπουσιν διδάσκοντες ἃ μὴ δεῖ αἰσχροῦ κέρδους χάριν @@ -233,7 +223,6 @@ - εἶπέν τις ἐξ αὐτῶν ἴδιος αὐτῶν προφήτης Κρῆτες ἀεὶ ψεῦσται κακὰ θηρία γαστέρες ἀργαί @@ -250,7 +239,6 @@ - Ἡ μαρτυρία αὕτη ἐστὶν ἀληθής δι᾽ ἣν αἰτίαν ἔλεγχε αὐτοὺς ἀποτόμως ἵνα ὑγιαίνωσιν ἐν τῇ πίστει @@ -272,7 +260,6 @@ - μὴ προσέχοντες Ἰουδαϊκοῖς μύθοις καὶ ἐντολαῖς ἀνθρώπων ἀποστρεφομένων τὴν ἀλήθειαν @@ -286,7 +273,6 @@ - Πάντα καθαρὰ τοῖς καθαροῖς τοῖς δὲ μεμιαμμένοις καὶ ἀπίστοις οὐδὲν καθαρόν ἀλλὰ μεμίανται αὐτῶν καὶ ὁ νοῦς καὶ ἡ συνείδησις @@ -312,7 +298,6 @@ - Θεὸν ὁμολογοῦσιν εἰδέναι τοῖς δὲ ἔργοις ἀρνοῦνται βδελυκτοὶ ὄντες καὶ ἀπειθεῖς καὶ πρὸς πᾶν ἔργον ἀγαθὸν ἀδόκιμοι @@ -335,7 +320,6 @@ - @@ -351,7 +335,6 @@ - Πρεσβύτας νηφαλίους εἶναι σεμνούς σώφρονας ὑγιαίνοντας τῇ πίστει τῇ ἀγάπῃ τῇ ὑπομονῇ @@ -369,7 +352,6 @@ - πρεσβύτιδας ὡσαύτως ἐν καταστήματι ἱεροπρεπεῖς μὴ διαβόλους μηδὲ οἴνῳ πολλῷ δεδουλωμένας καλοδιδασκάλους @@ -387,7 +369,6 @@ - ἵνα σωφρονίζωσιν τὰς νέας φιλάνδρους εἶναι φιλοτέκνους @@ -400,7 +381,6 @@ - σώφρονας ἁγνάς οἰκουργούς ἀγαθάς ὑποτασσομένας τοῖς ἰδίοις ἀνδράσιν ἵνα μὴ ὁ λόγος τοῦ Θεοῦ βλασφημῆται @@ -419,7 +399,6 @@ - Τοὺς νεωτέρους ὡσαύτως παρακάλει σωφρονεῖν @@ -430,7 +409,6 @@ use good sense - Περὶ πάντα σεαυτὸν παρεχόμενος τύπον καλῶν ἔργων ἐν τῇ διδασκαλίᾳ ἀφθορίαν σεμνότητα @@ -446,7 +424,6 @@ - λόγον ὑγιῆ ἀκατάγνωστον ἵνα ὁ ἐξ ἐναντίας ἐντραπῇ μηδὲν ἔχων λέγειν περὶ ἡμῶν φαῦλον @@ -463,7 +440,6 @@ - Δούλους ἰδίοις δεσπόταις ὑποτάσσεσθαι ἐν πᾶσιν εὐαρέστους εἶναι μὴ ἀντιλέγοντας @@ -479,7 +455,6 @@ - μὴ νοσφιζομένους ἀλλὰ πᾶσαν πίστιν ἐνδεικνυμένους ἀγαθήν ἵνα τὴν διδασκαλίαν τὴν τοῦ Σωτῆρος ἡμῶν Θεοῦ κοσμῶσιν ἐν πᾶσιν @@ -503,7 +478,6 @@ - Ἐπεφάνη γὰρ ἡ χάρις τοῦ Θεοῦ σωτήριος πᾶσιν ἀνθρώποις @@ -518,7 +492,6 @@ - παιδεύουσα ἡμᾶς ἵνα ἀρνησάμενοι τὴν ἀσέβειαν καὶ τὰς κοσμικὰς ἐπιθυμίας σωφρόνως καὶ δικαίως καὶ εὐσεβῶς ζήσωμεν ἐν τῷ νῦν αἰῶνι @@ -544,7 +517,6 @@ - προσδεχόμενοι τὴν μακαρίαν ἐλπίδα καὶ ἐπιφάνειαν τῆς δόξης τοῦ μεγάλου Θεοῦ καὶ Σωτῆρος ἡμῶν Ἰησοῦ Χριστοῦ @@ -566,7 +538,6 @@ - ὃς ἔδωκεν ἑαυτὸν ὑπὲρ ἡμῶν ἵνα λυτρώσηται ἡμᾶς ἀπὸ πάσης ἀνομίας καὶ καθαρίσῃ ἑαυτῷ λαὸν περιούσιον ζηλωτὴν καλῶν ἔργων @@ -589,7 +560,6 @@ - Ταῦτα λάλει καὶ παρακάλει καὶ ἔλεγχε μετὰ πάσης ἐπιταγῆς μηδείς σου περιφρονείτω @@ -607,7 +577,6 @@ - @@ -627,7 +596,6 @@ - μηδένα βλασφημεῖν ἀμάχους εἶναι ἐπιεικεῖς πᾶσαν ἐνδεικνυμένους πραΰτητα πρὸς πάντας ἀνθρώπους @@ -642,7 +610,6 @@ - Ἦμεν γάρ ποτε καὶ ἡμεῖς ἀνόητοι ἀπειθεῖς πλανώμενοι δουλεύοντες ἐπιθυμίαις καὶ ἡδοναῖς ποικίλαις ἐν κακίᾳ καὶ φθόνῳ διάγοντες στυγητοί μισοῦντες ἀλλήλους @@ -669,7 +636,6 @@ - Ὅτε δὲ ἡ χρηστότης καὶ ἡ φιλανθρωπία ἐπεφάνη τοῦ Σωτῆρος ἡμῶν Θεοῦ @@ -687,7 +653,6 @@ - οὐκ ἐξ ἔργων τῶν ἐν δικαιοσύνῃ ἃ ἐποιήσαμεν ἡμεῖς ἀλλὰ κατὰ τὸ αὐτοῦ ἔλεος ἔσωσεν ἡμᾶς διὰ λουτροῦ παλινγενεσίας καὶ ἀνακαινώσεως Πνεύματος Ἁγίου @@ -716,7 +681,6 @@ - οὗ ἐξέχεεν ἐφ᾽ ἡμᾶς πλουσίως διὰ Ἰησοῦ Χριστοῦ τοῦ Σωτῆρος ἡμῶν @@ -733,7 +697,6 @@ - ἵνα δικαιωθέντες τῇ ἐκείνου χάριτι κληρονόμοι γενηθῶμεν κατ᾽ ἐλπίδα ζωῆς αἰωνίου @@ -750,7 +713,6 @@ - Πιστὸς ὁ λόγος καὶ περὶ τούτων βούλομαί σε διαβεβαιοῦσθαι ἵνα φροντίζωσιν καλῶν ἔργων προΐστασθαι οἱ πεπιστευκότες Θεῷ ταῦτά ἐστιν καλὰ καὶ ὠφέλιμα τοῖς ἀνθρώποις @@ -778,7 +740,6 @@ - Μωρὰς δὲ ζητήσεις καὶ γενεαλογίας καὶ ἔρεις καὶ μάχας νομικὰς περιΐστασο εἰσὶν γὰρ ἀνωφελεῖς καὶ μάταιοι @@ -800,7 +761,6 @@ - αἱρετικὸν ἄνθρωπον μετὰ μίαν καὶ δευτέραν νουθεσίαν παραιτοῦ @@ -811,7 +771,6 @@ - εἰδὼς ὅτι ἐξέστραπται ὁ τοιοῦτος καὶ ἁμαρτάνει ὢν αὐτοκατάκριτος @@ -826,7 +785,6 @@ - Ὅταν πέμψω Ἀρτεμᾶν πρὸς σὲ ἢ Τυχικόν σπούδασον ἐλθεῖν πρός με εἰς Νικόπολιν ἐκεῖ γὰρ κέκρικα παραχειμάσαι @@ -849,7 +807,6 @@ - Ζηνᾶν τὸν νομικὸν καὶ Ἀπολλῶν σπουδαίως πρόπεμψον ἵνα μηδὲν αὐτοῖς λείπῃ @@ -866,7 +823,6 @@ - μανθανέτωσαν δὲ καὶ οἱ ἡμέτεροι καλῶν ἔργων προΐστασθαι εἰς τὰς ἀναγκαίας χρείας ἵνα μὴ ὦσιν ἄκαρποι @@ -888,7 +844,6 @@ - Ἀσπάζονταί σε οἱ μετ᾽ ἐμοῦ πάντες Ἄσπασαι τοὺς φιλοῦντας ἡμᾶς ἐν πίστει Ἡ χάρις μετὰ πάντων ὑμῶν @@ -909,7 +864,6 @@ - diff --git a/Tagged_OGNT/Tag_OGNT.pl b/Tagged_OGNT/Tag_OGNT.pl index f9e59c0..aba42da 100644 --- a/Tagged_OGNT/Tag_OGNT.pl +++ b/Tagged_OGNT/Tag_OGNT.pl @@ -1,5 +1,6 @@ # Takes current tW entries and populates tagged OGNT XML # This is the current best version +# It takes care of all entries but doesn't account for USFM codes in ULB # Trying to get it to work with repeated instances of same word. use 5.12.0; use File::Slurp; @@ -105,14 +106,16 @@ sub ProcessXML { my ($originalLinesCount, $rsnCount, $skipCount, $noRSNCount, $outCount); while (my $thisLine = <$thisFile>) { chomp $thisLine; + if ($thisLine =~ //) { - say LOG "\$linesWithRelevantSNs\n$linesWithRelevantSNs\n\$linesNotMatched\n$linesNotMatched\n\$linesToSkip\n$linesToSkip\$residueText\n$residueText"; + say LOG "<11>\n\$linesWithRelevantSNs\n$linesWithRelevantSNs\n\$linesToSkip\n$linesToSkip\$residueText\n$residueText"; - ($matchedLines, $residueText, $linesNotMatched) = ProcessRelevantSNs($linesWithRelevantSNs, $staticText, $residueText, $linesNotMatched); + ($matchedLines, $residueText, $linesNotMatched) = ProcessRelevantSNs($linesWithRelevantSNs, $staticText, $residueText); + say LOG "<14>\t\$matchedLines\n$matchedLines\n\$linesNotMatched\n$linesNotMatched"; my %orderedLine; $matchedLines =~ s/\n{2,}/\n/gs; - say LOG "===\nBefore \$matchedLines:\n$matchedLines\n==="; + say LOG "<15\tBefore sort of \$matchedLines:\n$matchedLines\n"; while ($matchedLines =~ /([^◊]*)◊(\d*)\n/g) { $orderedLine{$2} = $1; say LOG "<5>\t\$2: $2\t\$1: $1"; @@ -123,16 +126,17 @@ sub ProcessXML { $matchedLines .= "$orderedLine{$line}\n" } chomp $matchedLines; - say LOG "===\nAfter \$matchedLines:\n$matchedLines\n==="; + say LOG "<16>\tAfter sort of \$matchedLines:\n$matchedLines\n\$linesNotMatched\n$linesNotMatched"; $residueText =~ s/(^q | q$)//g; $residueText =~ s/ {3,}/ /g; $linesNotMatched =~ s/\n+$//; + $linesToSkip =~ s/\n+$//; $matchedLines =~ s/^\n+//; say OUT "\t\t\t\t\t$greekText"; say OUT "\t\t\t\t\t$staticText"; say OUT "\t\t\t\t\t$residueText"; say OUT "$matchedLines"; - say OUT "$linesNotMatched" if ($linesNotMatched =~ /^.+$/);# This isn't giving any output + say OUT "$linesNotMatched" if ($linesNotMatched =~ /^.+$/); say OUT "$linesToSkip" if ($linesToSkip); say OUT "$thisLine"; ($originalLinesCount, $rsnCount, $skipCount, $noRSNCount, $outCount) = (); @@ -190,7 +194,8 @@ sub ProcessRelevantSNs { ($thisLine, $residueText, $linesNotMatched) = MatchAndPlace($line, $thisSN, $staticText, $residueText, $linesNotMatched); $thisLine =~ s/[ \t]+$//; $matchedLines .= $thisLine . "\n"; - say LOG "+++\n\$matchedLines\n$matchedLines\n+++" + $matchedLines =~ s/\n{2,}$/\n/s; + say LOG "<13>\t\$matchedLines\n$matchedLines\n\$linesNotMatched\n$linesNotMatched+++" } } return ($matchedLines, $residueText, $linesNotMatched); @@ -284,31 +289,6 @@ sub FixWorkText { } return ($text) } -sub Match { - my ($line, $text, $worktext, $entry, $workEntry) = @_; - my $outputLines; -# say LOG "---\n\$line:\n$line\n\$text\n$text\n\$workText\n$workText\n\$entry: $entry\t\$workEntry: $workEntry"; - say LOG "---\nBefore search, new $workText\$workText\n$workText\n\$entry: $entry\t\$workEntry: $workEntry"; - my $found; - if ($workText =~ /\b$workEntry\b/p) { - $workText = "${^PREMATCH} ${^POSTMATCH}"; - say LOG "<7.1>\tAfter found, new \$workText: $workText"; - $found = 1; - #say LOG "<7>\$workEntry: $workEntry\nNow looking for |$workEntry| in\n$text"; - if ($text =~ /^(.*?)\b$workEntry\b/) { - my $place = length $1; - $line =~ s/>$entryFound.\t \$place: $place\t\$outputLineOrder{$place}: $outputLineOrder{$place}\n"; - } - } - else { - #say LOG "\$workEntry $workEntry not found" - } - #say LOG "---"; - return ($found, $outputLines, $workText); -} - sub Read_ULB_File { $ULBText = read_file("/Users/Henry/Documents/WACS/Tips_and_Hacks/MAST_tW_PDF_Updater/FilesForUpdates/Temp/ULB_text.txt", binmode => 'utf8'); diff --git a/Tagged_OGNT/Tag_OGNT_copy_3.pl b/Tagged_OGNT/Tag_OGNT_copy_3.pl new file mode 100644 index 0000000..6bd0a48 --- /dev/null +++ b/Tagged_OGNT/Tag_OGNT_copy_3.pl @@ -0,0 +1,363 @@ +# Takes current tW entries and populates tagged OGNT XML +# This is the current best version +# It takes care of all entries but doesn't account for USFM codes in ULB +# Trying to get it to work with repeated instances of same word. +use 5.12.0; +use File::Slurp; +use File::Find ; +use Cwd ; +use utf8; +#use open IN => ":utf8", OUT => ":utf8"; +use open IO => ":utf8"; +$" = "\n"; + +mkdir "Logs"; +open(LOG, ">Logs/tW_pairs.txt") or die "$!"; + +my $topDirULB = "/Users/Henry/Documents/WACS/en_ulb"; +#my $topDirOGNT = "/Users/Henry/Google Drive/WA/Tagged_OGNT/OGNT_for_tagging"; +my $topDirOGNT = "/Users/Henry/Documents/WACS/Tips_and_Hacks/Tagged_OGNT/Tag_test"; +#my $topDirtW = "/Users/Henry/Documents/WACS/Restructure/bible"; +my $topDirtW = "/Users/Henry/Documents/WACS/en_tw/bible"; +my ($outDir, $outFile) = ("/Users/Henry/Documents/WACS/Tips_and_Hacks/Tagged_OGNT/Auto-tagged", ""); +my ($ULBText, $workText); +my ($file); +my (%ULBtextThisVerse, %SNsThisVerse, %entriesThisSN, %longName); + +my @OGNTfilesToRun = (); +my $filePattern = '.xml' ; +find( sub { push @OGNTfilesToRun, $File::Find::name if ( m/^(.*)$filePattern$/ ) }, $topDirOGNT) ; + +my @tWfilesToRun = (); +$filePattern = '.md' ; +find( sub { push @tWfilesToRun, $File::Find::name if ( m/^(.*)$filePattern$/ ) }, $topDirtW) ; + +Read_tW_Files(); + +close LOG; +open(LOG, ">Logs/Log.txt") or die "$!"; + +LongBookNames(); +Read_ULB_File(); +ProcessXML(); +# put unused SN at end of verse + +close LOG; + +say "\nDone."; +# ===== +sub Read_tW_Files { + foreach $file ( @tWfilesToRun ) { + #say LOG $file; + my (@sns); + my $entries; + my $fileText = read_file("$file", binmode => 'utf8'); + if ($fileText =~ /\* Strong's: ([^\n]*)\n/) { + my $sns = $1; + #say LOG "\t$sns"; + @sns = split /, /, $sns; + } + if ($fileText =~ /Forms Found in the English ULB:\n\n([^\n]*)\n/) { + $entries = $1; + die "$fileText" if $entries eq ""; + #say LOG "\t\t$entries" + } + foreach my $sn (@sns) { + $entriesThisSN{$sn} .= $entries . ", "; + #say LOG "\t\t\t$sn: $entriesThisSN{$sn}" + } + } + foreach my $sn (sort keys %entriesThisSN) { + #say LOG "$sn: $entriesThisSN{$sn}"; + my @entries = split /, /, $entriesThisSN{$sn}; + @entries = reverse sort { substr($a,0,1) <=> substr($b,0,1) + || length($a) <=> length($b) + || $a <=> $b } + @entries; + $entriesThisSN{$sn} = ""; + foreach my $slice (@entries) { + $entriesThisSN{$sn} .= "$slice, " + } + $entriesThisSN{$sn} =~ s/, $//; + say LOG "$sn: $entriesThisSN{$sn}"; + } + +} +sub LongBookNames { + while () { + chomp; + if (/([^\t]*)\t([^\t]*)\t([^\t]*)/) { + $longName{$2} = $3 + } + } +} +sub ProcessXML { +# foreach XML file + foreach my $file (@OGNTfilesToRun) { + my $greekText; + my $fileGist; + if ($file =~ /(......\.xml)/) { + $fileGist = $1; + } + say LOG $file . "\t" . $fileGist; + open(OUT, ">$outDir/$fileGist") or die "$outDir/$fileGist: $!"; + my ($pre, $gist, $post, $bk, $ch, $vs, $thisVerse, $staticText, $residueText, $matchedLines, $flag, $thisVerseForOutput, $linesWithRelevantSNs, $linesNotMatched, $orderedOutputLines, $linesToSkip); + open (my $thisFile, "<:utf8", "$file") or die "$file:\n$!"; + my ($originalLinesCount, $rsnCount, $skipCount, $noRSNCount, $outCount); + while (my $thisLine = <$thisFile>) { + chomp $thisLine; + if ($thisLine =~ //) { + say LOG "<11>\n\$linesWithRelevantSNs\n$linesWithRelevantSNs\n\$linesToSkip\n$linesToSkip\$residueText\n$residueText"; + + ($matchedLines, $residueText, $linesNotMatched) = ProcessRelevantSNs($linesWithRelevantSNs, $staticText, $residueText); + say LOG "<14>\t\$matchedLines\n$matchedLines\n\$linesNotMatched\n$linesNotMatched"; + my %orderedLine; + $matchedLines =~ s/\n{2,}/\n/gs; + say LOG "<15\tBefore sort of \$matchedLines:\n$matchedLines\n"; + while ($matchedLines =~ /([^◊]*)◊(\d*)\n/g) { + $orderedLine{$2} = $1; + say LOG "<5>\t\$2: $2\t\$1: $1"; + } + $matchedLines = ""; + foreach my $line (sort {$a <=> $b} keys %orderedLine) { + say LOG "\$line: $line\t\$orderedLine{$line}: $orderedLine{$line}"; + $matchedLines .= "$orderedLine{$line}\n" + } + chomp $matchedLines; + say LOG "<16>\tAfter sort of \$matchedLines:\n$matchedLines\n\$linesNotMatched\n$linesNotMatched"; + $residueText =~ s/(^q | q$)//g; + $residueText =~ s/ {3,}/ /g; + $linesNotMatched =~ s/\n+$//; + $matchedLines =~ s/^\n+//; + say OUT "\t\t\t\t\t$greekText"; + say OUT "\t\t\t\t\t$staticText"; + say OUT "\t\t\t\t\t$residueText"; + say OUT "$matchedLines"; + say OUT "$linesNotMatched" if ($linesNotMatched =~ /^.+$/); + say OUT "$linesToSkip" if ($linesToSkip); + say OUT "$thisLine"; + ($originalLinesCount, $rsnCount, $skipCount, $noRSNCount, $outCount) = (); + ($thisVerseForOutput, $flag, $workText, $greekText, $linesNotMatched, $linesToSkip, $residueText, $orderedOutputLines, $linesWithRelevantSNs) = (); + ($linesToSkip) = (""); + } + elsif ($thisLine =~ /([^\n<>]*)([^<]*)(<\/w>)/$1 text="$2">$3/; + if ($thisLine =~ /lemma="(G\d+)"/) { + my $thisLemma = $1; + if (exists $entriesThisSN{$thisLemma}) { + $rsnCount ++; + $linesWithRelevantSNs .= $thisLine . "\n"; + #say LOG "\$thisLemma: $thisLemma; line pushed to \@LinesWithRelevantSNs:\n--\n=>@LinesWithRelevantSNs\n--"; + } + else { + $skipCount ++; + $thisLine =~ s/><\/w>/>√<\/w>/; + $linesToSkip .= "$thisLine\n"; + #say LOG "\$thisLemma: $thisLemma; line pushed to \@LinesToSkip"; + } + } + #say LOG "$thisLine"; + } + elsif ($thisLine =~ //) { + #say LOG $thisLine; + ($bk, $ch, $vs) = ($1,$2,$3); + ($thisVerse, $greekText) = ("$longName{$bk} $ch:$vs", ""); + if ($ULBText =~ /$thisVerse\t([^\n]*)\n/) { + $staticText = $1; + $residueText = "q $staticText q"; + } + say OUT $thisLine; + ($flag) = (1); + } + else {say OUT $thisLine} + } + + close $thisFile; + close OUT; + } +} +sub ProcessRelevantSNs { + my ($relevantLines, $staticText, $residueText, $linesNotMatched) = (@_); + my ($matchedLines, $thisLine); + my @relevantLines = split /\n/, $relevantLines; + foreach my $line (@relevantLines) { + if ($line =~ /lemma="(G\d+)"/) { + my $thisSN = $1; + say LOG "\$line: $line, \$thisSN: $thisSN, \$entriesThisSN{$thisSN}\n$entriesThisSN{$thisSN}"; + ($thisLine, $residueText, $linesNotMatched) = MatchAndPlace($line, $thisSN, $staticText, $residueText, $linesNotMatched); + $thisLine =~ s/[ \t]+$//; + $matchedLines .= $thisLine . "\n"; + $matchedLines =~ s/\n{2,}$/\n/s; + say LOG "<13>\t\$matchedLines\n$matchedLines\n\$linesNotMatched\n$linesNotMatched+++" + } + } + return ($matchedLines, $residueText, $linesNotMatched); +} +sub MatchAndPlace { + my ($line, $sn, $staticText, $workText, $linesNotMatched) = @_; + #say LOG "<8>\t\$line: $line \$sn: $sn \$workText\n$workText"; + my ($workEntry, $found, $matchedLines, $first, $second, $third, $firstLen, $secondLen, $thirdLen); + my @entries = split /, /, $entriesThisSN{$sn}; + foreach my $entry (@entries) { + my $entryType; + if ($entry =~ /^(.*) \.\.\. (.*) \.\.\. (.*)$/) { + ($first, $second, $third) = ($1, $2, $3); + ($firstLen, $secondLen, $thirdLen) = (length $first, length $second, length $third); + $workEntry = "\\b" . $first . "\\b" . ".*?" . "\\b" . $second . "\\b" . ".*?" . "\\b" . $third; + say LOG "<1a>\t\$first: $first, \$second: $second, \$third: $third, \$firstLen: $firstLen, \$secondLen,: $secondLen, \$thirdLen: $thirdLen \$entry: |$entry|\t\$workEntry: |$workEntry|"; + $entryType = 1; + } + elsif ($entry =~ /^(.*) \.\.\. (.*)$/) { + ($first, $second) = ($1, $2); + ($firstLen, $secondLen) = (length $first, length $second); + $workEntry = "\\b" . $first . "\\b" . ".*?" . "\\b" . $second . "\\b"; + say LOG "<2a>\t\$first: $first, \$second: $second, \$third: $third, \$firstLen: $firstLen, \$secondLen,: $secondLen, \$entry: |$entry|\t\$workEntry: |$workEntry|"; + $entryType = 2; + } + else {$workEntry = $entry;} + + my $foundText; + #say LOG "<8.1>\t\$entryType: $entryType\t\$entry: $entry\t\$workEntry: $workEntry"; + if ($workText =~ /\b$workEntry\b/p) { + say LOG "<8.1>Found: \t\$entryType: $entryType\t\$entry: $entry\t\$workEntry: $workEntry"; + ($foundText, $workText) = ($&, "${^PREMATCH}ı${^POSTMATCH}"); + my ($place, $foundTextLength, $replacementSpaces) = (length ${^PREMATCH}, length $foundText, ""); + $line =~ s/>$entry\n\$workText,: $workText, \$matchedLines:\n$matchedLines "; + + ($workText) = FixWorkText($line, $workText, $workEntry, $foundText, $foundTextLength, $first, $firstLen, $second, $secondLen, $third, $thirdLen); + + say LOG "<8.3>\n\$workText:\n$workText\n\$matchedLines:\n$matchedLines"; + + } + + else {$workText =~ s/ı/$replacementSpaces/;} + + $matchedLines .= "$line◊$place"; + + say LOG "<8.4>\tAfter found, new \$workText:\n$workText"; + $found = 1; + } + else { + #say LOG "\$workEntry $workEntry not found" + } + if ($found) { + last + } + } + unless ($found) { + $line =~ s/>?\$text:\n$text\n\t\t\$entry: $entry \$foundText: $foundText\t \$foundTextLength: $foundTextLength\t\$first: $first\t\$second: $second\t\$third: $third\n\$firstSpace: $firstSpace\t\$secondSpace: $secondSpace\t\$thirdSpace: $thirdSpace"; + if ($third) { + if ($foundText =~ /$first(.*)$second(.*)$third/) { + my ($firstGap, $secondGap) = ($1, $2); + my $repText = "$firstSpace$firstGap$secondSpace$secondGap$thirdSpace"; + say LOG "<9.1> \$repText: $repText"; + $text =~ s/ı/$repText/; + } + } + else { + if ($foundText =~ /$first(.*)$second/) { + my ($firstGap) = ($1); + say LOG "<9.2>\t\$firstSpace: |$firstSpace|\t\$firstGap: |$firstGap|\t\$secondSpace: |$secondSpace|"; + my $repText ="$firstSpace$firstGap$secondSpace"; + say LOG "<9.3> \$repText: |$repText|"; + $text =~ s/ı/$repText/; + } + } + return ($text) +} +sub Read_ULB_File { + + $ULBText = read_file("/Users/Henry/Documents/WACS/Tips_and_Hacks/MAST_tW_PDF_Updater/FilesForUpdates/Temp/ULB_text.txt", binmode => 'utf8'); + +} + +__DATA__ +01 gen Genesis +02 exo Exodus +03 lev Leviticus +04 num Numbers +05 deu Deuteronomy +06 jos Joshua +07 jdg Judges +08 rut Ruth +09 1sa 1 Samuel +10 2sa 2 Samuel +11 1ki 1 Kings +12 2ki 2 Kings +13 1ch 1 Chronicles +14 2ch 2 Chronicles +15 ezr Ezra +16 neh Nehemiah +17 est Esther +18 job Job +19 psa Psalms +20 pro Proverbs +21 ecc Ecclesiastes +22 sng Song of Solomon +23 isa Isaiah +24 jer Jeremiah +25 lam Lamentations +26 ezk Ezekiel +27 dan Daniel +28 hos Hosea +29 jol Joel +30 amo Amos +31 oba Obadiah +32 jon Jonah +33 mic Micah +34 nam Nahum +35 hab Habakkuk +36 zep Zephaniah +37 hag Haggai +38 zec Zechariah +39 mal Malachi +41 mat Matthew +42 mrk Mark +43 luk Luke +44 jhn John +45 act Acts +46 rom Romans +47 1co 1 Corinthians +48 2co 2 Corinthians +49 gal Galatians +50 eph Ephesians +51 php Philippians +52 col Colossians +53 1th 1 Thessalonians +54 2th 2 Thessalonians +55 1ti 1 Timothy +56 2ti 2 Timothy +57 tit Titus +58 phm Philemon +59 heb Hebrews +60 jas James +61 1pe 1 Peter +62 2pe 2 Peter +63 1jn 1 John +64 2jn 2 John +65 3jn 3 John +66 jud Jude +67 rev Revelation