diff --git a/Tagged_OGNT/Auto-tagged/57-TIT.xml b/Tagged_OGNT/Auto-tagged/57-TIT.xml
index 67d841c..54edaef 100644
--- a/Tagged_OGNT/Auto-tagged/57-TIT.xml
+++ b/Tagged_OGNT/Auto-tagged/57-TIT.xml
@@ -24,7 +24,6 @@
√
√
√
-
ἐπ᾽ ἐλπίδι ζωῆς αἰωνίου ἣν ἐπηγγείλατο ὁ ἀψευδὴς Θεὸς πρὸ χρόνων αἰωνίων
@@ -42,7 +41,6 @@
√
√
√
-
ἐφανέρωσεν δὲ καιροῖς ἰδίοις τὸν λόγον αὐτοῦ ἐν κηρύγματι ὃ ἐπιστεύθην ἐγὼ κατ᾽ ἐπιταγὴν τοῦ Σωτῆρος ἡμῶν Θεοῦ
@@ -66,7 +64,6 @@
√
√
√
-
Τίτῳ γνησίῳ τέκνῳ κατὰ κοινὴν πίστιν Χάρις καὶ εἰρήνη ἀπὸ Θεοῦ Πατρὸς καὶ Χριστοῦ Ἰησοῦ τοῦ Σωτῆρος ἡμῶν
@@ -87,7 +84,6 @@
√
√
√
-
Τούτου χάριν ἀπέλιπόν σε ἐν Κρήτῃ ἵνα τὰ λείποντα ἐπιδιορθώσῃ καὶ καταστήσῃς κατὰ πόλιν πρεσβυτέρους ὡς ἐγώ σοι διεταξάμην
@@ -112,7 +108,6 @@
√
√
√
-
εἴ τίς ἐστιν ἀνέγκλητος μιᾶς γυναικὸς ἀνήρ τέκνα ἔχων πιστά μὴ ἐν κατηγορίᾳ ἀσωτίας ἢ ἀνυπότακτα
@@ -134,7 +129,6 @@
√
√
√
-
Δεῖ γὰρ τὸν ἐπίσκοπον ἀνέγκλητον εἶναι ὡς Θεοῦ οἰκονόμον μὴ αὐθάδη μὴ ὀργίλον μὴ πάροινον μὴ πλήκτην μὴ αἰσχροκερδῆ
@@ -159,7 +153,6 @@
√
√
√
-
ἀλλὰ φιλόξενον φιλάγαθον σώφρονα δίκαιον ὅσιον ἐγκρατῆ
@@ -172,7 +165,6 @@
self-controlled
√
√
-
ἀντεχόμενον τοῦ κατὰ τὴν διδαχὴν πιστοῦ λόγου ἵνα δυνατὸς ᾖ καὶ παρακαλεῖν ἐν τῇ διδασκαλίᾳ τῇ ὑγιαινούσῃ καὶ τοὺς ἀντιλέγοντας ἐλέγχειν
@@ -199,7 +191,6 @@
√
√
√
-
Εἰσὶν γὰρ πολλοὶ καὶ ἀνυπότακτοι ματαιολόγοι καὶ φρεναπάται μάλιστα οἱ ἐκ τῆς περιτομῆς
@@ -216,7 +207,6 @@
√
√
√
-
οὓς δεῖ ἐπιστομίζειν οἵτινες ὅλους οἴκους ἀνατρέπουσιν διδάσκοντες ἃ μὴ δεῖ αἰσχροῦ κέρδους χάριν
@@ -233,7 +223,6 @@
√
√
√
-
εἶπέν τις ἐξ αὐτῶν ἴδιος αὐτῶν προφήτης Κρῆτες ἀεὶ ψεῦσται κακὰ θηρία γαστέρες ἀργαί
@@ -250,7 +239,6 @@
√
√
√
-
Ἡ μαρτυρία αὕτη ἐστὶν ἀληθής δι᾽ ἣν αἰτίαν ἔλεγχε αὐτοὺς ἀποτόμως ἵνα ὑγιαίνωσιν ἐν τῇ πίστει
@@ -272,7 +260,6 @@
√
√
√
-
μὴ προσέχοντες Ἰουδαϊκοῖς μύθοις καὶ ἐντολαῖς ἀνθρώπων ἀποστρεφομένων τὴν ἀλήθειαν
@@ -286,7 +273,6 @@
√
√
√
-
Πάντα καθαρὰ τοῖς καθαροῖς τοῖς δὲ μεμιαμμένοις καὶ ἀπίστοις οὐδὲν καθαρόν ἀλλὰ μεμίανται αὐτῶν καὶ ὁ νοῦς καὶ ἡ συνείδησις
@@ -312,7 +298,6 @@
√
√
√
-
Θεὸν ὁμολογοῦσιν εἰδέναι τοῖς δὲ ἔργοις ἀρνοῦνται βδελυκτοὶ ὄντες καὶ ἀπειθεῖς καὶ πρὸς πᾶν ἔργον ἀγαθὸν ἀδόκιμοι
@@ -335,7 +320,6 @@
√
√
√
-
@@ -351,7 +335,6 @@
√
√
√
-
Πρεσβύτας νηφαλίους εἶναι σεμνούς σώφρονας ὑγιαίνοντας τῇ πίστει τῇ ἀγάπῃ τῇ ὑπομονῇ
@@ -369,7 +352,6 @@
√
√
√
-
πρεσβύτιδας ὡσαύτως ἐν καταστήματι ἱεροπρεπεῖς μὴ διαβόλους μηδὲ οἴνῳ πολλῷ δεδουλωμένας καλοδιδασκάλους
@@ -387,7 +369,6 @@
√
√
√
-
ἵνα σωφρονίζωσιν τὰς νέας φιλάνδρους εἶναι φιλοτέκνους
@@ -400,7 +381,6 @@
√
√
√
-
σώφρονας ἁγνάς οἰκουργούς ἀγαθάς ὑποτασσομένας τοῖς ἰδίοις ἀνδράσιν ἵνα μὴ ὁ λόγος τοῦ Θεοῦ βλασφημῆται
@@ -419,7 +399,6 @@
√
√
√
-
Τοὺς νεωτέρους ὡσαύτως παρακάλει σωφρονεῖν
@@ -430,7 +409,6 @@
use good sense
√
√
-
Περὶ πάντα σεαυτὸν παρεχόμενος τύπον καλῶν ἔργων ἐν τῇ διδασκαλίᾳ ἀφθορίαν σεμνότητα
@@ -446,7 +424,6 @@
√
√
√
-
λόγον ὑγιῆ ἀκατάγνωστον ἵνα ὁ ἐξ ἐναντίας ἐντραπῇ μηδὲν ἔχων λέγειν περὶ ἡμῶν φαῦλον
@@ -463,7 +440,6 @@
√
√
√
-
Δούλους ἰδίοις δεσπόταις ὑποτάσσεσθαι ἐν πᾶσιν εὐαρέστους εἶναι μὴ ἀντιλέγοντας
@@ -479,7 +455,6 @@
√
√
√
-
μὴ νοσφιζομένους ἀλλὰ πᾶσαν πίστιν ἐνδεικνυμένους ἀγαθήν ἵνα τὴν διδασκαλίαν τὴν τοῦ Σωτῆρος ἡμῶν Θεοῦ κοσμῶσιν ἐν πᾶσιν
@@ -503,7 +478,6 @@
√
√
√
-
Ἐπεφάνη γὰρ ἡ χάρις τοῦ Θεοῦ σωτήριος πᾶσιν ἀνθρώποις
@@ -518,7 +492,6 @@
√
√
√
-
παιδεύουσα ἡμᾶς ἵνα ἀρνησάμενοι τὴν ἀσέβειαν καὶ τὰς κοσμικὰς ἐπιθυμίας σωφρόνως καὶ δικαίως καὶ εὐσεβῶς ζήσωμεν ἐν τῷ νῦν αἰῶνι
@@ -544,7 +517,6 @@
√
√
√
-
προσδεχόμενοι τὴν μακαρίαν ἐλπίδα καὶ ἐπιφάνειαν τῆς δόξης τοῦ μεγάλου Θεοῦ καὶ Σωτῆρος ἡμῶν Ἰησοῦ Χριστοῦ
@@ -566,7 +538,6 @@
√
√
√
-
ὃς ἔδωκεν ἑαυτὸν ὑπὲρ ἡμῶν ἵνα λυτρώσηται ἡμᾶς ἀπὸ πάσης ἀνομίας καὶ καθαρίσῃ ἑαυτῷ λαὸν περιούσιον ζηλωτὴν καλῶν ἔργων
@@ -589,7 +560,6 @@
√
√
√
-
Ταῦτα λάλει καὶ παρακάλει καὶ ἔλεγχε μετὰ πάσης ἐπιταγῆς μηδείς σου περιφρονείτω
@@ -607,7 +577,6 @@
√
√
√
-
@@ -627,7 +596,6 @@
√
√
√
-
μηδένα βλασφημεῖν ἀμάχους εἶναι ἐπιεικεῖς πᾶσαν ἐνδεικνυμένους πραΰτητα πρὸς πάντας ἀνθρώπους
@@ -642,7 +610,6 @@
√
√
√
-
Ἦμεν γάρ ποτε καὶ ἡμεῖς ἀνόητοι ἀπειθεῖς πλανώμενοι δουλεύοντες ἐπιθυμίαις καὶ ἡδοναῖς ποικίλαις ἐν κακίᾳ καὶ φθόνῳ διάγοντες στυγητοί μισοῦντες ἀλλήλους
@@ -669,7 +636,6 @@
√
√
√
-
Ὅτε δὲ ἡ χρηστότης καὶ ἡ φιλανθρωπία ἐπεφάνη τοῦ Σωτῆρος ἡμῶν Θεοῦ
@@ -687,7 +653,6 @@
√
√
√
-
οὐκ ἐξ ἔργων τῶν ἐν δικαιοσύνῃ ἃ ἐποιήσαμεν ἡμεῖς ἀλλὰ κατὰ τὸ αὐτοῦ ἔλεος ἔσωσεν ἡμᾶς διὰ λουτροῦ παλινγενεσίας καὶ ἀνακαινώσεως Πνεύματος Ἁγίου
@@ -716,7 +681,6 @@
√
√
√
-
οὗ ἐξέχεεν ἐφ᾽ ἡμᾶς πλουσίως διὰ Ἰησοῦ Χριστοῦ τοῦ Σωτῆρος ἡμῶν
@@ -733,7 +697,6 @@
√
√
√
-
ἵνα δικαιωθέντες τῇ ἐκείνου χάριτι κληρονόμοι γενηθῶμεν κατ᾽ ἐλπίδα ζωῆς αἰωνίου
@@ -750,7 +713,6 @@
√
√
√
-
Πιστὸς ὁ λόγος καὶ περὶ τούτων βούλομαί σε διαβεβαιοῦσθαι ἵνα φροντίζωσιν καλῶν ἔργων προΐστασθαι οἱ πεπιστευκότες Θεῷ ταῦτά ἐστιν καλὰ καὶ ὠφέλιμα τοῖς ἀνθρώποις
@@ -778,7 +740,6 @@
√
√
√
-
Μωρὰς δὲ ζητήσεις καὶ γενεαλογίας καὶ ἔρεις καὶ μάχας νομικὰς περιΐστασο εἰσὶν γὰρ ἀνωφελεῖς καὶ μάταιοι
@@ -800,7 +761,6 @@
√
√
√
-
αἱρετικὸν ἄνθρωπον μετὰ μίαν καὶ δευτέραν νουθεσίαν παραιτοῦ
@@ -811,7 +771,6 @@
√
√
√
-
εἰδὼς ὅτι ἐξέστραπται ὁ τοιοῦτος καὶ ἁμαρτάνει ὢν αὐτοκατάκριτος
@@ -826,7 +785,6 @@
√
√
√
-
Ὅταν πέμψω Ἀρτεμᾶν πρὸς σὲ ἢ Τυχικόν σπούδασον ἐλθεῖν πρός με εἰς Νικόπολιν ἐκεῖ γὰρ κέκρικα παραχειμάσαι
@@ -849,7 +807,6 @@
√
√
√
-
Ζηνᾶν τὸν νομικὸν καὶ Ἀπολλῶν σπουδαίως πρόπεμψον ἵνα μηδὲν αὐτοῖς λείπῃ
@@ -866,7 +823,6 @@
√
√
√
-
μανθανέτωσαν δὲ καὶ οἱ ἡμέτεροι καλῶν ἔργων προΐστασθαι εἰς τὰς ἀναγκαίας χρείας ἵνα μὴ ὦσιν ἄκαρποι
@@ -888,7 +844,6 @@
√
√
√
-
Ἀσπάζονταί σε οἱ μετ᾽ ἐμοῦ πάντες Ἄσπασαι τοὺς φιλοῦντας ἡμᾶς ἐν πίστει Ἡ χάρις μετὰ πάντων ὑμῶν
@@ -909,7 +864,6 @@
√
√
√
-
diff --git a/Tagged_OGNT/Tag_OGNT.pl b/Tagged_OGNT/Tag_OGNT.pl
index f9e59c0..aba42da 100644
--- a/Tagged_OGNT/Tag_OGNT.pl
+++ b/Tagged_OGNT/Tag_OGNT.pl
@@ -1,5 +1,6 @@
# Takes current tW entries and populates tagged OGNT XML
# This is the current best version
+# It takes care of all entries but doesn't account for USFM codes in ULB
# Trying to get it to work with repeated instances of same word.
use 5.12.0;
use File::Slurp;
@@ -105,14 +106,16 @@ sub ProcessXML {
my ($originalLinesCount, $rsnCount, $skipCount, $noRSNCount, $outCount);
while (my $thisLine = <$thisFile>) {
chomp $thisLine;
+ if ($thisLine =~ //) {
- say LOG "\$linesWithRelevantSNs\n$linesWithRelevantSNs\n\$linesNotMatched\n$linesNotMatched\n\$linesToSkip\n$linesToSkip\$residueText\n$residueText";
+ say LOG "<11>\n\$linesWithRelevantSNs\n$linesWithRelevantSNs\n\$linesToSkip\n$linesToSkip\$residueText\n$residueText";
- ($matchedLines, $residueText, $linesNotMatched) = ProcessRelevantSNs($linesWithRelevantSNs, $staticText, $residueText, $linesNotMatched);
+ ($matchedLines, $residueText, $linesNotMatched) = ProcessRelevantSNs($linesWithRelevantSNs, $staticText, $residueText);
+ say LOG "<14>\t\$matchedLines\n$matchedLines\n\$linesNotMatched\n$linesNotMatched";
my %orderedLine;
$matchedLines =~ s/\n{2,}/\n/gs;
- say LOG "===\nBefore \$matchedLines:\n$matchedLines\n===";
+ say LOG "<15\tBefore sort of \$matchedLines:\n$matchedLines\n";
while ($matchedLines =~ /([^◊]*)◊(\d*)\n/g) {
$orderedLine{$2} = $1;
say LOG "<5>\t\$2: $2\t\$1: $1";
@@ -123,16 +126,17 @@ sub ProcessXML {
$matchedLines .= "$orderedLine{$line}\n"
}
chomp $matchedLines;
- say LOG "===\nAfter \$matchedLines:\n$matchedLines\n===";
+ say LOG "<16>\tAfter sort of \$matchedLines:\n$matchedLines\n\$linesNotMatched\n$linesNotMatched";
$residueText =~ s/(^q | q$)//g;
$residueText =~ s/ {3,}/ /g;
$linesNotMatched =~ s/\n+$//;
+ $linesToSkip =~ s/\n+$//;
$matchedLines =~ s/^\n+//;
say OUT "\t\t\t\t\t$greekText";
say OUT "\t\t\t\t\t$staticText";
say OUT "\t\t\t\t\t$residueText";
say OUT "$matchedLines";
- say OUT "$linesNotMatched" if ($linesNotMatched =~ /^.+$/);# This isn't giving any output
+ say OUT "$linesNotMatched" if ($linesNotMatched =~ /^.+$/);
say OUT "$linesToSkip" if ($linesToSkip);
say OUT "$thisLine";
($originalLinesCount, $rsnCount, $skipCount, $noRSNCount, $outCount) = ();
@@ -190,7 +194,8 @@ sub ProcessRelevantSNs {
($thisLine, $residueText, $linesNotMatched) = MatchAndPlace($line, $thisSN, $staticText, $residueText, $linesNotMatched);
$thisLine =~ s/[ \t]+$//;
$matchedLines .= $thisLine . "\n";
- say LOG "+++\n\$matchedLines\n$matchedLines\n+++"
+ $matchedLines =~ s/\n{2,}$/\n/s;
+ say LOG "<13>\t\$matchedLines\n$matchedLines\n\$linesNotMatched\n$linesNotMatched+++"
}
}
return ($matchedLines, $residueText, $linesNotMatched);
@@ -284,31 +289,6 @@ sub FixWorkText {
}
return ($text)
}
-sub Match {
- my ($line, $text, $worktext, $entry, $workEntry) = @_;
- my $outputLines;
-# say LOG "---\n\$line:\n$line\n\$text\n$text\n\$workText\n$workText\n\$entry: $entry\t\$workEntry: $workEntry";
- say LOG "---\nBefore search, new $workText\$workText\n$workText\n\$entry: $entry\t\$workEntry: $workEntry";
- my $found;
- if ($workText =~ /\b$workEntry\b/p) {
- $workText = "${^PREMATCH} ${^POSTMATCH}";
- say LOG "<7.1>\tAfter found, new \$workText: $workText";
- $found = 1;
- #say LOG "<7>\$workEntry: $workEntry\nNow looking for |$workEntry| in\n$text";
- if ($text =~ /^(.*?)\b$workEntry\b/) {
- my $place = length $1;
- $line =~ s/>>$entry;
- $outputLines .= "$place◊$line\t";
- #say LOG "<8>Found.\t \$place: $place\t\$outputLineOrder{$place}: $outputLineOrder{$place}\n";
- }
- }
- else {
- #say LOG "\$workEntry $workEntry not found"
- }
- #say LOG "---";
- return ($found, $outputLines, $workText);
-}
-
sub Read_ULB_File {
$ULBText = read_file("/Users/Henry/Documents/WACS/Tips_and_Hacks/MAST_tW_PDF_Updater/FilesForUpdates/Temp/ULB_text.txt", binmode => 'utf8');
diff --git a/Tagged_OGNT/Tag_OGNT_copy_3.pl b/Tagged_OGNT/Tag_OGNT_copy_3.pl
new file mode 100644
index 0000000..6bd0a48
--- /dev/null
+++ b/Tagged_OGNT/Tag_OGNT_copy_3.pl
@@ -0,0 +1,363 @@
+# Takes current tW entries and populates tagged OGNT XML
+# This is the current best version
+# It takes care of all entries but doesn't account for USFM codes in ULB
+# Trying to get it to work with repeated instances of same word.
+use 5.12.0;
+use File::Slurp;
+use File::Find ;
+use Cwd ;
+use utf8;
+#use open IN => ":utf8", OUT => ":utf8";
+use open IO => ":utf8";
+$" = "\n";	# arrays interpolated into strings are joined with newlines
+# Pair log: Read_tW_Files writes one "Strong's number: forms" line per number to this file.
+mkdir "Logs";
+open(LOG, ">Logs/tW_pairs.txt") or die "$!";
+# Input and output locations (absolute paths on the author's machine).
+my $topDirULB = "/Users/Henry/Documents/WACS/en_ulb";
+#my $topDirOGNT = "/Users/Henry/Google Drive/WA/Tagged_OGNT/OGNT_for_tagging";
+my $topDirOGNT = "/Users/Henry/Documents/WACS/Tips_and_Hacks/Tagged_OGNT/Tag_test";
+#my $topDirtW = "/Users/Henry/Documents/WACS/Restructure/bible";
+my $topDirtW = "/Users/Henry/Documents/WACS/en_tw/bible";
+my ($outDir, $outFile) = ("/Users/Henry/Documents/WACS/Tips_and_Hacks/Tagged_OGNT/Auto-tagged", "");
+my ($ULBText, $workText);
+my ($file);
+my (%ULBtextThisVerse, %SNsThisVerse, %entriesThisSN, %longName);
+# Collect input files by extension; \Q...\E keeps the '.' literal so only real ".xml"/".md" names match.
+my @OGNTfilesToRun = ();
+my $filePattern = '.xml' ;
+find( sub { push @OGNTfilesToRun, $File::Find::name if ( m/^(.*)\Q$filePattern\E$/ ) }, $topDirOGNT) ;
+
+my @tWfilesToRun = ();
+$filePattern = '.md' ;
+find( sub { push @tWfilesToRun, $File::Find::name if ( m/^(.*)\Q$filePattern\E$/ ) }, $topDirtW) ;
+
+Read_tW_Files();
+# From here on LOG is the general processing log.
+close LOG;
+open(LOG, ">Logs/Log.txt") or die "$!";
+# Load the book-name table and the ULB text, then tag every OGNT XML file.
+LongBookNames();
+Read_ULB_File();
+ProcessXML();
+# put unused SN at end of verse
+
+close LOG;
+
+say "\nDone.";
+# =====
+sub Read_tW_Files {
+	# Populates %entriesThisSN: Strong's number => ", "-joined forms taken from the
+	# "Forms Found in the English ULB" line of every file in @tWfilesToRun.
+	# Each finished list is written to LOG. Dies with the file text if a forms line is empty.
+	foreach $file ( @tWfilesToRun ) {
+		#say LOG $file;
+		my (@sns);
+		my $entries;
+		my $fileText = read_file("$file", binmode => 'utf8');
+		if ($fileText =~ /\* Strong's: ([^\n]*)\n/) {
+			my $sns = $1;
+			#say LOG "\t$sns";
+			@sns = split /, /, $sns;
+		}
+		if ($fileText =~ /Forms Found in the English ULB:\n\n([^\n]*)\n/) {
+			$entries = $1;
+			die "$fileText" if $entries eq "";
+			#say LOG "\t\t$entries"
+		}
+		foreach my $sn (@sns) {
+			# A file without a forms line still registers its numbers, but must not add a blank form.
+			$entriesThisSN{$sn} .= defined $entries ? "$entries, " : "";
+		}
+	}
+	foreach my $sn (sort keys %entriesThisSN) {
+		#say LOG "$sn: $entriesThisSN{$sn}";
+		# Drop empty forms: an empty entry would match any verse text in MatchAndPlace.
+		my @entries = grep { length } split /, /, $entriesThisSN{$sn};
+		# Numeric <=> treats forms that do not start with a digit as 0, so the order is by length; reverse puts the longest first.
+		@entries = reverse sort { substr($a,0,1) <=> substr($b,0,1)
+			|| length($a) <=> length($b)
+			|| $a <=> $b }
+			@entries;
+		$entriesThisSN{$sn} = join ", ", @entries;
+		say LOG "$sn: $entriesThisSN{$sn}";
+	}
+}
+sub LongBookNames {	# Fills %longName from the __DATA__ table: second tab-separated field => third field.
+	while (my $row = <DATA>) {	# read the table line by line; a bare while () never reads and never ends
+		chomp $row;
+		if ($row =~ /([^\t]*)\t([^\t]*)\t([^\t]*)/) {
+			$longName{$2} = $3
+		}
+	}
+}
+sub ProcessXML {	# Rewrites each file in @OGNTfilesToRun into $outDir, adding per-verse text lines and the matched, unmatched and skipped lines.
+# foreach XML file
+	foreach my $file (@OGNTfilesToRun) {
+		my $greekText;
+		my $fileGist;
+		if ($file =~ /(......\.xml)/) {	# any six characters followed by ".xml" become the output file name
+			$fileGist = $1;
+		}
+		say LOG $file . "\t" . $fileGist;
+		open(OUT, ">$outDir/$fileGist") or die "$outDir/$fileGist: $!";
+		my ($pre, $gist, $post, $bk, $ch, $vs, $thisVerse, $staticText, $residueText, $matchedLines, $flag, $thisVerseForOutput, $linesWithRelevantSNs, $linesNotMatched, $orderedOutputLines, $linesToSkip);
+		open (my $thisFile, "<:utf8", "$file") or die "$file:\n$!";
+		my ($originalLinesCount, $rsnCount, $skipCount, $noRSNCount, $outCount);
+		while (my $thisLine = <$thisFile>) {
+			chomp $thisLine;
+			if ($thisLine =~ //) {	# NOTE(review): pattern is empty as shown (presumably the verse-closing tag); an empty pattern reuses the last successful one - confirm against the repository
+				say LOG "<11>\n\$linesWithRelevantSNs\n$linesWithRelevantSNs\n\$linesToSkip\n$linesToSkip\$residueText\n$residueText";
+				# Match the queued lines against the verse text, then re-order them by the number stored after the ◊ marker.
+				($matchedLines, $residueText, $linesNotMatched) = ProcessRelevantSNs($linesWithRelevantSNs, $staticText, $residueText);
+				say LOG "<14>\t\$matchedLines\n$matchedLines\n\$linesNotMatched\n$linesNotMatched";
+				my %orderedLine;
+				$matchedLines =~ s/\n{2,}/\n/gs;
+				say LOG "<15\tBefore sort of \$matchedLines:\n$matchedLines\n";
+				while ($matchedLines =~ /([^◊]*)◊(\d*)\n/g) {
+					$orderedLine{$2} = $1;	# keyed by position, so a later line with the same position replaces an earlier one
+					say LOG "<5>\t\$2: $2\t\$1: $1";
+				}
+				$matchedLines = "";
+				foreach my $line (sort {$a <=> $b} keys %orderedLine) {
+					say LOG "\$line: $line\t\$orderedLine{$line}: $orderedLine{$line}";
+					$matchedLines .= "$orderedLine{$line}\n"
+				}
+				chomp $matchedLines;
+				say LOG "<16>\tAfter sort of \$matchedLines:\n$matchedLines\n\$linesNotMatched\n$linesNotMatched";
+				$residueText =~ s/(^q | q$)//g;	# remove the "q " / " q" sentinels added when the verse text was loaded
+				$residueText =~ s/ {3,}/ /g;
+				$linesNotMatched =~ s/\n+$//;
+				$matchedLines =~ s/^\n+//;
+				say OUT "\t\t\t\t\t$greekText";
+				say OUT "\t\t\t\t\t$staticText";
+				say OUT "\t\t\t\t\t$residueText";
+				say OUT "$matchedLines";
+				say OUT "$linesNotMatched" if ($linesNotMatched =~ /^.+$/);
+				say OUT "$linesToSkip" if ($linesToSkip);
+				say OUT "$thisLine";
+				($originalLinesCount, $rsnCount, $skipCount, $noRSNCount, $outCount) = ();
+				($thisVerseForOutput, $flag, $workText, $greekText, $linesNotMatched, $linesToSkip, $residueText, $orderedOutputLines, $linesWithRelevantSNs) = ();	# $staticText is not reset here
+				($linesToSkip) = ("");
+			}
+			elsif ($thisLine =~ /([^\n<>]*)) {	# NOTE(review): this pattern and the substitution two lines below look truncated as shown (unbalanced parentheses) - confirm against the repository
+					$greekText .= $1 . " "
+				}
+				$thisLine =~ s/(([^<]*)(<\/w>)/$1 text="$2">$3/;
+				if ($thisLine =~ /lemma="(G\d+)"/) {
+					my $thisLemma = $1;
+					if (exists $entriesThisSN{$thisLemma}) {	# lemma has tW forms: queue the line for matching
+						$rsnCount ++;
+						$linesWithRelevantSNs .= $thisLine . "\n";
+						#say LOG "\$thisLemma: $thisLemma; line pushed to \@LinesWithRelevantSNs:\n--\n=>@LinesWithRelevantSNs\n--";
+					}
+					else {
+						$skipCount ++;
+						$thisLine =~ s/><\/w>/>√<\/w>/;	# lemma without tW forms: fill the empty element with √
+						$linesToSkip .= "$thisLine\n";
+						#say LOG "\$thisLemma: $thisLemma; line pushed to \@LinesToSkip";
+					}
+				}
+				#say LOG "$thisLine";
+			}
+			elsif ($thisLine =~ //) {	# NOTE(review): pattern is empty as shown; the branch reads $1, $2, $3 as book, chapter and verse - confirm the original pattern
+				#say LOG $thisLine;
+				($bk, $ch, $vs) = ($1,$2,$3);
+				($thisVerse, $greekText) = ("$longName{$bk} $ch:$vs", "");
+				if ($ULBText =~ /$thisVerse\t([^\n]*)\n/) {	# verse text is whatever follows "<book name> <ch>:<vs>" and a tab in $ULBText
+					$staticText = $1;
+					$residueText = "q $staticText q";	# sentinels removed again before output
+				}
+				say OUT $thisLine;
+				($flag) = (1);
+			}
+			else {say OUT $thisLine}
+		}
+
+		close $thisFile;
+		close OUT;
+	}
+}
+sub ProcessRelevantSNs {
+	# Runs MatchAndPlace on every line that carries a lemma="G<digits>" attribute and returns
+	# the accumulated matched lines, the updated residue text and the unmatched lines.
+	my ($relevantLines, $staticText, $residueText, $linesNotMatched) = (@_);
+	my ($matchedLines, $placedLine);
+	foreach my $line (split /\n/, $relevantLines) {
+		next unless ($line =~ /lemma="(G\d+)"/);
+		my $thisSN = $1;
+		say LOG "\$line: $line, \$thisSN: $thisSN, \$entriesThisSN{$thisSN}\n$entriesThisSN{$thisSN}";
+		($placedLine, $residueText, $linesNotMatched) = MatchAndPlace($line, $thisSN, $staticText, $residueText, $linesNotMatched);
+		$placedLine =~ s/[ \t]+$//;
+		$matchedLines .= "$placedLine\n";
+		$matchedLines =~ s/\n{2,}$/\n/s;
+		say LOG "<13>\t\$matchedLines\n$matchedLines\n\$linesNotMatched\n$linesNotMatched+++"
+	}
+	return ($matchedLines, $residueText, $linesNotMatched);
+}
+sub MatchAndPlace {	# Tries each form listed for $sn against the working verse text and tags $line with the first one found.
+	my ($line, $sn, $staticText, $workText, $linesNotMatched) = @_;
+	#say LOG "<8>\t\$line: $line \$sn: $sn \$workText\n$workText";
+	my ($workEntry, $found, $matchedLines, $first, $second, $third, $firstLen, $secondLen, $thirdLen);	# NOTE(review): declared once per call, so a two-part entry tried after a three-part one still sees the old $third, and FixWorkText branches on $third - confirm
+	my @entries = split /, /, $entriesThisSN{$sn};
+	foreach my $entry (@entries) {
+		my $entryType;
+		if ($entry =~ /^(.*) \.\.\. (.*) \.\.\. (.*)$/) {	# "A ... B ... C": three parts, any text allowed between them
+			($first, $second, $third) = ($1, $2, $3);
+			($firstLen, $secondLen, $thirdLen) = (length $first, length $second, length $third);
+			$workEntry = "\\b" . $first . "\\b" . ".*?" . "\\b" . $second . "\\b" . ".*?" . "\\b" . $third;
+			say LOG "<1a>\t\$first: $first, \$second: $second, \$third: $third, \$firstLen: $firstLen, \$secondLen,: $secondLen, \$thirdLen: $thirdLen \$entry: |$entry|\t\$workEntry: |$workEntry|";
+			$entryType = 1;
+		}
+		elsif ($entry =~ /^(.*) \.\.\. (.*)$/) {	# "A ... B": two parts
+			($first, $second) = ($1, $2);
+			($firstLen, $secondLen) = (length $first, length $second);
+			$workEntry = "\\b" . $first . "\\b" . ".*?" . "\\b" . $second . "\\b";
+			say LOG "<2a>\t\$first: $first, \$second: $second, \$third: $third, \$firstLen: $firstLen, \$secondLen,: $secondLen, \$entry: |$entry|\t\$workEntry: |$workEntry|";
+			$entryType = 2;
+		}
+		else {$workEntry = $entry;}
+		# The entry is interpolated into the pattern as a regex (not quoted); /p makes ${^PREMATCH} and ${^POSTMATCH} available.
+		my $foundText;
+		#say LOG "<8.1>\t\$entryType: $entryType\t\$entry: $entry\t\$workEntry: $workEntry";
+		if ($workText =~ /\b$workEntry\b/p) {
+			say LOG "<8.1>Found: \t\$entryType: $entryType\t\$entry: $entry\t\$workEntry: $workEntry";
+			($foundText, $workText) = ($&, "${^PREMATCH}ı${^POSTMATCH}");	# the matched span is replaced by the placeholder ı
+			my ($place, $foundTextLength, $replacementSpaces) = (length ${^PREMATCH}, length $foundText, "");	# $place is the offset of the match in the working text
+			$line =~ s/>>$entry;	# NOTE(review): substitution looks truncated as shown - confirm against the repository
+			while (length $replacementSpaces < $foundTextLength) {$replacementSpaces .= " "}
+
+			if ($entryType) {
+				# Multi-part entry: FixWorkText fills in the placeholder.
+				say LOG "<8.2>\n\$workText,: $workText, \$matchedLines:\n$matchedLines ";
+
+				($workText) = FixWorkText($line, $workText, $workEntry, $foundText, $foundTextLength, $first, $firstLen, $second, $secondLen, $third, $thirdLen);
+
+				say LOG "<8.3>\n\$workText:\n$workText\n\$matchedLines:\n$matchedLines";
+
+			}
+
+			else {$workText =~ s/ı/$replacementSpaces/;}	# plain entry: the whole match becomes blanks of the same length
+
+			$matchedLines .= "$line◊$place";	# the number after ◊ is what ProcessXML sorts on
+
+			say LOG "<8.4>\tAfter found, new \$workText:\n$workText";
+			$found = 1;
+		}
+		else {
+			#say LOG "\$workEntry $workEntry not found"
+		}
+		if ($found) {
+			last
+		}
+	}
+	unless ($found) {
+		$line =~ s/>>?;	# NOTE(review): substitution looks truncated as shown - confirm against the repository
+		$linesNotMatched .= "$line\n"
+	}
+	return ($matchedLines, $workText, $linesNotMatched)
+}
+sub FixWorkText {
+	# Replaces the ı placeholder in $text for a multi-part entry: each part of the entry
+	# becomes blanks of the same length while the text found between the parts is kept.
+	my ($thisLine, $text, $entry, $foundText, $foundTextLength, $first, $firstLen, $second, $secondLen, $third, $thirdLen) = @_;
+	my $firstSpace = " " x $firstLen;
+	my $secondSpace = " " x $secondLen;
+	my $thirdSpace = " " x $thirdLen;
+	say LOG "<9>\$text:\n$text\n\t\t\$entry: $entry \$foundText: $foundText\t \$foundTextLength: $foundTextLength\t\$first: $first\t\$second: $second\t\$third: $third\n\$firstSpace: $firstSpace\t\$secondSpace: $secondSpace\t\$thirdSpace: $thirdSpace";
+	if ($third) {
+		# Three-part entry: both gaps are carried over.
+		if ($foundText =~ /$first(.*)$second(.*)$third/) {
+			my ($firstGap, $secondGap) = ($1, $2);
+			my $repText = join "", $firstSpace, $firstGap, $secondSpace, $secondGap, $thirdSpace;
+			say LOG "<9.1> \$repText: $repText";
+			$text =~ s/ı/$repText/;
+		}
+	}
+	elsif ($foundText =~ /$first(.*)$second/) {
+		# Two-part entry: the single gap is carried over.
+		my $firstGap = $1;
+		say LOG "<9.2>\t\$firstSpace: |$firstSpace|\t\$firstGap: |$firstGap|\t\$secondSpace: |$secondSpace|";
+		my $repText = $firstSpace . $firstGap . $secondSpace;
+		say LOG "<9.3> \$repText: |$repText|";
+		$text =~ s/ı/$repText/;
+	}
+	return ($text)
+}
+sub Read_ULB_File {
+	# Reads the whole ULB text file into the file-level $ULBText (read with binmode utf8).
+	my $ulbPath = "/Users/Henry/Documents/WACS/Tips_and_Hacks/MAST_tW_PDF_Updater/FilesForUpdates/Temp/ULB_text.txt";
+	$ULBText = read_file($ulbPath, binmode => 'utf8');
+}
+
+__DATA__
+01 gen Genesis
+02 exo Exodus
+03 lev Leviticus
+04 num Numbers
+05 deu Deuteronomy
+06 jos Joshua
+07 jdg Judges
+08 rut Ruth
+09 1sa 1 Samuel
+10 2sa 2 Samuel
+11 1ki 1 Kings
+12 2ki 2 Kings
+13 1ch 1 Chronicles
+14 2ch 2 Chronicles
+15 ezr Ezra
+16 neh Nehemiah
+17 est Esther
+18 job Job
+19 psa Psalms
+20 pro Proverbs
+21 ecc Ecclesiastes
+22 sng Song of Solomon
+23 isa Isaiah
+24 jer Jeremiah
+25 lam Lamentations
+26 ezk Ezekiel
+27 dan Daniel
+28 hos Hosea
+29 jol Joel
+30 amo Amos
+31 oba Obadiah
+32 jon Jonah
+33 mic Micah
+34 nam Nahum
+35 hab Habakkuk
+36 zep Zephaniah
+37 hag Haggai
+38 zec Zechariah
+39 mal Malachi
+41 mat Matthew
+42 mrk Mark
+43 luk Luke
+44 jhn John
+45 act Acts
+46 rom Romans
+47 1co 1 Corinthians
+48 2co 2 Corinthians
+49 gal Galatians
+50 eph Ephesians
+51 php Philippians
+52 col Colossians
+53 1th 1 Thessalonians
+54 2th 2 Thessalonians
+55 1ti 1 Timothy
+56 2ti 2 Timothy
+57 tit Titus
+58 phm Philemon
+59 heb Hebrews
+60 jas James
+61 1pe 1 Peter
+62 2pe 2 Peter
+63 1jn 1 John
+64 2jn 2 John
+65 3jn 3 John
+66 jud Jude
+67 rev Revelation