Now needed for tagging

2020-07-28 17:34:00 -04:00 · 2020-07-28 17:34:00 -04:00 · 91d789da65
parent 13b8c8c948
commit 91d789da65
8 changed files with 1315 additions and 0 deletions
--- a/Build_OL_files_from_XML.pl
+++ b/Build_OL_files_from_XML.pl
@ -0,0 +1,180 @@
+# Builds easily searchable file from current OGNT and MAST-HB XML file
+# Takes verse at a time from slurped file
+# Useful for Mine routine building MAST PDF
+use 5.18.0;
+use File::Slurp;
+use File::Find ; 
+use Cwd ; 
+use utf8;
+#use open IN => ":utf8", OUT => ":utf8";
+use open IO => ":utf8";
+open LOG, ">:utf8", "Logs/log.txt" or die;
+open OUT, ">:utf8", "Output/Original_languages.txt" or die;
+
+my (@folders) = ("/Users/Henry/Documents/WACS/MAST_HB", "/Users/Henry/Documents/WACS/OGNT");
+my (%order, %long);
+my $outText;
+
+while (<DATA>) {
+	chomp;
+	if (/^([^\t]*)\t([^\t]*)\t(.*)$/) {
+		$order{$1} = $3;
+		$long{$2} = $3;
+	}
+}
+#foreach my $key (sort keys %long) {
+#	say LOG $key . "\t" . $long{$key};
+#}
+
+foreach my $folder (@folders) {
+	say LOG "$folder";
+	#system "cd $folder;xml val *.xml;echo 'Continue? (Control + C to quit, Enter to continue)';read name;";
+	my ($topDir, $lang) = ($folder, "H");
+	
+	if ($folder =~ /OGNT/) {$lang = "G"}
+	
+	my @filesToRun = ();
+	my $filePattern = '*.xml' ; 
+	find( sub { push @filesToRun, $File::Find::name if ( m/^(.*)$filePattern$/ ) }, $topDir) ;
+	@filesToRun = sort @filesToRun;
+	foreach  my $file ( @filesToRun  ) {
+	   say LOG $file;
+	   my $fileText = read_file("$file", binmode => 'utf8');
+	   my ($bk, $ch, $vs, $lemma, $word, $nbk, $nch, $nvs, $previous, $current, $interruption, $next, $verse, $thisBookText, $prevVsText, $holdText, $thisVsText, $nextVsText, $oldHold, $shortCur, $shortIntr);
+	   while ($fileText =~ /<verse osisID="((.*?)\.(\d+)\.(\d+))"(\n|.)*?<\/verse>/spg) {
+			$verse = $&;
+			say LOG "\$1: $1, \$2: $2, \$3: $3, \$4: $4";
+			($shortCur, $bk, $ch, $vs) = ($1, $long{$2}, $3, $4);
+			$previous = $current;
+			$current = "$bk $ch:$vs";
+			say LOG "<0>\t\$current: $current";
+			my $verseText;
+			
+   			if ($verse =~ /<note>KJV:(([^\.]*)\.([^\.]*).([^<]*))<\/note>/p) { # Occurs only in OT
+				say LOG "<1>\t$&";
+				($shortIntr, $nbk, $nch, $nvs) = ($1, $long{$2}, $3, $4);
+				$interruption = "$nbk $nch:$nvs";
+				say LOG "<2>\t\$interruption: $interruption (of $current)";
+				if ($verse =~ /<verse osisID="$shortCur">\n[^<\n]*<note>KJV:$shortIntr<\/note>/) { # Complete renumber of verse
+					say LOG "<3>\t$&";
+					$current = $interruption;
+					$verseText = GetContent($verse);
+					$verseText = "$current\t$oldHold$verseText";
+					$oldHold = "";
+				}
+				elsif ($interruption ne $current && $verse =~ /<note>KJV:([^\.]*)\.([^\.]*).([^<]*)<\/note>/p) { # New verse begins here
+					say LOG "<4>\t$&";
+					($thisVsText, $nextVsText) = (${^PREMATCH}, ${^POSTMATCH});
+		   			$thisVsText = GetContent($thisVsText);
+					$nextVsText = GetContent($nextVsText);
+					$outText .= "$oldHold\n$current\t$thisVsText ";
+					$oldHold = "$nextVsText ";
+				}
+				elsif ($interruption eq $current && $verse =~ /<note>KJV:([^\.]*)\.([^\.]*).([^<]*)<\/note>/p) { # Previous verse continues here
+					say LOG "<5>\t$&";
+					($prevVsText, $thisVsText) = (${^PREMATCH}, ${^POSTMATCH});
+					$prevVsText = GetContent($prevVsText);
+					$thisVsText = GetContent($thisVsText);
+					$verseText .= "$oldHold\n$current\t$thisVsText";
+					$oldHold = "";
+				}
+			}
+			else {
+				# The whole verse should be processed in one piece
+				#$verseText = GetContent($verse);
+				#$verseText = "$current\$tverseText"
+			}
+			#$thisBookText .= "\n$verseText";
+			#$oldHold = $holdText
+	   }
+	   #$thisBookText =~ s/</<$lang/g;
+	   #$outText .= "$thisBookText\n";
+	} 
+
+
+}
+
+say OUT $outText;
+
+close OUT;
+close LOG;
+
+print "\n\tDone.";
+
+sub GetContent {
+	my ($text, $returnText) = ($_[0], "");
+	while ($text =~ /<w lemma="([^"]*)"[^>]*>([^<]*)<\/w>/) {
+		my ($lemma, $OL) = ($1, $2);
+		$lemma =~ s/[^\d"]*(\d+)[^\d"]*/$1/;
+		$returnText .= "$OL <$lemma> "
+	}
+	return $returnText
+}
+
+__DATA__
+01	gen	Genesis
+02	exo	Exodus
+03	lev	Leviticus
+04	num	Numbers
+05	deu	Deuteronomy
+06	jos	Joshua
+07	jdg	Judges
+08	rut	Ruth
+09	1sa	1 Samuel
+10	2sa	2 Samuel
+11	1ki	1 Kings
+12	2ki	2 Kings
+13	1ch	1 Chronicles
+14	2ch	2 Chronicles
+15	ezr	Ezra
+16	neh	Nehemiah
+17	est	Esther
+18	job	Job
+19	psa	Psalms
+20	pro	Proverbs
+21	ecc	Ecclesiastes
+22	sng	Song of Solomon
+23	isa	Isaiah
+24	jer	Jeremiah
+25	lam	Lamentations
+26	ezk	Ezekiel
+27	dan	Daniel
+28	hos	Hosea
+29	jol	Joel
+30	amo	Amos
+31	oba	Obadiah
+32	jon	Jonah
+33	mic	Micah
+34	nam	Nahum
+35	hab	Habakkuk
+36	zep	Zephaniah
+37	hag	Haggai
+38	zec	Zechariah
+39	mal	Malachi
+41	mat	Matthew
+42	mrk	Mark
+43	luk	Luke
+44	jhn	John
+45	act	Acts
+46	rom	Romans
+47	1co	1 Corinthians
+48	2co	2 Corinthians
+49	gal	Galatians
+50	eph	Ephesians
+51	php	Philippians
+52	col	Colossians
+53	1th	1 Thessalonians
+54	2th	2 Thessalonians
+55	1ti	1 Timothy
+56	2ti	2 Timothy
+57	tit	Titus
+58	phm	Philemon
+59	heb	Hebrews
+60	jas	James
+61	1pe	1 Peter
+62	2pe	2 Peter
+63	1jn	1 John
+64	2jn	2 John
+65	3jn	3 John
+66	jud	Jude
+67	rev	Revelation
--- a/Build_ULB_XML_for_Tagging.pl
+++ b/Build_ULB_XML_for_Tagging.pl
@ -0,0 +1,126 @@
+# Creates workable ULB.xml file that has all USFM markers in place.
+
+use 5.18.0;
+use File::Slurp;
+use File::Find ; 
+use Cwd ; 
+use utf8;
+#use open IN => ":utf8", OUT => ":utf8";
+use open IO => ":utf8";
+
+open(LOG, ">Logs/Log.txt") or die "$!";
+open(OUT, ">/Users/Henry/Documents/WACS/en_ulb_tagged/ULB_xml/ULB.xml") or die "$!";
+say OUT "<xml>";
+
+my ($topDir, $outDir) = ("/Users/Henry/Documents/WACS/en_ulb", "/Users/Henry/Documents/WACS/en_ulb_tagged/ULB_xml");
+
+my @filesToRun = ();
+my $filePattern = '\.usfm' ;
+#my $filePattern = '67-REV\.usfm' ;
+my $file;
+find( sub { push @filesToRun, $File::Find::name  if ( m/^(.*)$filePattern$/ ) }, $topDir) ;
+
+@filesToRun = sort @filesToRun;
+
+ReadFiles();
+
+say OUT "</xml>";
+close OUT;
+close LOG;
+
+say "\nDone.";
+# =====
+sub ReadFiles {
+
+	foreach  $file ( @filesToRun  ) {
+		say $file;
+		my @array;
+		my $fileText = read_file("$file", binmode => 'utf8');
+		$fileText =~ s/[ \n]+$//;
+		say LOG "|$fileText|";
+
+#Delete \n
+		my ($book, $chap, $vers, $chapStart);
+		if ($fileText =~ /\\h ([^\n]*)/) {
+			$book = $1
+		}
+		#say LOG $book;
+		$fileText =~ s/\n/ /g;
+		$fileText =~ s/  / /g;
+		$fileText =~ s/\\s5/\n$&/g;
+		$fileText =~ s/\\v/√/g;
+		while ($fileText =~ s/(√[^√\n]*)(√)/$1\n$2/) {}
+		$fileText =~ s/√/\\v/g;
+		$fileText =~ s/(\\id[^\n]*)\n/\t\t<heading>$1<\/heading>\n/;
+		$fileText =~ s/ +\n/\n/g;
+		$fileText =~ s/(\\(q\d?|pi?|m|n?b))\n/\n$1 /g;
+		#say LOG $fileText;
+		@array = split /\n/, $fileText;
+		$fileText = "";
+		foreach my $line (@array) {
+			chomp;
+			if ($line =~ /<book name="(.*?)">/) {$book = $1;}
+			if ($line =~ /\\c (\d+).* \\v (\d+)/) {
+				($chap, $vers) = ($1, $2);
+				$line = "\t\t<chapter name=\"$book $chap\">\n\t\t\t<verse name=\"$book $chap:$vers\">$line</verse>";
+				$line = "\t\t</chapter>\n$line" if $chapStart;
+				$chapStart = 1;
+			}
+			elsif ($line =~ /\\v (\d+)/) {
+				$vers = $1;
+				$line = "\t\t\t<verse name=\"$book $chap:$vers\">$line</verse>"
+			}
+			#say LOG "===\n<AA>\n$line";
+			$line =~ s/(<verse[^>]*>)(.*\\v \d+ )(.*)(<\/verse>)/$1\n\t\t\t\t<preText>$2<\/preText>\n\t\t\t\t<text>$3<\/text>\n\t\t\t$4/s;
+			#say LOG "===\n<BB>\n$line";
+			if ($line =~ /<text>.*<\/text>/p) {
+				say LOG "<-0>\t$line";
+				my ($pre, $match, $post) = (${^PREMATCH}, ${^MATCH}, ${^POSTMATCH});
+				#say LOG "<-1>\t\$pre: $pre,\n\$match: $match,\n\$post: $post";
+				$match = TagInternalUSFM ($match);
+				$line = $pre . $match . $post;
+			}
+			
+			say LOG "---\n<CC>\n$line\n===";
+			$line =~ s# +</#</#g;
+			$fileText .= $line . "\n";
+		}
+		say OUT "\t<book name=\"$book\">\n$fileText\t\t</chapter>\n\t</book>";
+	} 
+	
+}
+
+sub TagInternalUSFM {
+	my ($line, $placeNum) = ($_[0], 1);
+	my %places;
+	#say LOG "Tagging internal USFM in \$line $line.";
+	while ($line =~ /(<text>.*)(\\f .*?\\f\*)(.*<\/text>)/g) {
+		#say LOG "<+1>\t$2";
+		$line =~ s/(<text>.*)(\\f .*?\\f\*)(.*<\/text>)/$1<place number="$placeNum"\/>$3/;
+		$places{$placeNum} = $2;
+		$placeNum ++;
+	}
+	#say LOG "<+2>\t$line";
+	while ($line =~ /(<text>.*)(\\qs .*?\\qs\*)(.*<\/text>)/g) {
+		#say LOG "<+3>\t$2";
+		$line =~ s/(<text>.*)(\\qs .*?\\qs\*)(.*<\/text>)/$1<place number="$placeNum"\/>$3/;
+		$places{$placeNum} = $2;
+		$placeNum ++;
+	}
+	#say LOG "<+4>\t$line";
+	while ($line =~ /(<text>.*)(\\([bm]|pi?|q\d?|s2))( .*<\/text>)/g) {
+		#say LOG "<+5>\t$2";
+		$line =~ s/(<text>.*)(\\([bm]|pi?|q\d?|s2))(.*<\/text>)/$1<place number="$placeNum"\/>$4/;
+		$places{$placeNum} = $2;
+		$placeNum ++;
+	}
+	#say LOG "<+6>\t$line";
+	$line =~ s/  / /g;
+	#say LOG "<+7>\t$line";
+	foreach my $place (sort keys %places) {
+		#say LOG "<+8>\tReplacing <place number=\"$place\"\/> with <usfm>$places{$place}<\/usfm> in\n$line.";
+		unless ($line =~ s/<place number="$place"\/>/<usfm>$places{$place}<\/usfm>/) {die}
+	}
+	say LOG "<+9>\t$line";
+	return $line;
+}
--- a/Check_ULB.pl
+++ b/Check_ULB.pl
@ -0,0 +1,113 @@
+# Checks ULB in ULB.xml against tagged ULB
+use 5.18.0;
+use File::Slurp;
+use File::Find ; 
+use Cwd ; 
+use utf8;
+#use open IN => ":utf8", OUT => ":utf8";
+use open IO => ":utf8";
+
+open(LOG, ">Logs/Log.txt") or die "$!";
+
+my ($ULBxml, $taggedULBDir) = ("/Users/Henry/Documents/WACS/Tips_and_Hacks/Tagged_OGNT/ULB_xml/ULB.xml", "/Users/Henry/Documents/WACS/Tips_and_Hacks/Tagged_OGNT/Manual_Tagging");
+my (@filesToRun) = ();
+my %fullName;
+my $filePattern = "\.xml" ;
+my $file;
+
+my $xmlText = read_file("$ULBxml", binmode => 'utf8');
+
+GetBooksToCheck();
+Compare();
+
+sub Compare {
+	foreach my $file (@filesToRun) {
+		
+		say LOG "|$file|, |$taggedULBDir/$file.xml|";
+		my $taggedText = read_file("$taggedULBDir/$file.xml", binmode => 'utf8');
+		
+		GetGist($file, $taggedText);
+	}
+}
+	
+sub GetGist {
+	my ($fileName, $wholeTaggedText) = @_;
+	my ($verseRef, $standard, $tagged);
+	say LOG "|$fileName|, |$fullName{$fileName}|";
+	while ($wholeTaggedText =~ /<verse name="($fullName{$fileName} \d+:\d+)">((.|\n)*?)<preText>(.*?)<\/preText>((.|\n)*?)\n\t+((<w ((.|\n)*?)\n)*)\t+<\/verse>/sg) {
+		my ($preText, $gist) = ($4, $7);
+		$verseRef = $1;
+		say LOG "\$verseRef: |$verseRef|";
+		if ($xmlText =~ /<verse name="$verseRef">\n\t+<preText>([^\n]*)<\/preText>\n\t+<text>([^\n]*)<\/text>\n\t+<\/verse>/s) {
+			my ($standardPT, $standardT) = ($1, $2);
+			($tagged) = Untag($preText, $gist);
+			#say LOG $tagged;
+			$standard = $standardPT . " " . $standardT;
+			$standard =~ s/<[^<>]*>//g;
+			$standard =~ s/ {2,}/ /g;
+			$standard =~ s/ +$//;
+			if ($standard ne $tagged) {
+				say LOG "\nMISMATCH:\n\$standard\n$standard\n\$tagged\n$tagged\n"
+			}
+		}
+	}
+}
+
+sub Untag {
+	my ($pre, $txt) = ($_[0], $_[1]);
+	#say LOG "\$pre: $pre\n\$txt: $txt";
+	$txt =~ s/[\t\n]/ /g;
+	$txt =~ s/(√|<[^<>]*>)//g;
+	$txt = $pre . " " . $txt;
+	$txt =~ s/ {2,}/ /g;
+	$txt =~ s/ +$//;
+	return $txt;
+}
+
+sub GetBooksToCheck {
+	while (<DATA>) {
+		chomp;
+		unless (/^#/) {
+			if (/([^\t]*)\t([^\t]*)/) {
+				my ($file, $book) = ($1, $2);
+				say "|$file|";
+				push @filesToRun, "$file";
+				$fullName{$file} = $book;
+			}
+		}
+	}
+}
+
+
+close LOG;
+
+say "\nDone.";
+# =====
+__DATA__
+#41-MAT	Matthew
+#42-MRK	Mark
+#43-LUK	Luke
+#44-JHN	John
+#45-ACT	Acts
+#46-ROM	Romans
+#47-1CO	1 Corinthians
+#48-2CO	2 Corinthians
+#49-GAL	Galatians
+#50-EPH	Ephesians
+#51-PHP	Philippians
+#52-COL	Colossians
+#53-1TH	1 Thessalonians
+#54-2TH	2 Thessalonians
+#55-1TI	1 Timothy
+#56-2TI	2 Timothy
+57-TIT	Titus
+#58-PHM	Philemon
+#59-HEB	Hebrews
+#60-JAS	James
+#61-1PE	1 Peter
+#62-2PE	2 Peter
+#63-1JN	1 John
+#64-2JN	2 John
+#65-3JN	3 John
+#66-JUD	Jude
+#67-REV	Revelation
--- a/Construct_tagged_ULB_XML_files_from_unified_ULB_XML_and_tWs_and_OGNT.pl
+++ b/Construct_tagged_ULB_XML_files_from_unified_ULB_XML_and_tWs_and_OGNT.pl
@ -0,0 +1,410 @@
+# Takes current tW entries and populates tagged OGNT XML
+# 
+# This is the current best version
+#  Requires ULB that includes USFMs.
+
+
+use 5.12.0;
+use File::Slurp;
+use File::Find ; 
+use Cwd ; 
+use utf8;
+#use open IN => ":utf8", OUT => ":utf8";
+use open IO => ":utf8";
+$" = "\n";
+
+mkdir "Logs";
+open(LOG, ">Logs/tW_pairs.txt") or die "$!";
+my $ULBfile = "/Users/Henry/Documents/WACS/en_ulb_tagged/ULB_xml/ULB.xml";
+my $topDirOGNT = "/Users/Henry/Documents/WACS/OGNT";
+#my $topDirOGNT = "/Users/Henry/Documents/WACS/en_ulb_tagged/Tag_test";
+my $topDirtW = "/Users/Henry/Documents/WACS/en_tw/bible";
+my ($outDir, $outFile) = ("/Users/Henry/Documents/WACS/en_ulb_tagged/Auto-tagged", "");
+my ($ULBText, $workText, $language);
+my ($file);
+my (%ULBtextThisVerse, %ULBpreTextThisVerse, %SNsThisVerse, %entriesThisSN, %longName);
+
+my @OGNTfilesToRun = ();
+#my $filePattern = '\.xml' ;
+my $filePattern = '52-COL\.xml' ;
+find( sub { push @OGNTfilesToRun, $File::Find::name  if ( m/^(.*)$filePattern$/ ) }, $topDirOGNT) ;
+
+say LOG "\@OGNTfilesToRun:\n@OGNTfilesToRun\n";
+
+my @tWfilesToRun = ();
+$filePattern = '.md' ;
+find( sub { push @tWfilesToRun, $File::Find::name  if ( m/^(.*)$filePattern$/ ) }, $topDirtW) ;
+
+Read_tW_Files();
+
+close LOG;
+open(LOG, ">Logs/Log.txt") or die "$!";
+
+LongBookNames();
+Prepare_ULB_file();
+say LOG "Prepare_ULB_file done.\n\@OGNTfilesToRun:\n@OGNTfilesToRun\n";
+
+ProcessXML();
+#	put unused SN at end of verse
+
+close LOG;
+
+say "\nDone.";
+# =====
+sub Read_tW_Files {
+	foreach  $file ( @tWfilesToRun  ) {
+		say LOG $file;
+		my (@sns);
+		my $entries;
+		my $fileText = read_file("$file", binmode => 'utf8');
+		if ($fileText =~ /\* Strong's: ([^\n]*)\n/) {
+			my $sns = $1;
+			#say LOG "\t$sns";
+			@sns = split /, /, $sns;
+		}
+		if ($fileText =~ /Forms Found in the English ULB:\n\n([^\n]*)\n/) {
+			$entries = $1;
+			die "$fileText" if $entries eq "";			
+			#say LOG "\t\t$entries"
+		}
+		foreach my $sn (@sns) {
+			$entriesThisSN{$sn} .= $entries . ", ";
+			#say LOG "\t\t\t$sn: $entriesThisSN{$sn}"
+		}
+	}
+	foreach my $sn (sort keys %entriesThisSN) {
+		#say LOG "$sn: $entriesThisSN{$sn}";
+		my @entries = split /, /, $entriesThisSN{$sn};
+		@entries = reverse sort {  substr($a,0,1) <=> substr($b,0,1)
+			|| length($a) <=> length($b)
+			|| $a <=> $b }
+		@entries;
+		$entriesThisSN{$sn} = "";
+		foreach my $slice (@entries) {
+			$entriesThisSN{$sn} .= "$slice, "
+		}
+		$entriesThisSN{$sn} =~ s/, $//;
+		say LOG "$sn: $entriesThisSN{$sn}";
+	}
+
+}
+sub LongBookNames {
+	while (<DATA>) {
+		chomp;
+		if (/([^\t]*)\t([^\t]*)\t([^\t]*)/) {
+			$longName{$2} = $3
+		}
+	}
+}
+sub ProcessXML {
+	foreach my $file (@OGNTfilesToRun) {
+		my $greekText;
+		my $fileGist;
+		if ($file =~ /((..)....\.xml)/) {
+			($fileGist, $language) = ($1, $2);
+			if ($language > 40) {
+				$language = "G"
+			} else {$language = "H"}
+			
+		}
+		say LOG "<0>\t$file \t$fileGist";
+		open(OUT, ">$outDir/$fileGist") or die "$outDir/$fileGist: $!";
+		my ($pre, $gist, $post, $bk, $ch, $vs, $thisVerse, $staticText, $residueText, $matchedLines, $flag, $thisVerseForOutput,
+			$linesWithRelevantSNs, $linesNotMatched, $orderedOutputLines, $linesToSkip, $thisPreText);
+		open (my $thisFile, "<:utf8", "$file") or die "$file:\n$!";
+		my ($originalLinesCount, $rsnCount, $skipCount, $noRSNCount, $outCount);
+		while (my $thisLine = <$thisFile>) {
+			chomp $thisLine;
+			if ($thisLine =~ /<verse/) {say LOG "\n=========================="}
+			if ($thisLine =~ /<\/verse>/) {
+				say LOG "<0.1>\t$thisLine";
+				say LOG "<11>\n\$linesWithRelevantSNs\n$linesWithRelevantSNs\n\$linesToSkip\n$linesToSkip\$residueText\n$residueText";
+				
+				($matchedLines, $residueText, $linesNotMatched) = ProcessRelevantSNs($linesWithRelevantSNs, $staticText, $residueText);
+				say LOG "<14>\t\$matchedLines\n$matchedLines\n\$linesNotMatched\n$linesNotMatched";
+				my %orderedLine;
+				$matchedLines =~ s/\n{2,}/\n/gs;
+				say LOG "<15\tBefore sort of \$matchedLines:\n$matchedLines\n";
+				while ($matchedLines =~ /([^◊]*)◊(\d*)\n/g) {
+					$orderedLine{$2} = $1;
+					say LOG "<5>\t\$2: $2\t\$1: $1";
+				}
+				$matchedLines = "";
+				foreach my $line (sort {$a <=> $b} keys %orderedLine) {
+					say LOG "<5.5>\t\$line: $line\t\$orderedLine{$line}: $orderedLine{$line}";
+					$matchedLines .= "$orderedLine{$line}\n"
+				}
+				chomp $matchedLines;
+				say LOG "<16>\tAfter sort of \$matchedLines:\n$matchedLines\n\$linesNotMatched\n$linesNotMatched";					
+				$residueText =~ s/<usfm>.*?<\/usfm>//g;
+				$residueText =~ s/(^q | q$)//g;
+				$residueText =~ s/ {3,}/  /g;
+				$residueText =~ s/^ +//;
+				$residueText =~ s/ +$/$1/;
+				$greekText =~ s/^ +//;
+				$greekText =~ s/ +$/$1/;
+				$staticText =~ s/^ +//;
+				$staticText =~ s/ +$/$1/;
+				my $internalUSFM;
+				$internalUSFM .= "\t\t\t\t\t$&\n" while ($staticText =~ /<usfm>.*?<\/usfm>/g);
+				$linesNotMatched =~ s/\n+$//;
+				$linesToSkip =~ s/\n+$//;
+				$matchedLines =~ s/^\n+//;
+				$internalUSFM =~ s/\n+$//;
+				say LOG "<17>\tAfter pruning \$matchedLines:\n$matchedLines\n\$linesNotMatched\n$linesNotMatched";					
+				say OUT "\t\t\t\t\t<Greek>$greekText</Greek>";
+				say OUT "\t\t\t\t\t<preText>$thisPreText</preText>";
+				say OUT "\t\t\t\t\t<ULB>$staticText</ULB>";
+				say OUT "\t\t\t\t\t<residue>$residueText</residue>";
+				say OUT "$matchedLines" unless ($matchedLines eq "");
+				say OUT "$linesNotMatched" unless ($linesNotMatched eq "");
+				say OUT "$internalUSFM" unless ($internalUSFM eq "");
+				say OUT "$linesToSkip" if ($linesToSkip);
+				say OUT "$thisLine";
+				($originalLinesCount, $rsnCount, $skipCount, $noRSNCount, $outCount) = ();
+				($thisVerseForOutput, $flag, $workText, $greekText, $linesNotMatched, $linesToSkip, $residueText, $orderedOutputLines, $linesWithRelevantSNs) = ();
+				($linesToSkip) = ("");
+			}
+			elsif ($thisLine =~ /<w /) {
+				say LOG "<0.2>\t$thisLine";
+				$originalLinesCount ++;
+				if ($thisLine =~ />([^\n<>]*)</) {
+					$greekText .= $1 . " "
+				}
+				$thisLine =~ s/(<w .*)>([^<]*)(<\/w>)/$1 text="$2">$3/;
+				if ($thisLine =~ /lemma="(\d+)"/) {
+					my $thisLemma = $language . $1;
+					if (exists $entriesThisSN{$thisLemma}) {
+						$rsnCount ++;
+						$linesWithRelevantSNs .= $thisLine . "\n";
+						say LOG "<0.2.1>\t\$thisLemma: $thisLemma; line pushed to \$linesWithRelevantSNs";
+					}
+					else {
+						$skipCount ++;
+						$thisLine =~ s/><\/w>/>√<\/w>/;
+						$linesToSkip .= "$thisLine\n";
+						#say LOG "<0.2.2>\t\$thisLemma: $thisLemma; line pushed to \@LinesToSkip";
+					}
+				}
+			}
+			elsif ($thisLine =~ /<verse osisID="(.*?)\.(.*?)\.(.*?)">/) {
+				say LOG "<0.3>\t$thisLine";
+				($bk, $ch, $vs) = ($1,$2,$3);
+				($thisVerse, $greekText) = ("$longName{$bk} $ch:$vs", "");
+				$staticText = $ULBtextThisVerse{$thisVerse};
+				$residueText = "q $staticText q";
+				$thisPreText = $ULBpreTextThisVerse{$thisVerse};
+				say OUT "\t\t\t\t<verse name=\"$thisVerse\">";
+				($flag) = (1);
+			}
+			else {say OUT $thisLine}
+		}
+	
+		close $thisFile;
+		close OUT;
+	}
+}
+sub ProcessRelevantSNs {
+	my ($relevantLines, $staticText, $residueText, $linesNotMatched) = (@_);
+	my ($matchedLines, $thisLine);
+	my @relevantLines = split /\n/, $relevantLines;
+	foreach my $line (@relevantLines) {
+		if ($line =~ /lemma="(\d+)"/) {
+			my $thisSN = $language . $1;
+			say LOG "<12>\t\$line: $line, \$thisSN: $thisSN, \$entriesThisSN{$thisSN}\n$entriesThisSN{$thisSN}";
+			($thisLine, $residueText, $linesNotMatched) = MatchAndPlace($line, $thisSN, $staticText, $residueText, $linesNotMatched);
+			$thisLine =~ s/[ \t]+$//;
+			$matchedLines .= $thisLine . "\n";
+			$matchedLines =~ s/\n{2,}$/\n/s;
+			say LOG "<13>\t\$matchedLines\n$matchedLines\n\$linesNotMatched\n$linesNotMatched+++"
+		}
+	}
+	return ($matchedLines, $residueText, $linesNotMatched);
+}
+sub MatchAndPlace {
+	my ($line, $sn, $staticText, $workText, $linesNotMatched) = @_;
+	#say LOG "<8>\t\$line: $line	 \$sn: $sn	 \$workText\n$workText";
+	my ($workEntry, $found, $matchedLines, $first, $second, $third, $firstLen, $secondLen, $thirdLen);
+	my @entries = split /, /, $entriesThisSN{$sn};
+	foreach my $entry (@entries) {
+		my $entryType;
+		if ($entry =~ /^(.*) \.\.\. (.*) \.\.\. (.*)$/) {
+			($first, $second, $third) = ($1, $2, $3);
+			($firstLen, $secondLen, $thirdLen) = (length $first, length $second, length $third);
+			$workEntry = "\\b" . $first . "\\b" . ".*?" . "\\b" . $second . "\\b" . ".*?" . "\\b" . $third;
+			say LOG "<1a>\t\$first: $first,	 \$second: $second,	 \$third: $third,	 \$firstLen: $firstLen,	 \$secondLen,: $secondLen,	 \$thirdLen: $thirdLen	\$entry: |$entry|\t\$workEntry: |$workEntry|";
+			$entryType = 1;
+		}
+		elsif ($entry =~ /^(.*) \.\.\. (.*)$/) {
+			($first, $second) = ($1, $2);
+			($firstLen, $secondLen) = (length $first, length $second);
+			$workEntry = "\\b" . $first . "\\b" . ".*?" . "\\b" . $second . "\\b";
+			say LOG "<2a>\t\$first: $first,	 \$second: $second,	 \$third: $third,	 \$firstLen: $firstLen,	 \$secondLen,: $secondLen,	 \$entry: |$entry|\t\$workEntry: |$workEntry|";
+			$entryType = 2;
+		}
+		else {$workEntry = $entry;}
+		
+		my $foundText;
+		#say LOG "<8.1>\t\$entryType: $entryType\t\$entry: $entry\t\$workEntry: $workEntry";
+		if ($workText =~ /\b$workEntry\b/p) {
+			say LOG "<8.1>Found: \t\$entryType: $entryType\t\$entry: $entry\t\$workEntry: $workEntry";
+				($foundText, $workText) = ($&, "${^PREMATCH}ı${^POSTMATCH}");
+				my ($place, $foundTextLength, $replacementSpaces) = (length ${^PREMATCH}, length $foundText, "");
+				$line =~ s/></>$entry</;
+				while (length $replacementSpaces < $foundTextLength) {$replacementSpaces .= " "}
+				
+				if ($entryType) {
+
+					say LOG "<8.2>\n\$workText,: $workText,	 \$matchedLines:\n$matchedLines	";
+					
+					($workText) = FixWorkText($line, $workText, $workEntry, $foundText, $foundTextLength, $first, $firstLen, $second, $secondLen, $third, $thirdLen);
+					
+					say LOG "<8.3>\n\$workText:\n$workText\n\$matchedLines:\n$matchedLines";
+
+				}
+
+				else {$workText =~ s/ı/$replacementSpaces/;}
+				
+				$matchedLines .= "$line◊$place";
+
+				say LOG "<8.4>\tAfter found, new \$workText:\n$workText";
+				$found = 1;
+			}
+		else {
+			#say LOG "\$workEntry $workEntry not found"
+		}
+		if ($found) {
+			last
+		}
+	}
+	unless ($found) {
+		$line =~ s/></>?</;
+		$linesNotMatched .= "$line\n"
+	}
+	return ($matchedLines, $workText, $linesNotMatched)
+}
+sub FixWorkText {
+	my ($thisLine, $text, $entry, $foundText, $foundTextLength, $first, $firstLen, $second, $secondLen, $third, $thirdLen) = @_;
+	my ($firstSpace, $secondSpace, $thirdSpace);
+	while (length $firstSpace < $firstLen) {$firstSpace .= " "}
+	while (length $secondSpace < $secondLen) {$secondSpace .= " "}
+	while (length $thirdSpace < $thirdLen) {$thirdSpace .= " "}
+	
+	say LOG "<9>\$text:\n$text\n\t\t\$entry: $entry	\$foundText: $foundText\t \$foundTextLength: $foundTextLength\t\$first: $first\t\$second: $second\t\$third: $third\n\$firstSpace: $firstSpace\t\$secondSpace: $secondSpace\t\$thirdSpace: $thirdSpace";
+	if ($third) {
+		if ($foundText =~ /$first(.*)$second(.*)$third/) {
+			my ($firstGap, $secondGap) = ($1, $2);
+			my $repText = "$firstSpace$firstGap$secondSpace$secondGap$thirdSpace";
+			say LOG "<9.1> \$repText: $repText";
+			$text =~ s/ı/$repText/;
+		}
+	}
+	else {
+		if ($foundText =~ /$first(.*)$second/) {
+			my ($firstGap) = ($1);
+			say LOG "<9.2>\t\$firstSpace: |$firstSpace|\t\$firstGap: |$firstGap|\t\$secondSpace: |$secondSpace|";
+			my $repText ="$firstSpace$firstGap$secondSpace";
+			say LOG "<9.3> \$repText: |$repText|";
+			$text =~ s/ı/$repText/;
+		}
+	}
+	return ($text)
+}
+sub Prepare_ULB_file {
+
+	my $thisVerse;
+	#$ULBText = read_file($ULBfile, binmode => 'utf8');
+
+	#while ($ULBText =~ /<verse name="(.*?)">\n<preText>(.*?)<\/preText>\n.*<text>(.*?)<\/text>.*<\/verse>/sg) {
+	#	($ULBtextThisVerse{$1}, $ULBpreTextThisVerse{$1}) = ($3, $2);
+	#}
+	#
+	
+	open (my $file, "<:utf8", "$ULBfile") or die "$ULBfile:\n$!";
+	
+		while (my $thisLine = <$file>) {
+			chomp $thisLine;
+			if ($thisLine =~ /verse name="(.*?)"/) {
+				$thisVerse = $1;
+				#say LOG "$thisVerse:\n$thisLine"
+			}
+			elsif ($thisLine =~ /<preText>(.*?)<\/preText>/) {
+				$ULBpreTextThisVerse{$thisVerse} = $1;
+				#say LOG "$thisVerse:\n$ULBpreTextThisVerse{$thisVerse}"
+			}
+			elsif ($thisLine =~ /<text>(.*?)<\/text>/) {
+				$ULBtextThisVerse{$thisVerse} = $1;
+				#say LOG "$thisVerse:\n$ULBtextThisVerse{$thisVerse}"
+			}
+		}
+	
+	close $file;
+}
+
+__DATA__
+01	gen	Genesis
+02	exo	Exodus
+03	lev	Leviticus
+04	num	Numbers
+05	deu	Deuteronomy
+06	jos	Joshua
+07	jdg	Judges
+08	rut	Ruth
+09	1sa	1 Samuel
+10	2sa	2 Samuel
+11	1ki	1 Kings
+12	2ki	2 Kings
+13	1ch	1 Chronicles
+14	2ch	2 Chronicles
+15	ezr	Ezra
+16	neh	Nehemiah
+17	est	Esther
+18	job	Job
+19	psa	Psalms
+20	pro	Proverbs
+21	ecc	Ecclesiastes
+22	sng	Song of Solomon
+23	isa	Isaiah
+24	jer	Jeremiah
+25	lam	Lamentations
+26	ezk	Ezekiel
+27	dan	Daniel
+28	hos	Hosea
+29	jol	Joel
+30	amo	Amos
+31	oba	Obadiah
+32	jon	Jonah
+33	mic	Micah
+34	nam	Nahum
+35	hab	Habakkuk
+36	zep	Zephaniah
+37	hag	Haggai
+38	zec	Zechariah
+39	mal	Malachi
+41	mat	Matthew
+42	mrk	Mark
+43	luk	Luke
+44	jhn	John
+45	act	Acts
+46	rom	Romans
+47	1co	1 Corinthians
+48	2co	2 Corinthians
+49	gal	Galatians
+50	eph	Ephesians
+51	php	Philippians
+52	col	Colossians
+53	1th	1 Thessalonians
+54	2th	2 Thessalonians
+55	1ti	1 Timothy
+56	2ti	2 Timothy
+57	tit	Titus
+58	phm	Philemon
+59	heb	Hebrews
+60	jas	James
+61	1pe	1 Peter
+62	2pe	2 Peter
+63	1jn	1 John
+64	2jn	2 John
+65	3jn	3 John
+66	jud	Jude
+67	rev	Revelation
--- a/Find_first_occurrence_of_proper_nouns.pl
+++ b/Find_first_occurrence_of_proper_nouns.pl
@ -0,0 +1,59 @@
+use 5.12.0;
+use File::Slurp;
+use File::Find ; 
+use Cwd ; 
+
+my %location;
+
+open LOG, ">log/log.log" or die;
+
+open(IN, "/Users/Henry/Documents/WACS/Tips_and_Hacks/MAST_tW_PDF_Updater/FilesForUpdates/Temp/ULB_text.txt") or die "$!";
+
+say "Reading ULB";
+
+while (<IN>) {
+	#print LOG "$_";
+	chomp;
+	while (s/^([^\n\t]*)\t([^\n]*?)([A-Z][a-z]+(-[A-Z][a-z]+)?)/$1\t$2/) {
+	#	say LOG $3;
+		unless (exists $location{$3}) {$location{$3} = $1}
+	}
+	
+}
+
+close IN;
+
+say "Outputting hash";
+
+open(OUT, ">out/results.txt") or die "$!";
+
+foreach my $word (sort keys %location) {
+	say OUT "$word, $location{$word}";
+}
+
+close OUT;
+
+say "Deleting common words";
+
+my $fileText = read_file("/Users/Henry/Documents/WACS/Tips_and_Hacks/MAST_tW_PDF_Updater/FilesForUpdates/Temp/ULB_text.txt", binmode => 'utf8');
+
+foreach my $word (sort keys %location) {
+	my $temp = lc $word;
+	#say LOG $word . "\t" . $temp;
+	if ($fileText =~ /\b$temp\b/) {
+		delete $location{$word}
+	}
+}
+
+say "Outputting final product";
+
+open(OUT, ">out/results.txt") or die "$!";
+
+foreach my $word (sort keys %location) {
+	say OUT "$word, $location{$word}";
+}
+
+close OUT;
+
+
+close LOG;
--- a/Find_first_occurrence_of_tWs.pl
+++ b/Find_first_occurrence_of_tWs.pl
@ -0,0 +1,365 @@
+use 5.12.0;
+use File::Slurp;
+use File::Find ; 
+use Cwd ; 
+
+my %value;
+
+open LOG, ">log/log.log" or die;
+
+while (<DATA>) {
+	chomp;
+	if (/^(.*)$/) {
+		$value{$1} = $1
+	}
+}
+
+my $fileText = read_file("/Users/Henry/Documents/WACS/Tips_and_Hacks/MAST_tW_PDF_Updater/FilesForUpdates/Temp/ULB_text.txt", binmode => 'utf8');
+
+$fileText = "\n" . $fileText;
+
+foreach my $thisWord (sort keys %value) {
+	print $thisWord . "\n";
+	if ($fileText =~ /\n([^\n\t]*)\t[^\n]*$thisWord\b/) {
+		say LOG $thisWord . ", " . $1;
+	}
+	
+}
+
+close LOG;
+
+__DATA__
+Aaron
+Abel
+Abiathar
+Abijah
+Abimelek
+Abner
+Abraham
+Absalom
+Adam
+Adonijah
+Ahab
+Ahaz
+Ahaziah
+Ahijah
+Ai
+Amalek
+Amaziah
+Ammon
+Amnon
+Amorite
+Amos
+Amoz
+Andrew
+Annas
+Antioch
+Apollos
+Aquila
+Arabah
+Arabia
+Aram
+Ararat
+Artaxerxes
+Asa
+Asaph
+Ashdod
+Asher
+Asherah
+Ashkelon
+Asia
+Assyria
+Athaliah
+Azariah
+Baal
+Baasha
+Babel
+Babylon
+Balaam
+Barabbas
+Barnabas
+Bartholomew
+Baruch
+Bashan
+Bathsheba
+Beelzebul
+Beersheba
+Benaiah
+Benjamin
+Berea
+Beth Shemesh
+Bethany
+Bethel
+Bethlehem
+Bethuel
+Boaz
+Caesar
+Caesarea
+Caiaphas
+Cain
+Caleb
+Cana
+Canaan
+Capernaum
+Carmel
+Chaldea
+Cilicia
+Colossae
+Corinth
+Cornelius
+Crete
+Cush
+Cyprus
+Cyrene
+Cyrus
+Damascus
+Dan
+Daniel
+Darius
+David
+Delilah
+Eden
+Edom
+Egypt
+Ekron
+Elam
+Eleazar
+Eliakim
+Elijah
+Elisha
+Elizabeth
+En Gedi
+Enoch
+Ephesus
+Ephraim
+Ephrath
+Esau
+Esther
+Ethiopia
+Euphrates River
+Eve
+Ezekiel
+Ezra
+Gabriel
+Gad
+Galatia
+Galilee
+Gath
+Gaza
+Gerar
+Geshur
+Gethsemane
+Gibeah
+Gibeon
+Gideon
+Gilead
+Gilgal
+Girgashites
+Golgotha
+Goliath
+Gomorrah
+Goshen
+Greece
+Greek
+Habakkuk
+Hagar
+Haggai
+Ham
+Hamath
+Hamor
+Hananiah
+Hannah
+Haran
+Hebron
+Hermon
+Herod
+Herodias
+Hezekiah
+Hilkiah
+Hittite
+Hivite
+Horeb
+Hosea
+Hoshea
+Iconium
+Isaac
+Isaiah
+Ishmael
+Issachar
+Jacob
+James 
+Japheth
+Jebus
+Jehoiachin
+Jehoiada
+Jehoiakim
+Jehoram
+Jehoshaphat
+Jehu
+Jephthah
+Jeremiah
+Jericho
+Jeroboam
+Jerusalem
+Jesse
+Jethro
+Jezebel
+Jezreel
+Joab
+Joash
+Job
+Joel
+John 
+John Mark
+Jonah
+Jonathan
+Joppa
+Joram
+Jordan River
+Joseph 
+Joshua
+Josiah
+Jotham
+Judah
+Judas.*Iscariot
+Judas son of James
+Judea
+Kadesh
+Kedar
+Kedesh
+Kerethites
+Kidron Valley
+Korah
+Laban
+Lamech
+Lazarus
+Leah
+Lebanon
+Levi
+Leviathan
+Lot
+Luke
+Lystra
+Maakah
+Macedonia
+Maker
+Malachi
+Manasseh
+Martha
+Mary
+Mary 
+Mary.*Magdalene
+Matthew
+Mede
+Melchizedek
+Memphis
+Meshech
+Mesopotamia
+Micah
+Michael
+Midian
+Miriam
+Mishael
+Mizpah
+Moab
+Molech
+Mordecai
+Moses
+Mount of Olives
+Naaman
+Nahor
+Nahum
+Naphtali
+Nathan
+Nazareth
+Nebuchadnezzar
+Negev
+Nehemiah
+Nile River
+Nineveh
+Noah
+Obadiah
+Omri
+Paddan Aram
+Paran
+Paul
+Peor
+Perizzite
+Persia
+Peter
+Pharaoh
+Philip
+Philippi
+Philistia
+Philistines
+Phinehas
+Phoenicia
+Pilate
+Pontus
+Potiphar
+Priscilla
+Rabbah
+Rachel
+Rahab
+Ramah
+Ramoth
+Rebekah
+Rehoboam
+Reuben
+Rimmon
+Rome
+Ruth
+Salt Sea
+Samaria
+Samson
+Samuel
+Sarah
+Saul
+Sea of Galilee
+Sea of Reeds
+Sennacherib
+Seth
+Sharon
+Sheba
+Shechem
+Shem
+Shiloh
+Shimei
+Shinar
+Sidon
+Silas
+Simeon
+Simon the Zealot
+Sinai
+Sodom
+Solomon
+Stephen
+Sukkoth
+Syria
+Tamar
+Tarshish
+Tarsus
+Terah
+Thessalonica
+Thomas
+Timothy
+Tirzah
+Titus
+Troas
+Tubal
+Tychicus
+Tyre
+Ur
+Uriah
+Uzziah
+Vashti
+Xerxes
+Zacchaeus
+Zadok
+Zebedee
+Zebulun
+Zechariah 
+Zedekiah
+Zephaniah
+Zerubbabel
+Zoar
--- a/Get_Strong_variants.pl
+++ b/Get_Strong_variants.pl
@ -0,0 +1,24 @@
+$pre = "https://biblehub.com/greek/";
+$var = "a, b, c, d, e, f, g, h";
+$post = ".htm";
+@array = split (/, /, $var);
+
+#$out = system "curl --fail https://biblehub.com/greek/2.htm";
+#print "\n\n\t\$out: $out";
+
+open OUT, ">out/results.txt";
+
+foreach $xx (1611 .. 1613) {
+	foreach $var (@array) {
+		$string = $pre . $xx . $var . $post;
+		$out = `curl $string`;
+		if ($out =~ /We're sorry, we were not able to find that passage./) {
+			last
+		} else {
+			print OUT "\$xx: $xx. \$string: $string.";
+		}
+	}
+	
+}
+
+close OUT;
--- a/Inventory_usfm_markers.pl
+++ b/Inventory_usfm_markers.pl
@ -0,0 +1,38 @@
+use 5.18.0;
+use File::Slurp;
+use File::Find ; 
+use Cwd ; 
+
+my $topDir = "/Users/Henry/Documents/WACS/en_ulb";
+
+my %found;
+my @filesToRun = ();
+my $filePattern = '*.usfm' ; 
+
+
+open LOG, ">/Users/Henry/Google Drive/WA/Scripts/out/log.log" or die;
+open OUT, ">/Users/Henry/Google Drive/WA/Scripts/out/output.txt" or die;
+
+find( sub { push @filesToRun, $File::Find::name if ( m/^(.*)$filePattern$/ ) }, $topDir) ;
+
+foreach  my $file ( @filesToRun  ) 
+{
+    print "$file\n" ;
+	my $fileText = read_file("$file", binmode => 'utf8');
+	$fileText =~ s/\n/ /g;
+	while ($fileText =~ /(\\[^ ]*) /g) {
+		my $code = $1;
+		unless (exists $found{$code}) {
+			$found{$code} = $code
+		}
+	}
+} 	
+
+foreach my $code (sort keys %found) {
+	say OUT $code
+}
+
+close OUT;
+close LOG;
+
+print "\n\tDone."