From work on PDF.

2020-06-16 17:35:59 -04:00 · 2020-06-16 17:35:59 -04:00 · 2a41961398
parent 779b351201
commit 2a41961398
2 changed files with 149 additions and 0 deletions
--- a/MAST_tW_PDF_Updater/FilesForUpdates/Buld_MAST_OGNT_from_csv.pl
+++ b/MAST_tW_PDF_Updater/FilesForUpdates/Buld_MAST_OGNT_from_csv.pl
@ -0,0 +1,104 @@
+# includes word order from Greek
+use 5.12.0;
+use File::Slurp;
+use File::Find ; 
+use Cwd ; 
+use utf8;
+#use open IN => ":utf8", OUT => ":utf8";
+use open IO => ":utf8";
+binmode STDOUT, ":encoding(UTF-8)";
+# Driver: builds one XML file per book under OGNT/ from the OpenGNT CSV.
+# Separate() (defined further down) does the per-line work; it shares the
+# file-scoped variables declared here (%bk, $last_bn, $last_ch, $last_vs,
+# $bklc) and the bareword handles LOG, IN and OUT, so those names must not
+# change.
+
+# Create the output directories when they do not exist yet.  Failure is fatal
+# because every later open into them would fail as well.
+for my $dir ("OGNT", "Logs") {
+	-d $dir or mkdir($dir) or die "Cannot create directory $dir: $!";
+}
+
+# No explicit layer is given, so the default from "use open IO" still applies.
+open LOG, ">", "Logs/log.log" or die "Cannot write Logs/log.log: $!";
+
+my (%bk);
+my ($last_bn, $last_ch, $last_vs, $bklc) = ("00", "00", "00");
+
+# Load the book table from __DATA__: a line such as "41-MAT.xml" stores
+# $bk{41} = "MAT".
+say "Reading data ...";
+while (<DATA>) {
+	chomp;
+	if (/^(\d\d)-(...)/) {
+		$bk{$1} = $2;
+	}
+}
+
+# The CSV location may be passed as the first command-line argument (for
+# example "OpenGNT_version3_3.csv"); without one the original fixed path is used.
+my $csv_path = @ARGV ? $ARGV[0] : "/Users/Henry/Google Drive/WA/Scripts/Open_GNT/OpenGNT_version3_3.csv";
+open IN, "<", $csv_path or die "Cannot read $csv_path: $!";
+
+say "Reading input ...";
+
+# Separate() takes the current line from $_, so the loop variable stays $_.
+while (<IN>) {
+	chomp;
+	Separate();
+}
+
+# Write the closing tags of the last book only if a book file was ever opened;
+# with an empty or non-matching input OUT is still unopened here.
+my $book_open = defined fileno(OUT);
+say OUT "			</verse>\n		</chapter>\n	</book>\n</xml>" if $book_open;
+
+say "Closing input and output files ...";
+
+if ($book_open) {
+	# Buffered write errors only surface at close time.
+	close OUT or die "Cannot finish the last book file: $!";
+}
+close IN;
+close LOG or die "Cannot finish Logs/log.log: $!";
+
+say "Done.";
+
+# Separate: turn the current OpenGNT CSV line (held in $_) into one <w> element.
+#
+# Takes no arguments and reads $_ as set by the caller's input loop.  Lines
+# that do not match the expected tab-separated layout are skipped silently.
+#
+# Shared state: %bk (book number => book code), $last_bn / $last_ch / $last_vs
+# (book, chapter and verse of the previously written word) and $bklc (the
+# lower-cased code of the current book).  Output goes to the bareword handles
+# LOG and OUT; OUT is (re)opened here whenever the book number changes.
+#
+# Dies when a book number is missing from %bk, or when a book file cannot be
+# opened or closed.
+sub Separate {
+	# Capture 1 is a tab-free field followed by five more fields, captures 2-4
+	# come from the 〔book｜chapter｜verse〕 group, captures 5-8 are the last
+	# four members of the following 〔…〕 group.
+	return unless /([^\t]*)\t[^\t]*\t[^\t]*\t[^\t]*\t.\t[^\t]*\t〔(\d+)｜(\d+)｜(\d+)〕\t〔[^\｜]*｜[^\｜]*｜([^\｜]*)｜([^\｜]*)｜([^\｜]*)｜([^\｜]*)〕/;
+	my ($OGNTSort, $bn, $ch, $vs, $word, $lexeme, $gram, $sn) = ($1, $2, $3, $4, $5, $6, $7, $8);
+	say LOG "$OGNTSort, $bn, $ch, $vs, $word, $lexeme, $gram";
+
+	$sn =~ s/[GH]//;	# removes the first G or H found in the number
+	$bn = $bn + 1;		# %bk and the file names use the CSV book number plus one
+
+	if ($bn ne $last_bn) {
+		# New book: without this check an unknown number would silently
+		# produce a file called "OGNT/NN-.xml" with an empty osisID.
+		my $code = $bk{$bn};
+		defined $code or die "Unknown book number $bn at input line $.\n";
+
+		# Finish the previous book's file, if there is one.
+		if (OUT->opened()) {
+			say OUT "			</verse>\n		</chapter>\n	</book>\n</xml>";
+			close OUT or die "Cannot finish the book file before $bn-$code.xml: $!";
+		}
+
+		$bklc = lc $code;
+		open OUT, ">:utf8", "OGNT/$bn-$code.xml" or die "$! $bn-$code.xml";
+		# NOTE(review): the book is opened as <div type="book"> but closed as
+		# </book>; confirm which tag the consumers expect before changing either.
+		say OUT "\n<xml>\n	<div type=\"book\" osisID=\"$bklc\">\n		<chapter osisID=\"$bklc.$ch\">\n			<verse osisID=\"$bklc.$ch.$vs\">";
+		($last_bn, $last_ch, $last_vs) = ($bn, $ch, $vs);
+	}
+	elsif ($ch ne $last_ch) {
+		# Same book, new chapter: close the open verse and chapter first.
+		say OUT "			</verse>\n		</chapter>\n		<chapter osisID=\"$bklc.$ch\">\n			<verse osisID=\"$bklc.$ch.$vs\">";
+		($last_ch, $last_vs) = ($ch, $vs);
+	}
+	elsif ($vs ne $last_vs) {
+		# Same chapter, new verse.  The file-scoped $bklc is already correct
+		# for this book, so it is no longer recomputed and shadowed here.
+		say OUT "			</verse>\n		<verse osisID=\"$bklc.$ch.$vs\">";
+		$last_vs = $vs;
+	}
+
+	# "<##>" is written literally as the ULBorder value.  A space now separates
+	# the ULBorder and lemma attributes, which previously ran together.
+	say OUT "\t\t\t\t<w OGNTsort=\"$OGNTSort\" ULBorder=\"\<##\>\" lemma=\"$sn\" morph=\"$gram\" lexeme=\"$lexeme\">$word</w>";
+}
+
+__DATA__
+41-MAT.xml
+42-MRK.xml
+43-LUK.xml
+44-JHN.xml
+45-ACT.xml
+46-ROM.xml
+47-1CO.xml
+48-2CO.xml
+49-GAL.xml
+50-EPH.xml
+51-PHP.xml
+52-COL.xml
+53-1TH.xml
+54-2TH.xml
+55-1TI.xml
+56-2TI.xml
+57-TIT.xml
+58-PHM.xml
+59-HEB.xml
+60-JAS.xml
+61-1PE.xml
+62-2PE.xml
+63-1JN.xml
+64-2JN.xml
+65-3JN.xml
+66-JUD.xml
+67-REV.xml
--- a/MAST_tW_PDF_Updater/FilesForUpdates/Restructure_names.pl
+++ b/MAST_tW_PDF_Updater/FilesForUpdates/Restructure_names.pl
@ -0,0 +1,45 @@
+use 5.12.0;
+use File::Slurp;
+use File::Find ; 
+use Cwd ;
+use utf8;
+#use open IN => ":utf8", OUT => ":utf8";
+use open IO => ":utf8";
+
+
+# Restructures every ".md" file found under $topDir.  A first-line heading of
+# the form "# Name, Other, ..." is reduced to "# Name", and the complete
+# original heading text is appended under "## Forms Found in the English ULB:".
+# Each result is written to $outDir under the same file name.  Files that
+# already contain that section are only noted in the log, and files without a
+# first-line "# " heading are skipped.
+
+my $baseDir = "/Users/Henry/Documents/WACS/Tips_and_Hacks/MAST_tW_PDF_Updater/FilesForUpdates";
+my $topDir = "/Users/Henry/Documents/WACS/Restructure/bible/names";
+my $outDir = "$baseDir/Output/names";
+
+# No explicit layer is given, so the default from "use open IO" still applies.
+open(my $log, ">", "$baseDir/Logs/log.txt") or die "Cannot write $baseDir/Logs/log.txt: $!";
+
+# Collect regular files whose name really ends in ".md".  The earlier code put
+# the glob '*.md' into a regex, where '.' matches any character, so a name
+# such as "x.cmd" was selected too; its output path then stayed equal to the
+# input path and the source file was overwritten.
+my @filesToRun = ();
+find( sub { push @filesToRun, $File::Find::name if -f $_ && /\.md\z/ }, $topDir) ;
+
+foreach my $file ( @filesToRun )
+{
+	say {$log} $file;
+
+	# The output keeps the file name only, not the sub-directory structure.
+	my ($baseName) = $file =~ m{([^/]*\.md)\z};
+	defined $baseName or next;
+	my $shortFile = "$outDir/$baseName";
+
+	my $fileText = read_file($file, binmode => 'utf8');
+
+	# Already restructured: note it in the log and leave the file alone.
+	if ($fileText =~ /Forms Found in the English ULB/) {
+		say {$log} "\tForms Found in the English ULB";
+		next;
+	}
+
+	# Split the text into the first-line heading and everything after it.
+	next unless $fileText =~ /^# ([^\n]*)\n(.*)$/s;
+	my ($nameLine, $mainText) = ($1, $2);
+	say {$log} "\$nameLine: $nameLine\n\$mainText:\n$mainText\n\n";
+
+	# The main name is the part before the first ", "; a heading without
+	# one is used whole.
+	my $mainName = $nameLine =~ /^([^,]*), / ? $1 : $nameLine;
+
+	$fileText = "# $mainName\n\n$mainText\n\n## Forms Found in the English ULB:\n\n$nameLine";
+	$fileText =~ s/\n{3,}/\n\n/g;	# collapse runs of blank lines to one
+
+	open(my $out, ">", $shortFile) or die "Cannot write $shortFile: $!";
+	say {$out} $fileText;
+	# Buffered write errors only surface at close time.
+	close $out or die "Cannot finish $shortFile: $!";
+}
+
+close $log or die "Cannot finish $baseDir/Logs/log.txt: $!";
+
+say "Done."