From work on PDF.

This commit is contained in:
Henry Whitney 2020-06-16 17:35:59 -04:00
parent 779b351201
commit 2a41961398
2 changed files with 149 additions and 0 deletions

View File

@ -0,0 +1,104 @@
# includes word order from Greek
use 5.12.0;
use File::Slurp;
use File::Find ;
use Cwd ;
use utf8;
#use open IN => ":utf8", OUT => ":utf8";
use open IO => ":utf8";
binmode STDOUT, ":encoding(UTF-8)";
# ---------------------------------------------------------------------------
# Main driver: read the OpenGNT CSV row by row and let Separate() write one
# XML file per book under OGNT/.
#
# Usage: the CSV path may be given as the first command-line argument; with no
# argument the original hard-coded location is used.
#
# State shared with Separate() (file-scoped lexicals and bareword handles):
#   %bk                  two-digit book number => three-letter code (__DATA__)
#   $last_bn/_ch/_vs     book/chapter/verse of the previous row
#   $bklc                lower-cased code of the book being written
#   LOG, OUT             log file and the current book's output file
# ---------------------------------------------------------------------------
mkdir "OGNT";    # may already exist from an earlier run; verified just below
mkdir "Logs";
-d "OGNT" or die "Cannot create output directory OGNT: $!";

# The log is only diagnostic, so a failure here is reported but not fatal.
open LOG, ">:utf8", "Logs/log.log"
    or warn "Cannot write Logs/log.log: $!\n";

my (%bk);
my ($last_bn, $last_ch, $last_vs, $bklc) = ("00", "00", "00");

say "Reading data ...";
while (my $entry = <DATA>) {
    # Lines look like "41-MAT.xml": keep the number and the three-letter code.
    $bk{$1} = $2 if $entry =~ /^(\d\d)-(...)/;
}

my $input_csv = $ARGV[0]
    // "/Users/Henry/Google Drive/WA/Scripts/Open_GNT/OpenGNT_version3_3.csv";
#my $input_csv = "OpenGNT_version3_3.csv";
open IN, "<:utf8", $input_csv or die "Cannot read $input_csv: $!";

say "Reading input ...";
while (<IN>) {
    chomp;
    Separate();    # works on the current line in $_
}

# Close the elements of the last book, but only if Separate() ever opened one;
# fileno() is undefined for a handle that was never opened.
if (defined fileno(OUT)) {
    say OUT " </verse>\n </chapter>\n </book>\n</xml>";
}
else {
    warn "No input row matched the expected layout; no book file was written.\n";
}

say "Closing input and output files ...";
if (defined fileno(OUT)) {
    # Buffered write errors only surface when the handle is closed.
    close OUT or die "Error closing the last book file: $!";
}
close IN;
close LOG;
say "Done.";
# Separate -- turn the OpenGNT row currently in $_ into one <w> element.
#
# Takes no arguments and returns nothing useful. The caller's read loop leaves
# the chomped row in $_. Rows that do not match the expected layout are
# skipped silently. Whenever the book, chapter or verse differs from the
# previous row, the enclosing elements are closed and reopened; a new book also
# switches OUT to a new file OGNT/<number>-<code>.xml.
#
# Uses the file-scoped %bk, $last_bn, $last_ch, $last_vs, $bklc and the
# handles LOG and OUT.
#
# NOTE(review): a book is opened with <div type="book"> but closed with
# </book>, and the <w> line has no space between the ULBorder and lemma
# attributes. Both are left exactly as they were in case a later step relies
# on the literal text -- confirm before changing.
sub Separate {
    # Bracket and separator characters of the bracketed columns, written as
    # escapes so they cannot be lost when the file is re-encoded.
    # NOTE(review): these characters were missing from the committed pattern
    # (which did not compile); they are restored here from the published
    # OpenGNT column layout -- confirm against the CSV.
    my ($lb, $sep, $rb) = ("\x{3014}", "\x{FF5C}", "\x{3015}");

    my @fields = m/
        \A
        ([^\t]*) \t                                      # 1: OGNTsort
        [^\t]* \t [^\t]* \t [^\t]* \t                    # three columns not used here
        . \t                                             # one-character column
        [^\t]* \t                                        # another unused column
        ${lb} (\d+) ${sep} (\d+) ${sep} (\d+) ${rb} \t   # 2-4: book, chapter, verse
        ${lb} [^${sep}]* ${sep} [^${sep}]* ${sep}        # two word forms not used here
        ([^${sep}]*) ${sep}                              # 5: word
        ([^${sep}]*) ${sep}                              # 6: lexeme
        ([^${sep}]*) ${sep}                              # 7: morphology code
        ([^${rb}]*) ${rb}                                # 8: Strong number
    /x;
    return unless @fields;

    my ($OGNTSort, $bn, $ch, $vs, $word, $lexeme, $gram, $sn) = @fields;
    say LOG join ", ", @fields[0 .. 6];

    $sn =~ s/[GH]//;    # drop the first G or H from the Strong number
    $bn = $bn + 1;      # shift the CSV book number to the numbering used in %bk

    if ($bn ne $last_bn) {
        defined $bk{$bn}
            or die "No book code known for book number $bn (input line $.)\n";
        $bklc = lc $bk{$bn};

        # Finish the previous book, if any; fileno() is undefined for a
        # handle that has not been opened yet.
        if (defined fileno(OUT)) {
            say OUT " </verse>\n </chapter>\n </book>\n</xml>";
            close OUT or die "Error closing the previous book file: $!";
        }
        open OUT, ">:utf8", "OGNT/$bn-$bk{$bn}.xml" or die "$! $bn-$bk{$bn}.xml";
        say OUT "\n<xml>\n <div type=\"book\" osisID=\"$bklc\">\n <chapter osisID=\"$bklc.$ch\">\n <verse osisID=\"$bklc.$ch.$vs\">";
        ($last_bn, $last_ch, $last_vs) = ($bn, $ch, $vs);
    }
    elsif ($ch ne $last_ch) {
        say OUT " </verse>\n </chapter>\n <chapter osisID=\"$bklc.$ch\">\n <verse osisID=\"$bklc.$ch.$vs\">";
        ($last_ch, $last_vs) = ($ch, $vs);
    }
    elsif ($vs ne $last_vs) {
        say OUT " </verse>\n <verse osisID=\"$bklc.$ch.$vs\">";
        $last_vs = $vs;
    }

    # "<##>" is written literally into the ULBorder attribute.
    say OUT "\t\t\t\t<w OGNTsort=\"$OGNTSort\" ULBorder=\"\<##\>\"lemma=\"$sn\" morph=\"$gram\" lexeme=\"$lexeme\">$word</w>";
    return;
}
__DATA__
41-MAT.xml
42-MRK.xml
43-LUK.xml
44-JHN.xml
45-ACT.xml
46-ROM.xml
47-1CO.xml
48-2CO.xml
49-GAL.xml
50-EPH.xml
51-PHP.xml
52-COL.xml
53-1TH.xml
54-2TH.xml
55-1TI.xml
56-2TI.xml
57-TIT.xml
58-PHM.xml
59-HEB.xml
60-JAS.xml
61-1PE.xml
62-2PE.xml
63-1JN.xml
64-2JN.xml
65-3JN.xml
66-JUD.xml
67-REV.xml

View File

@ -0,0 +1,45 @@
use 5.12.0;
use File::Slurp;
use File::Find ;
use Cwd ;
use utf8;
#use open IN => ":utf8", OUT => ":utf8";
use open IO => ":utf8";
# ---------------------------------------------------------------------------
# For every Markdown file below $topDir that does not yet mention
# "Forms Found in the English ULB": reduce the "# " heading to the text before
# the first ", ", append the original heading line under a new
# "## Forms Found in the English ULB:" section, and write the result under the
# same file name into $outDir. The source files are only read.
# ---------------------------------------------------------------------------
my $updateDir = "/Users/Henry/Documents/WACS/Tips_and_Hacks/MAST_tW_PDF_Updater/FilesForUpdates";
my $logFile   = "$updateDir/Logs/log.txt";
my $outDir    = "$updateDir/Output/names";
my $topDir    = "/Users/Henry/Documents/WACS/Restructure/bible/names";

open(my $log, ">:utf8", $logFile) or die "Cannot write $logFile: $!";

# Collect plain files whose name ends in ".md". Inside the callback $_ holds
# the bare file name and find() has changed into its directory, so the -f test
# on $_ works.
my @filesToRun = ();
find(
    sub {
        push @filesToRun, $File::Find::name if /\.md\z/ && -f $_;
    },
    $topDir
);

foreach my $file (@filesToRun) {
    say {$log} $file;

    # Same file name, but in the output directory. Building the path from the
    # base name means it can never fall back to the input path.
    my ($baseName) = $file =~ m{([^/]+)\z};
    my $shortFile = "$outDir/$baseName";

    my $fileText = read_file($file, binmode => ':utf8');

    # Files that already carry the section are only noted in the log.
    if ($fileText =~ /Forms Found in the English ULB/) {
        say {$log} "\tForms Found in the English ULB";
        next;
    }

    # The first line must be a "# " heading; everything after it is the body.
    unless ($fileText =~ /^# ([^\n]*)\n(.*)$/s) {
        say {$log} "\tNo '# ' heading on the first line; file skipped";
        next;
    }
    my ($nameLine, $mainText) = ($1, $2);
    say {$log} "\$nameLine: $nameLine\n\$mainText:\n$mainText\n\n";

    # The main name is the text before the first ", "; a heading without
    # that separator is used unchanged.
    my $mainName = $nameLine =~ /^([^,]*), / ? $1 : $nameLine;

    $fileText = "# $mainName\n\n$mainText\n\n## Forms Found in the English ULB:\n\n$nameLine";
    $fileText =~ s/\n{3,}/\n\n/g;    # collapse runs of blank lines to one

    open(my $out, ">:utf8", $shortFile) or die "Cannot write $shortFile: $!";
    say {$out} $fileText;
    # Buffered write errors only surface when the handle is closed.
    close $out or die "Error closing $shortFile: $!";
}

close $log or die "Error closing $logFile: $!";
say "Done.";