update for MAST PDF
This commit is contained in:
parent
c94831a0b2
commit
cb1679ac8f
File diff suppressed because it is too large
Load Diff
|
@ -349,13 +349,13 @@ sub Finish {
|
|||
$tW_files .= "$key ";
|
||||
say LOG "\$key: $key\t\$tW_file{$key}: $tW_file{$key}"
|
||||
}
|
||||
#say "\nOpening .md files.";
|
||||
#if ($^O eq "darwin") {
|
||||
# #system `$browser https://www.blueletterbible.org/lang/lexicon/lexicon.cfm?strongs=$strong`;
|
||||
# system "perl $Bin/get_strongs_gist.pl";
|
||||
# system `$textEditor $tW_files`;
|
||||
# system `$textEditor $exceptions_file`;
|
||||
#}
|
||||
say "\nOpening .md files\n\$tW_files: $tW_files";
|
||||
if ($^O eq "darwin") {
|
||||
#system `$browser https://www.blueletterbible.org/lang/lexicon/lexicon.cfm?strongs=$strong`;
|
||||
system "perl $Bin/get_strongs_gist.pl";
|
||||
system `$textEditor $tW_files`;
|
||||
system `$textEditor $exceptions_file`;
|
||||
}
|
||||
#if ($^O eq "linux") {
|
||||
# say "curl $intrln_ref > $Bin/Temp/This_interlinear.html";
|
||||
# system "curl $intrln_ref > $Bin/Temp/This_interlinear.html";
|
||||
|
|
|
@ -1,45 +0,0 @@
|
|||
use 5.12.0;
|
||||
use File::Slurp;
|
||||
use File::Find ;
|
||||
use Cwd ;
|
||||
use utf8;
|
||||
#use open IN => ":utf8", OUT => ":utf8";
|
||||
use open IO => ":utf8";
|
||||
|
||||
|
||||
# Restructure every translationWords "names" page found under $topDir.
#
# For each *.md page that does not already contain a
# "Forms Found in the English ULB" section:
#   - the first line ("# name, other name, ...") is reduced to "# name";
#   - the rest of the page is kept as-is;
#   - the original heading line is appended under a new
#     "## Forms Found in the English ULB:" section.
# The rewritten page goes to $outDir under the same base name; the source
# page is never modified. Progress is written to the log file.

my $logFile = "/Users/Henry/Documents/WACS/Tips_and_Hacks/MAST_tW_PDF_Updater/FilesForUpdates/Logs/log.txt";
my $outDir  = "/Users/Henry/Documents/WACS/Tips_and_Hacks/MAST_tW_PDF_Updater/FilesForUpdates/Output/names";

open(my $log, '>:utf8', $logFile) or die "$logFile: $!";

my $topDir = "/Users/Henry/Documents/WACS/Restructure/bible/names";

my @filesToRun = ();
# A real regex for "ends in .md". The previous value '*.md' was a shell glob
# interpolated into a regex, which matched any name ending in <any char>md.
my $filePattern = qr/\.md\z/;
find( sub { push @filesToRun, $File::Find::name if -f $_ && $_ =~ $filePattern }, $topDir );

foreach my $file (@filesToRun) {
    say {$log} $file;

    # Output keeps only the base name; every page lands directly in $outDir.
    my ($baseName) = $file =~ m{([^/]+)\z};
    my $shortFile  = "$outDir/$baseName";

    my $fileText = read_file($file, binmode => 'utf8');

    # Already converted: note it in the log and leave the page alone.
    if ($fileText =~ /Forms Found in the English ULB/) {
        say {$log} "\tForms Found in the English ULB";
        next;
    }

    # Pages that do not start with a "# ..." heading are skipped (no output).
    next unless $fileText =~ /^# ([^\n]*)\n(.*)$/s;
    my ($nameLine, $mainText) = ($1, $2);
    say {$log} "\$nameLine: $nameLine\n\$mainText:\n$mainText\n\n";

    # Main name is everything before the first ", "; a heading without that
    # separator is used whole.
    my ($mainName) = $nameLine =~ /^([^,]*), /;
    $mainName = $nameLine unless defined $mainName;

    $fileText = "# $mainName\n\n$mainText\n\n## Forms Found in the English ULB:\n\n$nameLine";
    $fileText =~ s/\n{3,}/\n\n/g;    # collapse runs of blank lines

    open(my $out, '>:utf8', $shortFile) or die "$shortFile: $!";
    say {$out} $fileText;
    # Buffered write errors only surface at close, so check it.
    close($out) or die "$shortFile: $!";
}

close($log) or die "$logFile: $!";

say "Done.";
|
|
@ -1,62 +0,0 @@
|
|||
# Adds Synonyms and Related Words section and
|
||||
# Forms Found in the English ULB section
|
||||
# to tW pages
|
||||
|
||||
use 5.12.0;
|
||||
use File::Slurp;
|
||||
use File::Find ;
|
||||
use Cwd ;
|
||||
use utf8;
|
||||
#use open IN => ":utf8", OUT => ":utf8";
|
||||
use open IO => ":utf8";
|
||||
|
||||
# Driver: run Process() over every *.md page under $topDir and let Output()
# write the result to the mirrored path under $topOutDir.

# Nothing in this script writes to the log; opening it only creates or
# truncates the file. A missing Logs/ directory is therefore reported but
# does not abort the run (the original ignored the failure silently).
my $logFile = "Logs/log.log";
my $logOpened = open(my $log, '>:utf8', $logFile);
warn "$logFile: $!\n" unless $logOpened;

# File-scope lexicals: Output() reads both to map source paths to output paths.
my $topDir = "/Users/Henry/Documents/WACS/W_Q_Restructure/bible";
my $topOutDir = "/Users/Henry/Documents/WACS/W_Q_Restructure_new/bible";

my @filesToRun = ();
# A real regex for "ends in .md". The previous value '*.md' was a shell glob
# interpolated into a regex, which matched any name ending in <any char>md.
my $filePattern = qr/\.md\z/;
find( sub { push @filesToRun, $File::Find::name if -f $_ && $_ =~ $filePattern }, $topDir );

foreach my $file (@filesToRun) {
    say $file;
    my $fileText = read_file("$file", binmode => 'utf8');
    my $outText = Process($fileText);
    Output($file, $outText);
}

close($log) if $logOpened;

say "Done.";
|
||||
|
||||
# =====================
|
||||
|
||||
# Process($text) -> $newText
#
# Rebuilds one tW page. The page is expected to open with a heading line
# "# entry, entry, ...". The result is:
#
#   # <first entry>
#   ## Synonyms and Related Words:      <all entries, sorted, ", "-joined>
#   <everything from the first "## Facts:" or "## Definition:" to the end>
#   ## Forms Found in the English ULB   <the same sorted entries>
#
# with every run of three or more newlines collapsed to one blank line.
#
# A page that does not start with a "# ...\n" heading is returned unchanged,
# rather than being replaced by an empty skeleton. A page with no
# Facts/Definition section gets an empty body, as before.
sub Process {
    my ($text) = @_;

    # Heading line: the comma-separated list of entries for this page.
    my ($entries) = $text =~ /\A# ([^\n]*)\n/
        or return $text;

    # Key word = the heading text before the first comma (whole line if none).
    my ($keyWord) = $entries =~ /\A([^,]*)/;

    # Body = from the first Facts/Definition heading through end of page.
    my ($bulk) = $text =~ /(## (?:Facts|Definition):.*)\z/s;
    $bulk //= '';

    my $forms = join ', ', sort { $a cmp $b } split(/, /, $entries);

    $text = "# $keyWord\n\n## Synonyms and Related Words:\n\n$forms\n\n$bulk\n\n## Forms Found in the English ULB\n\n$forms\n\n\n\n";

    # One global pass is enough: each maximal run of 3+ newlines becomes 2.
    $text =~ s/\n{3,}/\n\n/g;

    return $text;
}
|
||||
|
||||
# Output($sourcePath, $text)
#
# Writes $text to the counterpart of $sourcePath in the output tree: the
# leading $topDir of the path is replaced by $topOutDir (both are file-scope
# variables). The destination directory must already exist.
#
# Dies if $sourcePath does not start with $topDir, because writing would then
# overwrite the source file itself. Also dies if the file cannot be opened or
# the write cannot be flushed.
sub Output {
    my ($OutFile, $text) = @_;

    # \Q...\E: $topDir is a literal path, not a pattern, so "." and other
    # regex metacharacters in it must not be interpreted.
    $OutFile =~ s/\A\Q$topDir\E/$topOutDir/
        or die "Output: $OutFile is not under $topDir; refusing to overwrite the source file\n";

    open(my $out, '>:utf8', $OutFile) or die "$OutFile: $!";
    print {$out} $text;
    # Buffered write errors only surface at close, so check it.
    close($out) or die "$OutFile: $!";
}
|
|
@ -11,9 +11,9 @@ Repository directory: /Users/Henry/Documents/WACS
|
|||
|
||||
translationNotes path: en_tn
|
||||
Unlocked Literal Bible path: en_ulb
|
||||
# translationNotes path: gl_.*_tn
|
||||
translationWords path: gl_.*_bible.en_tw
|
||||
# Unlocked Literal Bible path: gl_.*_ulb
|
||||
# translationNotes path: en_tn
|
||||
translationWords path: en_tw/bible
|
||||
# Unlocked Literal Bible path: en_ulb
|
||||
Hebrew Bible XML directory: MAST_HB
|
||||
Greek Bible XML directory: OGNT
|
||||
|
||||
|
|
|
@ -61,7 +61,7 @@ my (@fileList);
|
|||
# ==============================
|
||||
|
||||
chdir("$pwd");
|
||||
open LOG, ">:utf8", "Logs${d}Exc_log.log" or die "\$log: Logs${d}Exc_log.log: $!";
|
||||
open LOG, ">:utf8", "Logs${d}1_Data_and_inputs.txt" or die "\$log: Logs${d}1_Data_andinputs.txt: $!";
|
||||
open OUT, ">:utf8", $output or die "$!";
|
||||
open MISSING, ">$missing" or die "$!";
|
||||
|
||||
|
@ -79,12 +79,14 @@ GetUserDefaults();
|
|||
GetULBBooksToProcess();
|
||||
ReadExceptions();
|
||||
close LOG;
|
||||
open LOG, ">:utf8", "Logs${d}tW_pairs_log.txt" or die "Logs${d}tW_pairs_log.txt: $!";
|
||||
open LOG, ">:utf8", "Logs${d}2_tW_pairs_log.txt" or die "Logs${d}2_tW_pairs_log.txt: $!";
|
||||
PairtWEntriesTotWPageAndUniqSNs();
|
||||
close LOG;
|
||||
open LOG, ">:utf8", "Logs${d}tWs_from_MAST_log.txt" or die "tWs_from_MAST_log.txt: $!";
|
||||
open LOG, ">:utf8", "Logs${d}3_tWs_from_MAST_log.txt" or die "3_tWs_from_MAST_log.txt: $!";
|
||||
GetRelevantSNsForEachVerse();
|
||||
LinkULBtoCV();
|
||||
close LOG;
|
||||
open LOG, ">:utf8", "Logs${d}4_Process_log.txt" or die "4_Process_log.txt: $!";
|
||||
ProcessEachVerse();
|
||||
|
||||
say OUT $finalOutString;
|
||||
|
@ -123,6 +125,8 @@ sub GetUserDefaults {
|
|||
die "No path to repo found" if $repoPath eq "";
|
||||
|
||||
($topTwDir, $topOTSourceLangDir, $topNTSourceLangDir) = ("$repoPath${d}$twPath", "$repoPath${d}MAST_HB", "$repoPath${d}OGNT");
|
||||
|
||||
say LOG "\$topTwDir: $topTwDir\n\$topOTSourceLangDir: $topOTSourceLangDir\n\$topNTSourceLangDir: $topNTSourceLangDir ";
|
||||
|
||||
close $defaults;
|
||||
}
|
||||
|
@ -144,12 +148,13 @@ sub GetULBBooksToProcess {
|
|||
}
|
||||
|
||||
$sourceFile = "$topSourceLangDir${d}$this_bk.xml";
|
||||
say LOG $sourceFile;
|
||||
push @fileList, $sourceFile;
|
||||
}
|
||||
}
|
||||
|
||||
close $file;
|
||||
#say LOG "\@fileList:\n@fileList";
|
||||
say LOG "===\n\@fileList:\n@fileList\n===\n";
|
||||
}
|
||||
|
||||
sub ReadExceptions {
|
||||
|
@ -162,23 +167,24 @@ sub ReadExceptions {
|
|||
#say LOG $line;
|
||||
my $rf;
|
||||
if ($line =~ /^([^#\n][^\t\n]*)\t([GH]\d+)\t\|\|$/) {
|
||||
my ($oldNew) = ($2);
|
||||
my ($SNtoSkip) = ($2);
|
||||
$rf = $1;
|
||||
#say LOG "\$line: $line, \$rf: $rf, \$oldNew: $oldNew";
|
||||
($deleteNum{$rf}) .= "$oldNew√";
|
||||
say LOG "<1>\t\$line: $line, \$rf: $rf, \$SNtoSkip: $SNtoSkip";
|
||||
($deleteNum{$rf}) .= "$SNtoSkip√";
|
||||
$specifiedText{$rf} = 1;
|
||||
#say LOG "\$specifiedText{$rf}: $specifiedText{$rf}";
|
||||
} elsif ($line =~ /^([^#\n][^\t\n]*)\t(\d+\t\d+)/) {
|
||||
} elsif ($line =~ /^([^#\n][^\t\n]*)\t([GH]\d+\t[GH]\d+)/) {
|
||||
my ($oldNew) = ($2);
|
||||
$rf = $1;
|
||||
#say LOG "\$line: $line, \$rf: $rf, \$oldNew: $oldNew";
|
||||
say LOG "<2>\t\$line: $line, \$rf: $rf, \$oldNew: $oldNew";
|
||||
($adjust{$rf}) .= "$oldNew√";
|
||||
$specifiedText{$rf} = 1;
|
||||
}
|
||||
elsif ($line =~ /^([^#\n\t][^\t\n]*)\t(.\d+)\t([^\t\n]*)\t([^\t\n]*)$/) {
|
||||
my ($rf, $sn, $snippet, $page) = ($1, $2, $3, $4);
|
||||
#say LOG "\$line: $line, \$rf: $rf, \$oldNew: $oldNew";
|
||||
say LOG "<3>\t\$rf: $rf, \$sn: $sn, \$snippet: $snippet, \$page: $page ";
|
||||
$specifiedEntries{$rf} .= "$sn≈$snippet≈$page√";
|
||||
$relevantSNsInCV{$rf} =~ s/$sn√?//;
|
||||
$specifiedText{$rf} = 1;
|
||||
}
|
||||
|
||||
|
@ -206,7 +212,7 @@ sub PairtWEntriesTotWPageAndUniqSNs {
|
|||
if ($file =~ /\/([^\/]*)\/[^\/]*\.md/) {
|
||||
$dir{$shortFile} = $1
|
||||
}
|
||||
say LOG "<0>\$shortFile: $shortFile\t\$dir{$shortFile}: $dir{$shortFile}";
|
||||
say LOG "<4>\$shortFile: $shortFile\t\$dir{$shortFile}: $dir{$shortFile}";
|
||||
#say "|$shortFile|"; die;
|
||||
#if ($shortFile =~ /^(kt|names)/) {
|
||||
#my $fileText = read_file("$file", binmode => 'utf8');
|
||||
|
@ -297,12 +303,13 @@ sub GetRelevantSNsForEachVerse {
|
|||
if ($sourceFile =~ /(..)-...\.xml$/) {
|
||||
$hg = "H" if ($1 < 40);
|
||||
}
|
||||
#say LOG "opening \$sourceFile: $sourceFile";
|
||||
say LOG "opening \$sourceFile: $sourceFile";
|
||||
open IN, "$sourceFile" or die "$sourceFile can't be opened\n\n";
|
||||
my ($thisBook, $thisChap, $thisVers, $thisCV);
|
||||
my (@pages);
|
||||
while (<IN>) {
|
||||
chomp;
|
||||
say LOG "<\@>\t$_";
|
||||
if (/<verse osisID="([^\.]*).(\d+).(\d+)">/) {
|
||||
#say LOG "$thisCV: \$relevantSNsInCV{$thisCV}: $relevantSNsInCV{$thisCV}";# Making sure previous verse is populated
|
||||
my ($bk, $ch, $vs) = ($1, $2, $3);
|
||||
|
@ -315,15 +322,20 @@ sub GetRelevantSNsForEachVerse {
|
|||
#say LOG "##\t$bk $ch:$vs, $thisCV";
|
||||
}
|
||||
else {
|
||||
s/(lemma=").*?(\d+).*?("\n)/$1$2$3/;
|
||||
while (/<w lemma="(\d+)"/g) {
|
||||
#say LOG $_;
|
||||
my ($thisSN) = ($hg . $1);
|
||||
#say LOG "\t\$thisSN: $thisSN, \$relevantSNsInCV{$thisCV}: $relevantSNsInCV{$thisCV}";
|
||||
if (exists $relevantSNs{$thisSN}) {
|
||||
$relevantSNsInCV{$thisCV} .= "$thisSN√" unless ($relevantSNsInCV{$thisCV} =~ /\b$thisSN\b/);
|
||||
if (/lemma="([^"]*)"/) {
|
||||
my $gist = $1;
|
||||
say LOG "<\@\@>\t\$gist: $gist";
|
||||
if ($gist =~ /\d+/) {
|
||||
s/(lemma=")[^\d]*?(\d+)[^\d]*?(")/$1$2$3/;
|
||||
while (/<w lemma="(\d+)"/g) {
|
||||
my ($thisSN) = ($hg . $1);
|
||||
say LOG "<\@\@\@>\t\$thisSN: $thisSN, \$relevantSNsInCV{$thisCV}: $relevantSNsInCV{$thisCV}";
|
||||
if (exists $relevantSNs{$thisSN}) {
|
||||
$relevantSNsInCV{$thisCV} .= "$thisSN√" unless ($relevantSNsInCV{$thisCV} =~ /\b$thisSN\b/);
|
||||
}
|
||||
say LOG ">\t\$thisSN: $thisSN, \$relevantSNsInCV{$thisCV}: $relevantSNsInCV{$thisCV}";
|
||||
}
|
||||
}
|
||||
#say LOG ">\t\$thisSN: $thisSN, \$relevantSNsInCV{$thisCV}: $relevantSNsInCV{$thisCV}";
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -348,16 +360,18 @@ sub ProcessEachVerse {
|
|||
foreach my $key (sort keys %orderRef) {
|
||||
# for each verse
|
||||
my ($thisCV) = ($orderRef{$key});
|
||||
say LOG "\n<1>\n$thisCV\t$ULBtext{$thisCV}\n\$relevantSNsInCV{$thisCV}: $relevantSNsInCV{$thisCV}\t\$deleteNum{$thisCV}: $deleteNum{$thisCV}, \$specifiedEntries{$thisCV}: $specifiedEntries{$thisCV}";
|
||||
say LOG "\n<5>\n$thisCV\t$ULBtext{$thisCV}\n\t\$relevantSNsInCV{$thisCV}: $relevantSNsInCV{$thisCV}\n\t\$deleteNum{$thisCV}: $deleteNum{$thisCV},\n\t\$specifiedEntries{$thisCV}: $specifiedEntries{$thisCV}";
|
||||
|
||||
($relevantSNsInCV{$thisCV}) = DeleteSpecifiedWords ($relevantSNsInCV{$thisCV}, $specifiedEntries{$thisCV});
|
||||
|
||||
($relevantSNsInCV{$thisCV}) = DeleteObviatedSNs($relevantSNsInCV{$thisCV}, $deleteNum{$thisCV});
|
||||
# delete obviated SNs
|
||||
|
||||
say LOG "\t\$relevantSNsInCV{$thisCV}: $relevantSNsInCV{$thisCV}";
|
||||
say LOG "<6>\t\$relevantSNsInCV{$thisCV}: $relevantSNsInCV{$thisCV}";
|
||||
my $processSequence = "$specifiedEntries{$thisCV}√$relevantSNsInCV{$thisCV}";
|
||||
$processSequence =~ s/√+/√/g;
|
||||
$processSequence =~ s/^√+//;
|
||||
say LOG "\t\t\$processSequence: $processSequence";
|
||||
say LOG "\t\$processSequence: $processSequence";
|
||||
|
||||
$finalOutString .= ExecuteProcessSequence($thisCV, $processSequence, $ULBtext{$thisCV});
|
||||
|
||||
|
@ -365,9 +379,22 @@ sub ProcessEachVerse {
|
|||
|
||||
}
|
||||
|
||||
# DeleteSpecifiedWords($sns, $toDelete) -> $sns
#
# $sns      : "√"-separated list of Strong's numbers for one verse.
# $toDelete : "√"-separated list of specified entries, each of the form
#             "SN≈snippet≈page"; only the SN part (before the first "≈")
#             is used here.
#
# Returns $sns with every number named in $toDelete removed. Numbers are
# compared as whole tokens, so deleting "H12" never damages "H123" (the
# previous substring regex did). $sns is returned untouched when nothing is
# removed; otherwise the survivors are returned, each followed by "√".
# Progress is written to the LOG filehandle.
sub DeleteSpecifiedWords {
    my ($sns, $toDelete) = @_;
    say LOG "<5.1>\t\$sns:\t\t$sns\n\t\t\$toDelete:\t$toDelete";

    # Collect the numbers to drop.
    my %skip;
    foreach my $one (split /√/, $toDelete // '') {
        say LOG "<5.1.1>\t\$one: $one";
        my ($sn) = $one =~ /\A([^≈]*)/;
        say LOG "<5.1.2>\t\$one: $sn";
        $skip{$sn} = 1 if length $sn;
    }
    return $sns unless %skip && defined $sns;

    my @tokens = grep { length } split /√/, $sns;
    my @kept   = grep { !$skip{$_} } @tokens;

    # Nothing matched: hand back the caller's string byte-for-byte.
    return $sns if @kept == @tokens;

    return join '', map { "$_√" } @kept;
}
|
||||
|
||||
sub DeleteObviatedSNs {
|
||||
my ($sns, $toDelete) = @_;
|
||||
my @sns = split /√/, $sns;
|
||||
say LOG "<5.2>\t\$sns:\t\t$sns\n\t\t\$toDelete:\t$toDelete";
|
||||
my @delete = split /√/, $toDelete;
|
||||
foreach my $one (@delete) {
|
||||
$sns =~ s/^(.*)$one(√)?(.*)$/$1$2$3/;
|
||||
|
@ -379,7 +406,7 @@ sub ExecuteProcessSequence {
|
|||
my ($tempText, $thisCVOutString, $position, $outputFormRef) = ($trueText, "");
|
||||
my (%snippetSequence);
|
||||
my (@SNsequence) = split /√/, $sequence;
|
||||
say LOG "$ref: @SNsequence";
|
||||
say LOG "$ref:\n@SNsequence";
|
||||
if ($ref =~ /^([^:]*) (\d+):(\d+)/) {
|
||||
$outputFormRef = "$1,$2,$3"
|
||||
}
|
||||
|
@ -388,13 +415,15 @@ sub ExecuteProcessSequence {
|
|||
# for each relevant SN in verse
|
||||
# for each tW entry
|
||||
# if specified tW
|
||||
say LOG "=====\n\$ref: $ref\t\$candidate: $candidate\t\$entriesThisSN{$candidate}: $entriesThisSN{$candidate}";
|
||||
say LOG "=====\n\$ref: $ref\t\$candidate: $candidate\t\$entriesThisSN{$candidate}: $entriesThisSN{$candidate}\n$tempText";
|
||||
my ($found, $sn, $ulbWord, $tWpage);
|
||||
if ($candidate =~ /([^≈]*)≈([^≈]*)≈([^≈]*)/) {
|
||||
# get position in true text to array
|
||||
# delete found text from temp text
|
||||
($sn, $ulbWord, $tWpage) = ($1,$2,$3);
|
||||
while ($ulbWord =~ s/^(.*) \.\.\. (.*) \.\.\. (.*)/($1)\\b(.*?)\\b($2)\\b(.*?)\\b($3)/) {}
|
||||
while ($ulbWord =~ s/^(.*) \.\.\. (.*)/($1)\\b(.*?)\\b($2)/) {}
|
||||
say LOG "<A>\t\$ulbWord: $ulbWord";
|
||||
if ($tempText =~ s/^(.*)\b$ulbWord\b(.*)$/$1$2/) {
|
||||
$position = length $1;
|
||||
$snippetSequence{$position} = "$ulbWord,$dir{$tWpage},$tWpage";
|
||||
|
@ -416,16 +445,18 @@ sub ExecuteProcessSequence {
|
|||
# get ULB snippet to verse match list
|
||||
# get position in true text to array
|
||||
# delete found text from temp text
|
||||
if ($thisEntry =~ /\(\.\*\?\)/ && $tempText =~ s/^(.*)\b($thisEntry)\b(.*)$/$1$3/i) {
|
||||
print LOG "<2>\t\$thisEntry |$thisEntry| is found in the first test\n___";
|
||||
$tempText =~ s/^(.*)\b$thisEntry\b(.*)$/$1$2/;
|
||||
if ($thisEntry =~ /\(\.\*\?\)/ && $tempText =~ /$thisEntry/i) {
|
||||
say LOG "<7>\t\$thisEntry |$thisEntry| is found in the first test";
|
||||
if ($tempText =~ s/^(.*)\b($thisEntry)\b(.*)$/$1$3$4/i) {
|
||||
say LOG "<7.1>\t\$1: $1 \$2: $2 \$3: $3 \$4: $4\t\$5: $5";
|
||||
}
|
||||
if ($trueText =~ /^(.*)\b($thisEntry)\b.*$/) {
|
||||
$position = length $1;
|
||||
}
|
||||
$snippetSequence{$position} = "$thisEntry,$dir{$pagesThisEntry{$thisEntry}},$pagesThisEntry{$thisEntry}";
|
||||
$found = 1;
|
||||
goto Breakout;
|
||||
}
|
||||
}
|
||||
elsif ($tempText =~ s/\b($thisEntry)[^\w']//i || $tempText =~ s/\b($thisEntry)["']//i || $tempText =~ s/["']($thisEntry)\b//i) {
|
||||
say LOG "\$thisEntry |$thisEntry| is found in the second test\n---
|
||||
";
|
||||
|
|
Loading…
Reference in New Issue