Now needed for tagging
This commit is contained in:
parent
13b8c8c948
commit
91d789da65
|
@ -0,0 +1,180 @@
|
|||
# Builds easily searchable file from current OGNT and MAST-HB XML file
|
||||
# Takes verse at a time from slurped file
|
||||
# Useful for Mine routine building MAST PDF
|
||||
use 5.18.0;
|
||||
use File::Slurp;
|
||||
use File::Find ;
|
||||
use Cwd ;
|
||||
use utf8;
|
||||
#use open IN => ":utf8", OUT => ":utf8";
|
||||
use open IO => ":utf8";
|
||||
open LOG, ">:utf8", "Logs/log.txt" or die;
|
||||
open OUT, ">:utf8", "Output/Original_languages.txt" or die;
|
||||
|
||||
my (@folders) = ("/Users/Henry/Documents/WACS/MAST_HB", "/Users/Henry/Documents/WACS/OGNT");
|
||||
my (%order, %long);
|
||||
my $outText;
|
||||
|
||||
while (<DATA>) {
|
||||
chomp;
|
||||
if (/^([^\t]*)\t([^\t]*)\t(.*)$/) {
|
||||
$order{$1} = $3;
|
||||
$long{$2} = $3;
|
||||
}
|
||||
}
|
||||
#foreach my $key (sort keys %long) {
|
||||
# say LOG $key . "\t" . $long{$key};
|
||||
#}
|
||||
|
||||
foreach my $folder (@folders) {
|
||||
say LOG "$folder";
|
||||
#system "cd $folder;xml val *.xml;echo 'Continue? (Control + C to quit, Enter to continue)';read name;";
|
||||
my ($topDir, $lang) = ($folder, "H");
|
||||
|
||||
if ($folder =~ /OGNT/) {$lang = "G"}
|
||||
|
||||
my @filesToRun = ();
|
||||
my $filePattern = '*.xml' ;
|
||||
find( sub { push @filesToRun, $File::Find::name if ( m/^(.*)$filePattern$/ ) }, $topDir) ;
|
||||
@filesToRun = sort @filesToRun;
|
||||
foreach my $file ( @filesToRun ) {
|
||||
say LOG $file;
|
||||
my $fileText = read_file("$file", binmode => 'utf8');
|
||||
my ($bk, $ch, $vs, $lemma, $word, $nbk, $nch, $nvs, $previous, $current, $interruption, $next, $verse, $thisBookText, $prevVsText, $holdText, $thisVsText, $nextVsText, $oldHold, $shortCur, $shortIntr);
|
||||
while ($fileText =~ /<verse osisID="((.*?)\.(\d+)\.(\d+))"(\n|.)*?<\/verse>/spg) {
|
||||
$verse = $&;
|
||||
say LOG "\$1: $1, \$2: $2, \$3: $3, \$4: $4";
|
||||
($shortCur, $bk, $ch, $vs) = ($1, $long{$2}, $3, $4);
|
||||
$previous = $current;
|
||||
$current = "$bk $ch:$vs";
|
||||
say LOG "<0>\t\$current: $current";
|
||||
my $verseText;
|
||||
|
||||
if ($verse =~ /<note>KJV:(([^\.]*)\.([^\.]*).([^<]*))<\/note>/p) { # Occurs only in OT
|
||||
say LOG "<1>\t$&";
|
||||
($shortIntr, $nbk, $nch, $nvs) = ($1, $long{$2}, $3, $4);
|
||||
$interruption = "$nbk $nch:$nvs";
|
||||
say LOG "<2>\t\$interruption: $interruption (of $current)";
|
||||
if ($verse =~ /<verse osisID="$shortCur">\n[^<\n]*<note>KJV:$shortIntr<\/note>/) { # Complete renumber of verse
|
||||
say LOG "<3>\t$&";
|
||||
$current = $interruption;
|
||||
$verseText = GetContent($verse);
|
||||
$verseText = "$current\t$oldHold$verseText";
|
||||
$oldHold = "";
|
||||
}
|
||||
elsif ($interruption ne $current && $verse =~ /<note>KJV:([^\.]*)\.([^\.]*).([^<]*)<\/note>/p) { # New verse begins here
|
||||
say LOG "<4>\t$&";
|
||||
($thisVsText, $nextVsText) = (${^PREMATCH}, ${^POSTMATCH});
|
||||
$thisVsText = GetContent($thisVsText);
|
||||
$nextVsText = GetContent($nextVsText);
|
||||
$outText .= "$oldHold\n$current\t$thisVsText ";
|
||||
$oldHold = "$nextVsText ";
|
||||
}
|
||||
elsif ($interruption eq $current && $verse =~ /<note>KJV:([^\.]*)\.([^\.]*).([^<]*)<\/note>/p) { # Previous verse continues here
|
||||
say LOG "<5>\t$&";
|
||||
($prevVsText, $thisVsText) = (${^PREMATCH}, ${^POSTMATCH});
|
||||
$prevVsText = GetContent($prevVsText);
|
||||
$thisVsText = GetContent($thisVsText);
|
||||
$verseText .= "$oldHold\n$current\t$thisVsText";
|
||||
$oldHold = "";
|
||||
}
|
||||
}
|
||||
else {
|
||||
# The whole verse should be processed in one piece
|
||||
#$verseText = GetContent($verse);
|
||||
#$verseText = "$current\$tverseText"
|
||||
}
|
||||
#$thisBookText .= "\n$verseText";
|
||||
#$oldHold = $holdText
|
||||
}
|
||||
#$thisBookText =~ s/</<$lang/g;
|
||||
#$outText .= "$thisBookText\n";
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
say OUT $outText;
|
||||
|
||||
close OUT;
|
||||
close LOG;
|
||||
|
||||
print "\n\tDone.";
|
||||
|
||||
sub GetContent {
|
||||
my ($text, $returnText) = ($_[0], "");
|
||||
while ($text =~ /<w lemma="([^"]*)"[^>]*>([^<]*)<\/w>/) {
|
||||
my ($lemma, $OL) = ($1, $2);
|
||||
$lemma =~ s/[^\d"]*(\d+)[^\d"]*/$1/;
|
||||
$returnText .= "$OL <$lemma> "
|
||||
}
|
||||
return $returnText
|
||||
}
|
||||
|
||||
__DATA__
|
||||
01 gen Genesis
|
||||
02 exo Exodus
|
||||
03 lev Leviticus
|
||||
04 num Numbers
|
||||
05 deu Deuteronomy
|
||||
06 jos Joshua
|
||||
07 jdg Judges
|
||||
08 rut Ruth
|
||||
09 1sa 1 Samuel
|
||||
10 2sa 2 Samuel
|
||||
11 1ki 1 Kings
|
||||
12 2ki 2 Kings
|
||||
13 1ch 1 Chronicles
|
||||
14 2ch 2 Chronicles
|
||||
15 ezr Ezra
|
||||
16 neh Nehemiah
|
||||
17 est Esther
|
||||
18 job Job
|
||||
19 psa Psalms
|
||||
20 pro Proverbs
|
||||
21 ecc Ecclesiastes
|
||||
22 sng Song of Solomon
|
||||
23 isa Isaiah
|
||||
24 jer Jeremiah
|
||||
25 lam Lamentations
|
||||
26 ezk Ezekiel
|
||||
27 dan Daniel
|
||||
28 hos Hosea
|
||||
29 jol Joel
|
||||
30 amo Amos
|
||||
31 oba Obadiah
|
||||
32 jon Jonah
|
||||
33 mic Micah
|
||||
34 nam Nahum
|
||||
35 hab Habakkuk
|
||||
36 zep Zephaniah
|
||||
37 hag Haggai
|
||||
38 zec Zechariah
|
||||
39 mal Malachi
|
||||
41 mat Matthew
|
||||
42 mrk Mark
|
||||
43 luk Luke
|
||||
44 jhn John
|
||||
45 act Acts
|
||||
46 rom Romans
|
||||
47 1co 1 Corinthians
|
||||
48 2co 2 Corinthians
|
||||
49 gal Galatians
|
||||
50 eph Ephesians
|
||||
51 php Philippians
|
||||
52 col Colossians
|
||||
53 1th 1 Thessalonians
|
||||
54 2th 2 Thessalonians
|
||||
55 1ti 1 Timothy
|
||||
56 2ti 2 Timothy
|
||||
57 tit Titus
|
||||
58 phm Philemon
|
||||
59 heb Hebrews
|
||||
60 jas James
|
||||
61 1pe 1 Peter
|
||||
62 2pe 2 Peter
|
||||
63 1jn 1 John
|
||||
64 2jn 2 John
|
||||
65 3jn 3 John
|
||||
66 jud Jude
|
||||
67 rev Revelation
|
|
@ -0,0 +1,126 @@
|
|||
# Creates workable ULB.xml file that has all USFM markers in place.
|
||||
|
||||
use 5.18.0;
|
||||
use File::Slurp;
|
||||
use File::Find ;
|
||||
use Cwd ;
|
||||
use utf8;
|
||||
#use open IN => ":utf8", OUT => ":utf8";
|
||||
use open IO => ":utf8";
|
||||
|
||||
open(LOG, ">Logs/Log.txt") or die "$!";
|
||||
open(OUT, ">/Users/Henry/Documents/WACS/en_ulb_tagged/ULB_xml/ULB.xml") or die "$!";
|
||||
say OUT "<xml>";
|
||||
|
||||
my ($topDir, $outDir) = ("/Users/Henry/Documents/WACS/en_ulb", "/Users/Henry/Documents/WACS/en_ulb_tagged/ULB_xml");
|
||||
|
||||
my @filesToRun = ();
|
||||
my $filePattern = '\.usfm' ;
|
||||
#my $filePattern = '67-REV\.usfm' ;
|
||||
my $file;
|
||||
find( sub { push @filesToRun, $File::Find::name if ( m/^(.*)$filePattern$/ ) }, $topDir) ;
|
||||
|
||||
@filesToRun = sort @filesToRun;
|
||||
|
||||
ReadFiles();
|
||||
|
||||
say OUT "</xml>";
|
||||
close OUT;
|
||||
close LOG;
|
||||
|
||||
say "\nDone.";
|
||||
# =====
|
||||
sub ReadFiles {
|
||||
|
||||
foreach $file ( @filesToRun ) {
|
||||
say $file;
|
||||
my @array;
|
||||
my $fileText = read_file("$file", binmode => 'utf8');
|
||||
$fileText =~ s/[ \n]+$//;
|
||||
say LOG "|$fileText|";
|
||||
|
||||
#Delete \n
|
||||
my ($book, $chap, $vers, $chapStart);
|
||||
if ($fileText =~ /\\h ([^\n]*)/) {
|
||||
$book = $1
|
||||
}
|
||||
#say LOG $book;
|
||||
$fileText =~ s/\n/ /g;
|
||||
$fileText =~ s/ / /g;
|
||||
$fileText =~ s/\\s5/\n$&/g;
|
||||
$fileText =~ s/\\v/√/g;
|
||||
while ($fileText =~ s/(√[^√\n]*)(√)/$1\n$2/) {}
|
||||
$fileText =~ s/√/\\v/g;
|
||||
$fileText =~ s/(\\id[^\n]*)\n/\t\t<heading>$1<\/heading>\n/;
|
||||
$fileText =~ s/ +\n/\n/g;
|
||||
$fileText =~ s/(\\(q\d?|pi?|m|n?b))\n/\n$1 /g;
|
||||
#say LOG $fileText;
|
||||
@array = split /\n/, $fileText;
|
||||
$fileText = "";
|
||||
foreach my $line (@array) {
|
||||
chomp;
|
||||
if ($line =~ /<book name="(.*?)">/) {$book = $1;}
|
||||
if ($line =~ /\\c (\d+).* \\v (\d+)/) {
|
||||
($chap, $vers) = ($1, $2);
|
||||
$line = "\t\t<chapter name=\"$book $chap\">\n\t\t\t<verse name=\"$book $chap:$vers\">$line</verse>";
|
||||
$line = "\t\t</chapter>\n$line" if $chapStart;
|
||||
$chapStart = 1;
|
||||
}
|
||||
elsif ($line =~ /\\v (\d+)/) {
|
||||
$vers = $1;
|
||||
$line = "\t\t\t<verse name=\"$book $chap:$vers\">$line</verse>"
|
||||
}
|
||||
#say LOG "===\n<AA>\n$line";
|
||||
$line =~ s/(<verse[^>]*>)(.*\\v \d+ )(.*)(<\/verse>)/$1\n\t\t\t\t<preText>$2<\/preText>\n\t\t\t\t<text>$3<\/text>\n\t\t\t$4/s;
|
||||
#say LOG "===\n<BB>\n$line";
|
||||
if ($line =~ /<text>.*<\/text>/p) {
|
||||
say LOG "<-0>\t$line";
|
||||
my ($pre, $match, $post) = (${^PREMATCH}, ${^MATCH}, ${^POSTMATCH});
|
||||
#say LOG "<-1>\t\$pre: $pre,\n\$match: $match,\n\$post: $post";
|
||||
$match = TagInternalUSFM ($match);
|
||||
$line = $pre . $match . $post;
|
||||
}
|
||||
|
||||
say LOG "---\n<CC>\n$line\n===";
|
||||
$line =~ s# +</#</#g;
|
||||
$fileText .= $line . "\n";
|
||||
}
|
||||
say OUT "\t<book name=\"$book\">\n$fileText\t\t</chapter>\n\t</book>";
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
sub TagInternalUSFM {
|
||||
my ($line, $placeNum) = ($_[0], 1);
|
||||
my %places;
|
||||
#say LOG "Tagging internal USFM in \$line $line.";
|
||||
while ($line =~ /(<text>.*)(\\f .*?\\f\*)(.*<\/text>)/g) {
|
||||
#say LOG "<+1>\t$2";
|
||||
$line =~ s/(<text>.*)(\\f .*?\\f\*)(.*<\/text>)/$1<place number="$placeNum"\/>$3/;
|
||||
$places{$placeNum} = $2;
|
||||
$placeNum ++;
|
||||
}
|
||||
#say LOG "<+2>\t$line";
|
||||
while ($line =~ /(<text>.*)(\\qs .*?\\qs\*)(.*<\/text>)/g) {
|
||||
#say LOG "<+3>\t$2";
|
||||
$line =~ s/(<text>.*)(\\qs .*?\\qs\*)(.*<\/text>)/$1<place number="$placeNum"\/>$3/;
|
||||
$places{$placeNum} = $2;
|
||||
$placeNum ++;
|
||||
}
|
||||
#say LOG "<+4>\t$line";
|
||||
while ($line =~ /(<text>.*)(\\([bm]|pi?|q\d?|s2))( .*<\/text>)/g) {
|
||||
#say LOG "<+5>\t$2";
|
||||
$line =~ s/(<text>.*)(\\([bm]|pi?|q\d?|s2))(.*<\/text>)/$1<place number="$placeNum"\/>$4/;
|
||||
$places{$placeNum} = $2;
|
||||
$placeNum ++;
|
||||
}
|
||||
#say LOG "<+6>\t$line";
|
||||
$line =~ s/ / /g;
|
||||
#say LOG "<+7>\t$line";
|
||||
foreach my $place (sort keys %places) {
|
||||
#say LOG "<+8>\tReplacing <place number=\"$place\"\/> with <usfm>$places{$place}<\/usfm> in\n$line.";
|
||||
unless ($line =~ s/<place number="$place"\/>/<usfm>$places{$place}<\/usfm>/) {die}
|
||||
}
|
||||
say LOG "<+9>\t$line";
|
||||
return $line;
|
||||
}
|
|
@ -0,0 +1,113 @@
|
|||
# Checks ULB in ULB.xml against tagged ULB
|
||||
use 5.18.0;
|
||||
use File::Slurp;
|
||||
use File::Find ;
|
||||
use Cwd ;
|
||||
use utf8;
|
||||
#use open IN => ":utf8", OUT => ":utf8";
|
||||
use open IO => ":utf8";
|
||||
|
||||
open(LOG, ">Logs/Log.txt") or die "$!";
|
||||
|
||||
my ($ULBxml, $taggedULBDir) = ("/Users/Henry/Documents/WACS/Tips_and_Hacks/Tagged_OGNT/ULB_xml/ULB.xml", "/Users/Henry/Documents/WACS/Tips_and_Hacks/Tagged_OGNT/Manual_Tagging");
|
||||
my (@filesToRun) = ();
|
||||
my %fullName;
|
||||
my $filePattern = "\.xml" ;
|
||||
my $file;
|
||||
|
||||
my $xmlText = read_file("$ULBxml", binmode => 'utf8');
|
||||
|
||||
GetBooksToCheck();
|
||||
Compare();
|
||||
|
||||
sub Compare {
|
||||
foreach my $file (@filesToRun) {
|
||||
|
||||
say LOG "|$file|, |$taggedULBDir/$file.xml|";
|
||||
my $taggedText = read_file("$taggedULBDir/$file.xml", binmode => 'utf8');
|
||||
|
||||
GetGist($file, $taggedText);
|
||||
}
|
||||
}
|
||||
|
||||
sub GetGist {
|
||||
my ($fileName, $wholeTaggedText) = @_;
|
||||
my ($verseRef, $standard, $tagged);
|
||||
say LOG "|$fileName|, |$fullName{$fileName}|";
|
||||
while ($wholeTaggedText =~ /<verse name="($fullName{$fileName} \d+:\d+)">((.|\n)*?)<preText>(.*?)<\/preText>((.|\n)*?)\n\t+((<w ((.|\n)*?)\n)*)\t+<\/verse>/sg) {
|
||||
my ($preText, $gist) = ($4, $7);
|
||||
$verseRef = $1;
|
||||
say LOG "\$verseRef: |$verseRef|";
|
||||
if ($xmlText =~ /<verse name="$verseRef">\n\t+<preText>([^\n]*)<\/preText>\n\t+<text>([^\n]*)<\/text>\n\t+<\/verse>/s) {
|
||||
my ($standardPT, $standardT) = ($1, $2);
|
||||
($tagged) = Untag($preText, $gist);
|
||||
#say LOG $tagged;
|
||||
$standard = $standardPT . " " . $standardT;
|
||||
$standard =~ s/<[^<>]*>//g;
|
||||
$standard =~ s/ {2,}/ /g;
|
||||
$standard =~ s/ +$//;
|
||||
if ($standard ne $tagged) {
|
||||
say LOG "\nMISMATCH:\n\$standard\n$standard\n\$tagged\n$tagged\n"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
sub Untag {
|
||||
my ($pre, $txt) = ($_[0], $_[1]);
|
||||
#say LOG "\$pre: $pre\n\$txt: $txt";
|
||||
$txt =~ s/[\t\n]/ /g;
|
||||
$txt =~ s/(√|<[^<>]*>)//g;
|
||||
$txt = $pre . " " . $txt;
|
||||
$txt =~ s/ {2,}/ /g;
|
||||
$txt =~ s/ +$//;
|
||||
return $txt;
|
||||
}
|
||||
|
||||
sub GetBooksToCheck {
|
||||
while (<DATA>) {
|
||||
chomp;
|
||||
unless (/^#/) {
|
||||
if (/([^\t]*)\t([^\t]*)/) {
|
||||
my ($file, $book) = ($1, $2);
|
||||
say "|$file|";
|
||||
push @filesToRun, "$file";
|
||||
$fullName{$file} = $book;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
close LOG;
|
||||
|
||||
say "\nDone.";
|
||||
# =====
|
||||
__DATA__
|
||||
#41-MAT Matthew
|
||||
#42-MRK Mark
|
||||
#43-LUK Luke
|
||||
#44-JHN John
|
||||
#45-ACT Acts
|
||||
#46-ROM Romans
|
||||
#47-1CO 1 Corinthians
|
||||
#48-2CO 2 Corinthians
|
||||
#49-GAL Galatians
|
||||
#50-EPH Ephesians
|
||||
#51-PHP Philippians
|
||||
#52-COL Colossians
|
||||
#53-1TH 1 Thessalonians
|
||||
#54-2TH 2 Thessalonians
|
||||
#55-1TI 1 Timothy
|
||||
#56-2TI 2 Timothy
|
||||
57-TIT Titus
|
||||
#58-PHM Philemon
|
||||
#59-HEB Hebrews
|
||||
#60-JAS James
|
||||
#61-1PE 1 Peter
|
||||
#62-2PE 2 Peter
|
||||
#63-1JN 1 John
|
||||
#64-2JN 2 John
|
||||
#65-3JN 3 John
|
||||
#66-JUD Jude
|
||||
#67-REV Revelation
|
|
@ -0,0 +1,410 @@
|
|||
# Takes current tW entries and populates tagged OGNT XML
|
||||
#
|
||||
# This is the current best version
|
||||
# Requires ULB that includes USFMs.
|
||||
|
||||
|
||||
use 5.12.0;
|
||||
use File::Slurp;
|
||||
use File::Find ;
|
||||
use Cwd ;
|
||||
use utf8;
|
||||
#use open IN => ":utf8", OUT => ":utf8";
|
||||
use open IO => ":utf8";
|
||||
$" = "\n";
|
||||
|
||||
mkdir "Logs";
|
||||
open(LOG, ">Logs/tW_pairs.txt") or die "$!";
|
||||
my $ULBfile = "/Users/Henry/Documents/WACS/en_ulb_tagged/ULB_xml/ULB.xml";
|
||||
my $topDirOGNT = "/Users/Henry/Documents/WACS/OGNT";
|
||||
#my $topDirOGNT = "/Users/Henry/Documents/WACS/en_ulb_tagged/Tag_test";
|
||||
my $topDirtW = "/Users/Henry/Documents/WACS/en_tw/bible";
|
||||
my ($outDir, $outFile) = ("/Users/Henry/Documents/WACS/en_ulb_tagged/Auto-tagged", "");
|
||||
my ($ULBText, $workText, $language);
|
||||
my ($file);
|
||||
my (%ULBtextThisVerse, %ULBpreTextThisVerse, %SNsThisVerse, %entriesThisSN, %longName);
|
||||
|
||||
my @OGNTfilesToRun = ();
|
||||
#my $filePattern = '\.xml' ;
|
||||
my $filePattern = '52-COL\.xml' ;
|
||||
find( sub { push @OGNTfilesToRun, $File::Find::name if ( m/^(.*)$filePattern$/ ) }, $topDirOGNT) ;
|
||||
|
||||
say LOG "\@OGNTfilesToRun:\n@OGNTfilesToRun\n";
|
||||
|
||||
my @tWfilesToRun = ();
|
||||
$filePattern = '.md' ;
|
||||
find( sub { push @tWfilesToRun, $File::Find::name if ( m/^(.*)$filePattern$/ ) }, $topDirtW) ;
|
||||
|
||||
Read_tW_Files();
|
||||
|
||||
close LOG;
|
||||
open(LOG, ">Logs/Log.txt") or die "$!";
|
||||
|
||||
LongBookNames();
|
||||
Prepare_ULB_file();
|
||||
say LOG "Prepare_ULB_file done.\n\@OGNTfilesToRun:\n@OGNTfilesToRun\n";
|
||||
|
||||
ProcessXML();
|
||||
# put unused SN at end of verse
|
||||
|
||||
close LOG;
|
||||
|
||||
say "\nDone.";
|
||||
# =====
|
||||
sub Read_tW_Files {
|
||||
foreach $file ( @tWfilesToRun ) {
|
||||
say LOG $file;
|
||||
my (@sns);
|
||||
my $entries;
|
||||
my $fileText = read_file("$file", binmode => 'utf8');
|
||||
if ($fileText =~ /\* Strong's: ([^\n]*)\n/) {
|
||||
my $sns = $1;
|
||||
#say LOG "\t$sns";
|
||||
@sns = split /, /, $sns;
|
||||
}
|
||||
if ($fileText =~ /Forms Found in the English ULB:\n\n([^\n]*)\n/) {
|
||||
$entries = $1;
|
||||
die "$fileText" if $entries eq "";
|
||||
#say LOG "\t\t$entries"
|
||||
}
|
||||
foreach my $sn (@sns) {
|
||||
$entriesThisSN{$sn} .= $entries . ", ";
|
||||
#say LOG "\t\t\t$sn: $entriesThisSN{$sn}"
|
||||
}
|
||||
}
|
||||
foreach my $sn (sort keys %entriesThisSN) {
|
||||
#say LOG "$sn: $entriesThisSN{$sn}";
|
||||
my @entries = split /, /, $entriesThisSN{$sn};
|
||||
@entries = reverse sort { substr($a,0,1) <=> substr($b,0,1)
|
||||
|| length($a) <=> length($b)
|
||||
|| $a <=> $b }
|
||||
@entries;
|
||||
$entriesThisSN{$sn} = "";
|
||||
foreach my $slice (@entries) {
|
||||
$entriesThisSN{$sn} .= "$slice, "
|
||||
}
|
||||
$entriesThisSN{$sn} =~ s/, $//;
|
||||
say LOG "$sn: $entriesThisSN{$sn}";
|
||||
}
|
||||
|
||||
}
|
||||
sub LongBookNames {
|
||||
while (<DATA>) {
|
||||
chomp;
|
||||
if (/([^\t]*)\t([^\t]*)\t([^\t]*)/) {
|
||||
$longName{$2} = $3
|
||||
}
|
||||
}
|
||||
}
|
||||
sub ProcessXML {
|
||||
foreach my $file (@OGNTfilesToRun) {
|
||||
my $greekText;
|
||||
my $fileGist;
|
||||
if ($file =~ /((..)....\.xml)/) {
|
||||
($fileGist, $language) = ($1, $2);
|
||||
if ($language > 40) {
|
||||
$language = "G"
|
||||
} else {$language = "H"}
|
||||
|
||||
}
|
||||
say LOG "<0>\t$file \t$fileGist";
|
||||
open(OUT, ">$outDir/$fileGist") or die "$outDir/$fileGist: $!";
|
||||
my ($pre, $gist, $post, $bk, $ch, $vs, $thisVerse, $staticText, $residueText, $matchedLines, $flag, $thisVerseForOutput,
|
||||
$linesWithRelevantSNs, $linesNotMatched, $orderedOutputLines, $linesToSkip, $thisPreText);
|
||||
open (my $thisFile, "<:utf8", "$file") or die "$file:\n$!";
|
||||
my ($originalLinesCount, $rsnCount, $skipCount, $noRSNCount, $outCount);
|
||||
while (my $thisLine = <$thisFile>) {
|
||||
chomp $thisLine;
|
||||
if ($thisLine =~ /<verse/) {say LOG "\n=========================="}
|
||||
if ($thisLine =~ /<\/verse>/) {
|
||||
say LOG "<0.1>\t$thisLine";
|
||||
say LOG "<11>\n\$linesWithRelevantSNs\n$linesWithRelevantSNs\n\$linesToSkip\n$linesToSkip\$residueText\n$residueText";
|
||||
|
||||
($matchedLines, $residueText, $linesNotMatched) = ProcessRelevantSNs($linesWithRelevantSNs, $staticText, $residueText);
|
||||
say LOG "<14>\t\$matchedLines\n$matchedLines\n\$linesNotMatched\n$linesNotMatched";
|
||||
my %orderedLine;
|
||||
$matchedLines =~ s/\n{2,}/\n/gs;
|
||||
say LOG "<15\tBefore sort of \$matchedLines:\n$matchedLines\n";
|
||||
while ($matchedLines =~ /([^◊]*)◊(\d*)\n/g) {
|
||||
$orderedLine{$2} = $1;
|
||||
say LOG "<5>\t\$2: $2\t\$1: $1";
|
||||
}
|
||||
$matchedLines = "";
|
||||
foreach my $line (sort {$a <=> $b} keys %orderedLine) {
|
||||
say LOG "<5.5>\t\$line: $line\t\$orderedLine{$line}: $orderedLine{$line}";
|
||||
$matchedLines .= "$orderedLine{$line}\n"
|
||||
}
|
||||
chomp $matchedLines;
|
||||
say LOG "<16>\tAfter sort of \$matchedLines:\n$matchedLines\n\$linesNotMatched\n$linesNotMatched";
|
||||
$residueText =~ s/<usfm>.*?<\/usfm>//g;
|
||||
$residueText =~ s/(^q | q$)//g;
|
||||
$residueText =~ s/ {3,}/ /g;
|
||||
$residueText =~ s/^ +//;
|
||||
$residueText =~ s/ +$/$1/;
|
||||
$greekText =~ s/^ +//;
|
||||
$greekText =~ s/ +$/$1/;
|
||||
$staticText =~ s/^ +//;
|
||||
$staticText =~ s/ +$/$1/;
|
||||
my $internalUSFM;
|
||||
$internalUSFM .= "\t\t\t\t\t$&\n" while ($staticText =~ /<usfm>.*?<\/usfm>/g);
|
||||
$linesNotMatched =~ s/\n+$//;
|
||||
$linesToSkip =~ s/\n+$//;
|
||||
$matchedLines =~ s/^\n+//;
|
||||
$internalUSFM =~ s/\n+$//;
|
||||
say LOG "<17>\tAfter pruning \$matchedLines:\n$matchedLines\n\$linesNotMatched\n$linesNotMatched";
|
||||
say OUT "\t\t\t\t\t<Greek>$greekText</Greek>";
|
||||
say OUT "\t\t\t\t\t<preText>$thisPreText</preText>";
|
||||
say OUT "\t\t\t\t\t<ULB>$staticText</ULB>";
|
||||
say OUT "\t\t\t\t\t<residue>$residueText</residue>";
|
||||
say OUT "$matchedLines" unless ($matchedLines eq "");
|
||||
say OUT "$linesNotMatched" unless ($linesNotMatched eq "");
|
||||
say OUT "$internalUSFM" unless ($internalUSFM eq "");
|
||||
say OUT "$linesToSkip" if ($linesToSkip);
|
||||
say OUT "$thisLine";
|
||||
($originalLinesCount, $rsnCount, $skipCount, $noRSNCount, $outCount) = ();
|
||||
($thisVerseForOutput, $flag, $workText, $greekText, $linesNotMatched, $linesToSkip, $residueText, $orderedOutputLines, $linesWithRelevantSNs) = ();
|
||||
($linesToSkip) = ("");
|
||||
}
|
||||
elsif ($thisLine =~ /<w /) {
|
||||
say LOG "<0.2>\t$thisLine";
|
||||
$originalLinesCount ++;
|
||||
if ($thisLine =~ />([^\n<>]*)</) {
|
||||
$greekText .= $1 . " "
|
||||
}
|
||||
$thisLine =~ s/(<w .*)>([^<]*)(<\/w>)/$1 text="$2">$3/;
|
||||
if ($thisLine =~ /lemma="(\d+)"/) {
|
||||
my $thisLemma = $language . $1;
|
||||
if (exists $entriesThisSN{$thisLemma}) {
|
||||
$rsnCount ++;
|
||||
$linesWithRelevantSNs .= $thisLine . "\n";
|
||||
say LOG "<0.2.1>\t\$thisLemma: $thisLemma; line pushed to \$linesWithRelevantSNs";
|
||||
}
|
||||
else {
|
||||
$skipCount ++;
|
||||
$thisLine =~ s/><\/w>/>√<\/w>/;
|
||||
$linesToSkip .= "$thisLine\n";
|
||||
#say LOG "<0.2.2>\t\$thisLemma: $thisLemma; line pushed to \@LinesToSkip";
|
||||
}
|
||||
}
|
||||
}
|
||||
elsif ($thisLine =~ /<verse osisID="(.*?)\.(.*?)\.(.*?)">/) {
|
||||
say LOG "<0.3>\t$thisLine";
|
||||
($bk, $ch, $vs) = ($1,$2,$3);
|
||||
($thisVerse, $greekText) = ("$longName{$bk} $ch:$vs", "");
|
||||
$staticText = $ULBtextThisVerse{$thisVerse};
|
||||
$residueText = "q $staticText q";
|
||||
$thisPreText = $ULBpreTextThisVerse{$thisVerse};
|
||||
say OUT "\t\t\t\t<verse name=\"$thisVerse\">";
|
||||
($flag) = (1);
|
||||
}
|
||||
else {say OUT $thisLine}
|
||||
}
|
||||
|
||||
close $thisFile;
|
||||
close OUT;
|
||||
}
|
||||
}
|
||||
sub ProcessRelevantSNs {
|
||||
my ($relevantLines, $staticText, $residueText, $linesNotMatched) = (@_);
|
||||
my ($matchedLines, $thisLine);
|
||||
my @relevantLines = split /\n/, $relevantLines;
|
||||
foreach my $line (@relevantLines) {
|
||||
if ($line =~ /lemma="(\d+)"/) {
|
||||
my $thisSN = $language . $1;
|
||||
say LOG "<12>\t\$line: $line, \$thisSN: $thisSN, \$entriesThisSN{$thisSN}\n$entriesThisSN{$thisSN}";
|
||||
($thisLine, $residueText, $linesNotMatched) = MatchAndPlace($line, $thisSN, $staticText, $residueText, $linesNotMatched);
|
||||
$thisLine =~ s/[ \t]+$//;
|
||||
$matchedLines .= $thisLine . "\n";
|
||||
$matchedLines =~ s/\n{2,}$/\n/s;
|
||||
say LOG "<13>\t\$matchedLines\n$matchedLines\n\$linesNotMatched\n$linesNotMatched+++"
|
||||
}
|
||||
}
|
||||
return ($matchedLines, $residueText, $linesNotMatched);
|
||||
}
|
||||
sub MatchAndPlace {
|
||||
my ($line, $sn, $staticText, $workText, $linesNotMatched) = @_;
|
||||
#say LOG "<8>\t\$line: $line \$sn: $sn \$workText\n$workText";
|
||||
my ($workEntry, $found, $matchedLines, $first, $second, $third, $firstLen, $secondLen, $thirdLen);
|
||||
my @entries = split /, /, $entriesThisSN{$sn};
|
||||
foreach my $entry (@entries) {
|
||||
my $entryType;
|
||||
if ($entry =~ /^(.*) \.\.\. (.*) \.\.\. (.*)$/) {
|
||||
($first, $second, $third) = ($1, $2, $3);
|
||||
($firstLen, $secondLen, $thirdLen) = (length $first, length $second, length $third);
|
||||
$workEntry = "\\b" . $first . "\\b" . ".*?" . "\\b" . $second . "\\b" . ".*?" . "\\b" . $third;
|
||||
say LOG "<1a>\t\$first: $first, \$second: $second, \$third: $third, \$firstLen: $firstLen, \$secondLen,: $secondLen, \$thirdLen: $thirdLen \$entry: |$entry|\t\$workEntry: |$workEntry|";
|
||||
$entryType = 1;
|
||||
}
|
||||
elsif ($entry =~ /^(.*) \.\.\. (.*)$/) {
|
||||
($first, $second) = ($1, $2);
|
||||
($firstLen, $secondLen) = (length $first, length $second);
|
||||
$workEntry = "\\b" . $first . "\\b" . ".*?" . "\\b" . $second . "\\b";
|
||||
say LOG "<2a>\t\$first: $first, \$second: $second, \$third: $third, \$firstLen: $firstLen, \$secondLen,: $secondLen, \$entry: |$entry|\t\$workEntry: |$workEntry|";
|
||||
$entryType = 2;
|
||||
}
|
||||
else {$workEntry = $entry;}
|
||||
|
||||
my $foundText;
|
||||
#say LOG "<8.1>\t\$entryType: $entryType\t\$entry: $entry\t\$workEntry: $workEntry";
|
||||
if ($workText =~ /\b$workEntry\b/p) {
|
||||
say LOG "<8.1>Found: \t\$entryType: $entryType\t\$entry: $entry\t\$workEntry: $workEntry";
|
||||
($foundText, $workText) = ($&, "${^PREMATCH}ı${^POSTMATCH}");
|
||||
my ($place, $foundTextLength, $replacementSpaces) = (length ${^PREMATCH}, length $foundText, "");
|
||||
$line =~ s/></>$entry</;
|
||||
while (length $replacementSpaces < $foundTextLength) {$replacementSpaces .= " "}
|
||||
|
||||
if ($entryType) {
|
||||
|
||||
say LOG "<8.2>\n\$workText,: $workText, \$matchedLines:\n$matchedLines ";
|
||||
|
||||
($workText) = FixWorkText($line, $workText, $workEntry, $foundText, $foundTextLength, $first, $firstLen, $second, $secondLen, $third, $thirdLen);
|
||||
|
||||
say LOG "<8.3>\n\$workText:\n$workText\n\$matchedLines:\n$matchedLines";
|
||||
|
||||
}
|
||||
|
||||
else {$workText =~ s/ı/$replacementSpaces/;}
|
||||
|
||||
$matchedLines .= "$line◊$place";
|
||||
|
||||
say LOG "<8.4>\tAfter found, new \$workText:\n$workText";
|
||||
$found = 1;
|
||||
}
|
||||
else {
|
||||
#say LOG "\$workEntry $workEntry not found"
|
||||
}
|
||||
if ($found) {
|
||||
last
|
||||
}
|
||||
}
|
||||
unless ($found) {
|
||||
$line =~ s/></>?</;
|
||||
$linesNotMatched .= "$line\n"
|
||||
}
|
||||
return ($matchedLines, $workText, $linesNotMatched)
|
||||
}
|
||||
sub FixWorkText {
|
||||
my ($thisLine, $text, $entry, $foundText, $foundTextLength, $first, $firstLen, $second, $secondLen, $third, $thirdLen) = @_;
|
||||
my ($firstSpace, $secondSpace, $thirdSpace);
|
||||
while (length $firstSpace < $firstLen) {$firstSpace .= " "}
|
||||
while (length $secondSpace < $secondLen) {$secondSpace .= " "}
|
||||
while (length $thirdSpace < $thirdLen) {$thirdSpace .= " "}
|
||||
|
||||
say LOG "<9>\$text:\n$text\n\t\t\$entry: $entry \$foundText: $foundText\t \$foundTextLength: $foundTextLength\t\$first: $first\t\$second: $second\t\$third: $third\n\$firstSpace: $firstSpace\t\$secondSpace: $secondSpace\t\$thirdSpace: $thirdSpace";
|
||||
if ($third) {
|
||||
if ($foundText =~ /$first(.*)$second(.*)$third/) {
|
||||
my ($firstGap, $secondGap) = ($1, $2);
|
||||
my $repText = "$firstSpace$firstGap$secondSpace$secondGap$thirdSpace";
|
||||
say LOG "<9.1> \$repText: $repText";
|
||||
$text =~ s/ı/$repText/;
|
||||
}
|
||||
}
|
||||
else {
|
||||
if ($foundText =~ /$first(.*)$second/) {
|
||||
my ($firstGap) = ($1);
|
||||
say LOG "<9.2>\t\$firstSpace: |$firstSpace|\t\$firstGap: |$firstGap|\t\$secondSpace: |$secondSpace|";
|
||||
my $repText ="$firstSpace$firstGap$secondSpace";
|
||||
say LOG "<9.3> \$repText: |$repText|";
|
||||
$text =~ s/ı/$repText/;
|
||||
}
|
||||
}
|
||||
return ($text)
|
||||
}
|
||||
sub Prepare_ULB_file {
|
||||
|
||||
my $thisVerse;
|
||||
#$ULBText = read_file($ULBfile, binmode => 'utf8');
|
||||
|
||||
#while ($ULBText =~ /<verse name="(.*?)">\n<preText>(.*?)<\/preText>\n.*<text>(.*?)<\/text>.*<\/verse>/sg) {
|
||||
# ($ULBtextThisVerse{$1}, $ULBpreTextThisVerse{$1}) = ($3, $2);
|
||||
#}
|
||||
#
|
||||
|
||||
open (my $file, "<:utf8", "$ULBfile") or die "$ULBfile:\n$!";
|
||||
|
||||
while (my $thisLine = <$file>) {
|
||||
chomp $thisLine;
|
||||
if ($thisLine =~ /verse name="(.*?)"/) {
|
||||
$thisVerse = $1;
|
||||
#say LOG "$thisVerse:\n$thisLine"
|
||||
}
|
||||
elsif ($thisLine =~ /<preText>(.*?)<\/preText>/) {
|
||||
$ULBpreTextThisVerse{$thisVerse} = $1;
|
||||
#say LOG "$thisVerse:\n$ULBpreTextThisVerse{$thisVerse}"
|
||||
}
|
||||
elsif ($thisLine =~ /<text>(.*?)<\/text>/) {
|
||||
$ULBtextThisVerse{$thisVerse} = $1;
|
||||
#say LOG "$thisVerse:\n$ULBtextThisVerse{$thisVerse}"
|
||||
}
|
||||
}
|
||||
|
||||
close $file;
|
||||
}
|
||||
|
||||
__DATA__
|
||||
01 gen Genesis
|
||||
02 exo Exodus
|
||||
03 lev Leviticus
|
||||
04 num Numbers
|
||||
05 deu Deuteronomy
|
||||
06 jos Joshua
|
||||
07 jdg Judges
|
||||
08 rut Ruth
|
||||
09 1sa 1 Samuel
|
||||
10 2sa 2 Samuel
|
||||
11 1ki 1 Kings
|
||||
12 2ki 2 Kings
|
||||
13 1ch 1 Chronicles
|
||||
14 2ch 2 Chronicles
|
||||
15 ezr Ezra
|
||||
16 neh Nehemiah
|
||||
17 est Esther
|
||||
18 job Job
|
||||
19 psa Psalms
|
||||
20 pro Proverbs
|
||||
21 ecc Ecclesiastes
|
||||
22 sng Song of Solomon
|
||||
23 isa Isaiah
|
||||
24 jer Jeremiah
|
||||
25 lam Lamentations
|
||||
26 ezk Ezekiel
|
||||
27 dan Daniel
|
||||
28 hos Hosea
|
||||
29 jol Joel
|
||||
30 amo Amos
|
||||
31 oba Obadiah
|
||||
32 jon Jonah
|
||||
33 mic Micah
|
||||
34 nam Nahum
|
||||
35 hab Habakkuk
|
||||
36 zep Zephaniah
|
||||
37 hag Haggai
|
||||
38 zec Zechariah
|
||||
39 mal Malachi
|
||||
41 mat Matthew
|
||||
42 mrk Mark
|
||||
43 luk Luke
|
||||
44 jhn John
|
||||
45 act Acts
|
||||
46 rom Romans
|
||||
47 1co 1 Corinthians
|
||||
48 2co 2 Corinthians
|
||||
49 gal Galatians
|
||||
50 eph Ephesians
|
||||
51 php Philippians
|
||||
52 col Colossians
|
||||
53 1th 1 Thessalonians
|
||||
54 2th 2 Thessalonians
|
||||
55 1ti 1 Timothy
|
||||
56 2ti 2 Timothy
|
||||
57 tit Titus
|
||||
58 phm Philemon
|
||||
59 heb Hebrews
|
||||
60 jas James
|
||||
61 1pe 1 Peter
|
||||
62 2pe 2 Peter
|
||||
63 1jn 1 John
|
||||
64 2jn 2 John
|
||||
65 3jn 3 John
|
||||
66 jud Jude
|
||||
67 rev Revelation
|
|
@ -0,0 +1,59 @@
|
|||
use 5.12.0;
|
||||
use File::Slurp;
|
||||
use File::Find ;
|
||||
use Cwd ;
|
||||
|
||||
my %location;
|
||||
|
||||
open LOG, ">log/log.log" or die;
|
||||
|
||||
open(IN, "/Users/Henry/Documents/WACS/Tips_and_Hacks/MAST_tW_PDF_Updater/FilesForUpdates/Temp/ULB_text.txt") or die "$!";
|
||||
|
||||
say "Reading ULB";
|
||||
|
||||
while (<IN>) {
|
||||
#print LOG "$_";
|
||||
chomp;
|
||||
while (s/^([^\n\t]*)\t([^\n]*?)([A-Z][a-z]+(-[A-Z][a-z]+)?)/$1\t$2/) {
|
||||
# say LOG $3;
|
||||
unless (exists $location{$3}) {$location{$3} = $1}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
close IN;
|
||||
|
||||
say "Outputting hash";
|
||||
|
||||
open(OUT, ">out/results.txt") or die "$!";
|
||||
|
||||
foreach my $word (sort keys %location) {
|
||||
say OUT "$word, $location{$word}";
|
||||
}
|
||||
|
||||
close OUT;
|
||||
|
||||
say "Deleting common words";
|
||||
|
||||
my $fileText = read_file("/Users/Henry/Documents/WACS/Tips_and_Hacks/MAST_tW_PDF_Updater/FilesForUpdates/Temp/ULB_text.txt", binmode => 'utf8');
|
||||
|
||||
foreach my $word (sort keys %location) {
|
||||
my $temp = lc $word;
|
||||
#say LOG $word . "\t" . $temp;
|
||||
if ($fileText =~ /\b$temp\b/) {
|
||||
delete $location{$word}
|
||||
}
|
||||
}
|
||||
|
||||
say "Outputting final product";
|
||||
|
||||
open(OUT, ">out/results.txt") or die "$!";
|
||||
|
||||
foreach my $word (sort keys %location) {
|
||||
say OUT "$word, $location{$word}";
|
||||
}
|
||||
|
||||
close OUT;
|
||||
|
||||
|
||||
close LOG;
|
|
@ -0,0 +1,365 @@
|
|||
use 5.12.0;
|
||||
use File::Slurp;
|
||||
use File::Find ;
|
||||
use Cwd ;
|
||||
|
||||
my %value;
|
||||
|
||||
open LOG, ">log/log.log" or die;
|
||||
|
||||
while (<DATA>) {
|
||||
chomp;
|
||||
if (/^(.*)$/) {
|
||||
$value{$1} = $1
|
||||
}
|
||||
}
|
||||
|
||||
my $fileText = read_file("/Users/Henry/Documents/WACS/Tips_and_Hacks/MAST_tW_PDF_Updater/FilesForUpdates/Temp/ULB_text.txt", binmode => 'utf8');
|
||||
|
||||
$fileText = "\n" . $fileText;
|
||||
|
||||
foreach my $thisWord (sort keys %value) {
|
||||
print $thisWord . "\n";
|
||||
if ($fileText =~ /\n([^\n\t]*)\t[^\n]*$thisWord\b/) {
|
||||
say LOG $thisWord . ", " . $1;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
close LOG;
|
||||
|
||||
__DATA__
|
||||
Aaron
|
||||
Abel
|
||||
Abiathar
|
||||
Abijah
|
||||
Abimelek
|
||||
Abner
|
||||
Abraham
|
||||
Absalom
|
||||
Adam
|
||||
Adonijah
|
||||
Ahab
|
||||
Ahaz
|
||||
Ahaziah
|
||||
Ahijah
|
||||
Ai
|
||||
Amalek
|
||||
Amaziah
|
||||
Ammon
|
||||
Amnon
|
||||
Amorite
|
||||
Amos
|
||||
Amoz
|
||||
Andrew
|
||||
Annas
|
||||
Antioch
|
||||
Apollos
|
||||
Aquila
|
||||
Arabah
|
||||
Arabia
|
||||
Aram
|
||||
Ararat
|
||||
Artaxerxes
|
||||
Asa
|
||||
Asaph
|
||||
Ashdod
|
||||
Asher
|
||||
Asherah
|
||||
Ashkelon
|
||||
Asia
|
||||
Assyria
|
||||
Athaliah
|
||||
Azariah
|
||||
Baal
|
||||
Baasha
|
||||
Babel
|
||||
Babylon
|
||||
Balaam
|
||||
Barabbas
|
||||
Barnabas
|
||||
Bartholomew
|
||||
Baruch
|
||||
Bashan
|
||||
Bathsheba
|
||||
Beelzebul
|
||||
Beersheba
|
||||
Benaiah
|
||||
Benjamin
|
||||
Berea
|
||||
Beth Shemesh
|
||||
Bethany
|
||||
Bethel
|
||||
Bethlehem
|
||||
Bethuel
|
||||
Boaz
|
||||
Caesar
|
||||
Caesarea
|
||||
Caiaphas
|
||||
Cain
|
||||
Caleb
|
||||
Cana
|
||||
Canaan
|
||||
Capernaum
|
||||
Carmel
|
||||
Chaldea
|
||||
Cilicia
|
||||
Colossae
|
||||
Corinth
|
||||
Cornelius
|
||||
Crete
|
||||
Cush
|
||||
Cyprus
|
||||
Cyrene
|
||||
Cyrus
|
||||
Damascus
|
||||
Dan
|
||||
Daniel
|
||||
Darius
|
||||
David
|
||||
Delilah
|
||||
Eden
|
||||
Edom
|
||||
Egypt
|
||||
Ekron
|
||||
Elam
|
||||
Eleazar
|
||||
Eliakim
|
||||
Elijah
|
||||
Elisha
|
||||
Elizabeth
|
||||
En Gedi
|
||||
Enoch
|
||||
Ephesus
|
||||
Ephraim
|
||||
Ephrath
|
||||
Esau
|
||||
Esther
|
||||
Ethiopia
|
||||
Euphrates River
|
||||
Eve
|
||||
Ezekiel
|
||||
Ezra
|
||||
Gabriel
|
||||
Gad
|
||||
Galatia
|
||||
Galilee
|
||||
Gath
|
||||
Gaza
|
||||
Gerar
|
||||
Geshur
|
||||
Gethsemane
|
||||
Gibeah
|
||||
Gibeon
|
||||
Gideon
|
||||
Gilead
|
||||
Gilgal
|
||||
Girgashites
|
||||
Golgotha
|
||||
Goliath
|
||||
Gomorrah
|
||||
Goshen
|
||||
Greece
|
||||
Greek
|
||||
Habakkuk
|
||||
Hagar
|
||||
Haggai
|
||||
Ham
|
||||
Hamath
|
||||
Hamor
|
||||
Hananiah
|
||||
Hannah
|
||||
Haran
|
||||
Hebron
|
||||
Hermon
|
||||
Herod
|
||||
Herodias
|
||||
Hezekiah
|
||||
Hilkiah
|
||||
Hittite
|
||||
Hivite
|
||||
Horeb
|
||||
Hosea
|
||||
Hoshea
|
||||
Iconium
|
||||
Isaac
|
||||
Isaiah
|
||||
Ishmael
|
||||
Issachar
|
||||
Jacob
|
||||
James
|
||||
Japheth
|
||||
Jebus
|
||||
Jehoiachin
|
||||
Jehoiada
|
||||
Jehoiakim
|
||||
Jehoram
|
||||
Jehoshaphat
|
||||
Jehu
|
||||
Jephthah
|
||||
Jeremiah
|
||||
Jericho
|
||||
Jeroboam
|
||||
Jerusalem
|
||||
Jesse
|
||||
Jethro
|
||||
Jezebel
|
||||
Jezreel
|
||||
Joab
|
||||
Joash
|
||||
Job
|
||||
Joel
|
||||
John
|
||||
John Mark
|
||||
Jonah
|
||||
Jonathan
|
||||
Joppa
|
||||
Joram
|
||||
Jordan River
|
||||
Joseph
|
||||
Joshua
|
||||
Josiah
|
||||
Jotham
|
||||
Judah
|
||||
Judas.*Iscariot
|
||||
Judas son of James
|
||||
Judea
|
||||
Kadesh
|
||||
Kedar
|
||||
Kedesh
|
||||
Kerethites
|
||||
Kidron Valley
|
||||
Korah
|
||||
Laban
|
||||
Lamech
|
||||
Lazarus
|
||||
Leah
|
||||
Lebanon
|
||||
Levi
|
||||
Leviathan
|
||||
Lot
|
||||
Luke
|
||||
Lystra
|
||||
Maakah
|
||||
Macedonia
|
||||
Maker
|
||||
Malachi
|
||||
Manasseh
|
||||
Martha
|
||||
Mary
|
||||
Mary
|
||||
Mary.*Magdalene
|
||||
Matthew
|
||||
Mede
|
||||
Melchizedek
|
||||
Memphis
|
||||
Meshech
|
||||
Mesopotamia
|
||||
Micah
|
||||
Michael
|
||||
Midian
|
||||
Miriam
|
||||
Mishael
|
||||
Mizpah
|
||||
Moab
|
||||
Molech
|
||||
Mordecai
|
||||
Moses
|
||||
Mount of Olives
|
||||
Naaman
|
||||
Nahor
|
||||
Nahum
|
||||
Naphtali
|
||||
Nathan
|
||||
Nazareth
|
||||
Nebuchadnezzar
|
||||
Negev
|
||||
Nehemiah
|
||||
Nile River
|
||||
Nineveh
|
||||
Noah
|
||||
Obadiah
|
||||
Omri
|
||||
Paddan Aram
|
||||
Paran
|
||||
Paul
|
||||
Peor
|
||||
Perizzite
|
||||
Persia
|
||||
Peter
|
||||
Pharaoh
|
||||
Philip
|
||||
Philippi
|
||||
Philistia
|
||||
Philistines
|
||||
Phinehas
|
||||
Phoenicia
|
||||
Pilate
|
||||
Pontus
|
||||
Potiphar
|
||||
Priscilla
|
||||
Rabbah
|
||||
Rachel
|
||||
Rahab
|
||||
Ramah
|
||||
Ramoth
|
||||
Rebekah
|
||||
Rehoboam
|
||||
Reuben
|
||||
Rimmon
|
||||
Rome
|
||||
Ruth
|
||||
Salt Sea
|
||||
Samaria
|
||||
Samson
|
||||
Samuel
|
||||
Sarah
|
||||
Saul
|
||||
Sea of Galilee
|
||||
Sea of Reeds
|
||||
Sennacherib
|
||||
Seth
|
||||
Sharon
|
||||
Sheba
|
||||
Shechem
|
||||
Shem
|
||||
Shiloh
|
||||
Shimei
|
||||
Shinar
|
||||
Sidon
|
||||
Silas
|
||||
Simeon
|
||||
Simon the Zealot
|
||||
Sinai
|
||||
Sodom
|
||||
Solomon
|
||||
Stephen
|
||||
Sukkoth
|
||||
Syria
|
||||
Tamar
|
||||
Tarshish
|
||||
Tarsus
|
||||
Terah
|
||||
Thessalonica
|
||||
Thomas
|
||||
Timothy
|
||||
Tirzah
|
||||
Titus
|
||||
Troas
|
||||
Tubal
|
||||
Tychicus
|
||||
Tyre
|
||||
Ur
|
||||
Uriah
|
||||
Uzziah
|
||||
Vashti
|
||||
Xerxes
|
||||
Zacchaeus
|
||||
Zadok
|
||||
Zebedee
|
||||
Zebulun
|
||||
Zechariah
|
||||
Zedekiah
|
||||
Zephaniah
|
||||
Zerubbabel
|
||||
Zoar
|
|
@ -0,0 +1,24 @@
|
|||
$pre = "https://biblehub.com/greek/";
|
||||
$var = "a, b, c, d, e, f, g, h";
|
||||
$post = ".htm";
|
||||
@array = split (/, /, $var);
|
||||
|
||||
#$out = system "curl --fail https://biblehub.com/greek/2.htm";
|
||||
#print "\n\n\t\$out: $out";
|
||||
|
||||
open OUT, ">out/results.txt";
|
||||
|
||||
foreach $xx (1611 .. 1613) {
|
||||
foreach $var (@array) {
|
||||
$string = $pre . $xx . $var . $post;
|
||||
$out = `curl $string`;
|
||||
if ($out =~ /We're sorry, we were not able to find that passage./) {
|
||||
last
|
||||
} else {
|
||||
print OUT "\$xx: $xx. \$string: $string.";
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
close OUT;
|
|
@ -0,0 +1,38 @@
|
|||
use 5.18.0;
|
||||
use File::Slurp;
|
||||
use File::Find ;
|
||||
use Cwd ;
|
||||
|
||||
my $topDir = "/Users/Henry/Documents/WACS/en_ulb";
|
||||
|
||||
my %found;
|
||||
my @filesToRun = ();
|
||||
my $filePattern = '*.usfm' ;
|
||||
|
||||
|
||||
open LOG, ">/Users/Henry/Google Drive/WA/Scripts/out/log.log" or die;
|
||||
open OUT, ">/Users/Henry/Google Drive/WA/Scripts/out/output.txt" or die;
|
||||
|
||||
find( sub { push @filesToRun, $File::Find::name if ( m/^(.*)$filePattern$/ ) }, $topDir) ;
|
||||
|
||||
foreach my $file ( @filesToRun )
|
||||
{
|
||||
print "$file\n" ;
|
||||
my $fileText = read_file("$file", binmode => 'utf8');
|
||||
$fileText =~ s/\n/ /g;
|
||||
while ($fileText =~ /(\\[^ ]*) /g) {
|
||||
my $code = $1;
|
||||
unless (exists $found{$code}) {
|
||||
$found{$code} = $code
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
foreach my $code (sort keys %found) {
|
||||
say OUT $code
|
||||
}
|
||||
|
||||
close OUT;
|
||||
close LOG;
|
||||
|
||||
print "\n\tDone."
|
Loading…
Reference in New Issue