work on tagged OGNT
This commit is contained in:
parent
182dfeb169
commit
044c3f5fae
|
@ -0,0 +1,63 @@
|
|||
use 5.18.0;
|
||||
use File::Slurp;
|
||||
use File::Find ;
|
||||
use Cwd ;
|
||||
use utf8;
|
||||
#use open IN => ":utf8", OUT => ":utf8";
|
||||
use open IO => ":utf8";
|
||||
|
||||
# ----- Driver -----
# Opens the log and the XML output (paths relative to the current directory),
# collects the USFM files to convert, lets ReadFiles() write one <book> per
# file, then closes the <xml> wrapper.
# LOG and OUT stay bareword handles because ReadFiles() writes to them by name.
# Three-argument opens without an explicit layer still pick up the default
# layer declared by "use open IO".
open(LOG, '>', "Logs/Log.txt")   or die "Cannot write Logs/Log.txt: $!";
open(OUT, '>', "Output/ULB.xml") or die "Cannot write Output/ULB.xml: $!";
say OUT "<xml>";

# $topDir is the tree searched for USFM files.
# NOTE(review): $outDir is not used by any code visible here - confirm it is still needed.
my ($topDir, $outDir) = ("/Users/Henry/Documents/WACS/en_ulb", "/Users/Henry/Documents/WACS/Tips_and_Hacks/Tagged_OGNT/Output");

my @filesToRun = ();
my $filePattern = '63-1JN\.usfm' ;
my $file;

# File::Find sets $_ to the bare file name inside the callback; the full path
# is what gets queued.
find( sub { push @filesToRun, $File::Find::name if ( m/^(.*)$filePattern$/ ) }, $topDir) ;

# Without this the script would quietly produce an empty <xml></xml> document.
warn "No file matching $filePattern found under $topDir\n" unless @filesToRun;

ReadFiles();

say OUT "</xml>";

# Buffered write errors only surface at close, so check both handles.
close(OUT) or die "Cannot finish writing Output/ULB.xml: $!";
close(LOG) or die "Cannot finish writing Logs/Log.txt: $!";

say "\nDone.";
|
||||
# =====
|
||||
# Converts every USFM file queued in @filesToRun into a rough XML outline
# (<book>, <heading>, <chapter>, <preVerse>) using a sequence of regex passes
# over the whole file text. The intermediate text is written to LOG and the
# final text, followed by a closing </book>, to OUT.
# Takes no arguments; the return value is not meaningful.
sub ReadFiles {

    for my $usfmPath (@filesToRun) {
        say $usfmPath;
        my $fileText = read_file("$usfmPath", binmode => 'utf8');
        #say LOG $fileText;

        # Book name comes from the \h line; it stays empty when there is none.
        my $book = $fileText =~ /\\h ([^\n]*)/ ? $1 : '';
        #say LOG $book;

        # Delete \n: flatten the file to a single line so that the passes below
        # control where line breaks go, then squeeze runs of spaces.
        # NOTE(review): the second substitution read s/ / /g, a no-op as shown;
        # collapsing repeated spaces is presumably what was meant - confirm.
        $fileText =~ s/\n/ /g;
        $fileText =~ s/ {2,}/ /g;
        #say LOG $fileText;

        # Everything before the first \s5 becomes the book heading.
        $fileText =~ s/^([^\n]*?)(\\s5)/\t<book name="$book">\n\t\t<heading>$1<\/heading>$2/;

        # Stand-in character for \v so "no verse marker" can be written [^√].
        $fileText =~ s/\\v/√/g;

        # Start a new line at each \s5 that reaches a \c marker before any verse.
        $fileText =~ s/\\s5[^√]*?\\c (\d+)/\n$&/g;

        # Wrap each chapter line. The chapter number is handed over per match:
        # reading $1 once after a s///g only yields the LAST chapter matched,
        # which used to label every preVerse with the final chapter number.
        $fileText =~ s/\\s5 \\c (\d+)[^\n]*/_WrapChapter($book, $1, $&)/ge;

        say LOG $fileText;

        # NOTE(review): this wraps the leading non-verse text of EVERY line,
        # including the <chapter> and <heading> lines - looks unfinished.
        $fileText =~ s/\n([^\n√]*)/\n\t\t\t\t<preVerse>$1<\/preVerse>\n/g;
        #while ($fileText =~ s/(<preVerse name="([^:]*:)\d+">.*?</preVerse>\n)(\\p √ (\d+)) /$1<>/) { }

        # Restore the real verse marker.
        $fileText =~ s/√/\\v/g;
        #$fileText =~ s/(\\s5.*?\\v \d+ )/\t\t\t<preVerse>$1<\/preVerse>\n/g;

        #Capture heading and text
        #Capture chapters
        #Capture verses
        say OUT $fileText;
        say OUT "\t</book>";
    }

}

# Builds the <chapter> element for one flattened chapter line.
#   $book - book name used in the name attributes
#   $chap - chapter number taken from the line's \c marker
#   $line - the chapter text (no newlines), starting at "\s5 \c N"
# Returns the wrapped text.
sub _WrapChapter {
    my ($book, $chap, $line) = @_;

    # The "\s5 \c N" opener becomes the chapter's verse-0 preVerse.
    $line =~ s/^([^\n]*?\\c \d+) /\t\t\t<preVerse name="$book $chap:0">$1<\/preVerse>\n/;

    # Each later \s5 that runs straight into a verse marker becomes the
    # preVerse of that verse.
    $line =~ s/(\\s5[^\n√]*)√ (\d+) /\n\t\t\t<preVerse name="$book $chap:$2">$1\\v$2<\/preVerse>\n/g;

    return qq{\t\t<chapter name="$book $chap">\n$line\n\t\t</chapter>};
}
|
|
@ -0,0 +1,78 @@
|
|||
# second try, reading file line by line
|
||||
|
||||
use 5.18.0;
|
||||
use File::Slurp;
|
||||
use File::Find ;
|
||||
use Cwd ;
|
||||
use utf8;
|
||||
#use open IN => ":utf8", OUT => ":utf8";
|
||||
use open IO => ":utf8";
|
||||
|
||||
# ----- Driver -----
# Opens the log and the XML output (paths relative to the current directory),
# collects the USFM files to convert, lets ReadFiles() process them, then
# closes the <xml> wrapper.
# LOG and OUT stay bareword handles because ReadFiles() writes to them by name.
# Three-argument opens without an explicit layer still pick up the default
# layer declared by "use open IO".
open(LOG, '>', "Logs/Log.txt")   or die "Cannot write Logs/Log.txt: $!";
open(OUT, '>', "Output/ULB.xml") or die "Cannot write Output/ULB.xml: $!";
say OUT "<xml>";

# $topDir is the tree searched for USFM files.
# NOTE(review): $outDir is not used by any code visible here - confirm it is still needed.
my ($topDir, $outDir) = ("/Users/Henry/Documents/WACS/en_ulb", "/Users/Henry/Documents/WACS/Tips_and_Hacks/Tagged_OGNT/Output");

my @filesToRun = ();
my $filePattern = '63-1JN\.usfm' ;
my $file;

# File::Find sets $_ to the bare file name inside the callback; the full path
# is what gets queued.
find( sub { push @filesToRun, $File::Find::name if ( m/^(.*)$filePattern$/ ) }, $topDir) ;

# Without this the script would quietly produce an empty <xml></xml> document.
warn "No file matching $filePattern found under $topDir\n" unless @filesToRun;

ReadFiles();

say OUT "</xml>";

# Buffered write errors only surface at close, so check both handles.
close(OUT) or die "Cannot finish writing Output/ULB.xml: $!";
close(LOG) or die "Cannot finish writing Logs/Log.txt: $!";

say "\nDone.";
|
||||
# =====
|
||||
# Second attempt: reads each USFM file in @filesToRun line by line.
# Every line is echoed to LOG. Leading lines whose marker is \id, \ide, \h,
# \tocN, \mt or \cl are accumulated (space-separated) into the heading; the
# first other line causes the heading to be written to OUT as <heading>.
# Nothing handles the lines after that yet, so the script stops there.
# Takes no arguments; the return value is not meaningful.
sub ReadFiles {

    for my $usfmPath (@filesToRun) {
        say $usfmPath;

        # Three-argument open with a lexical handle: the path can no longer be
        # misread as an open mode, and the failure message names the file.
        open(my $in, '<', $usfmPath) or die "Cannot read $usfmPath: $!";

        my $heading = '';      # heading lines collected so far
        my $headingDone;       # set once the <heading> element has been written

        while (my $line = <$in>) {
            chomp $line;
            say LOG $line;

            if ($headingDone) {
                # NOTE(review): looks like a deliberate development stop - the
                # body of the file is not processed by any code below.
                die "No handling for text after the heading yet; stopped at $usfmPath line $.\n";
            }
            elsif ($line =~ /^\\(ide?|h|toc\d|mt|cl)/) {
                $heading .= "$line ";
            }
            else {
                $headingDone = 1;
                say OUT "\t<heading>$heading</heading>"
            }

            # Regex pipeline from the whole-file attempt, kept for reference:
            #if ($fileText =~ /\\h ([^\n]*)/) {
            #	$book = $1
            #}
            ##say LOG $book;
            #$fileText =~ s/\n/ /g;
            #$fileText =~ s/ / /g;
            ##say LOG $fileText;
            #$fileText =~ s/^([^\n]*?)(\\s5)/\t<book name="$book">\n\t\t<heading>$1<\/heading>$2/;
            #$fileText =~ s/\\v/√/g;
            #$fileText =~ s/\\s5[^√]*?\\c (\d+)/\n$&/g;
            #if ($fileText =~ s#\\s5 \\c (\d+)[^\n]*#\t\t<chapter name="$book $1">\n$&\n\t\t</chapter>#g) {$chap = $1}
            #$fileText =~ s/(<chapter[^>]*>\n)([^\n]*?\\c \d+) /$1\t\t\t<preVerse name="$book $chap:0">$2<\/preVerse>\n/gs;
            #$fileText =~ s/(\\s5[^\n√]*)√ (\d+) /\n\t\t\t<preVerse name="$book $chap:$2">$1\\v$2<\/preVerse>\n/g;
            #say LOG $fileText;
            #$fileText =~ s/\n([^\n√]*)/\n\t\t\t\t<preVerse>$1<\/preVerse>\n/g;
            ##while ($fileText =~ s/(<preVerse name="([^:]*:)\d+">.*?</preVerse>\n)(\\p √ (\d+)) /$1<>/) { }
            #
            #$fileText =~ s/√/\\v/g;
            #$fileText =~ s/(\\s5.*?\\v \d+ )/\t\t\t<preVerse>$1<\/preVerse>\n/g;
            #Capture heading and text
            #Capture chapters
            #Capture verses
        }
        close $in;
    }

}
|
|
@ -13,9 +13,9 @@
|
|||
\fqa
|
||||
\fqa*
|
||||
\ft
|
||||
\m
|
||||
\m # continued paragraph from before (quote or poetry), no indent
|
||||
\ms # psalms section heading
|
||||
\nb # follows chapter line
|
||||
\nb # no break from previous paragraph; follows chapter line
|
||||
\p
|
||||
\pi # special formatting
|
||||
\q
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
# Takes current tW entries and populates tagged OGNT XML
|
||||
# This is the current best version
|
||||
# It takes care of all entries but doesn't account for USFM codes in ULB
|
||||
# Trying to get it to work with repeated instances of same word.
|
||||
# Requires ULB that includes USFMs.
|
||||
|
||||
use 5.12.0;
|
||||
use File::Slurp;
|
||||
use File::Find ;
|
||||
|
@ -38,7 +38,7 @@ close LOG;
|
|||
open(LOG, ">Logs/Log.txt") or die "$!";
|
||||
|
||||
LongBookNames();
|
||||
Read_ULB_File();
|
||||
Prepare_ULB_file();
|
||||
ProcessXML();
|
||||
# put unused SN at end of verse
|
||||
|
||||
|
@ -289,7 +289,7 @@ sub FixWorkText {
|
|||
}
|
||||
return ($text)
|
||||
}
|
||||
sub Read_ULB_File {
|
||||
sub Prepare_ULB_file {
|
||||
|
||||
$ULBText = read_file("/Users/Henry/Documents/WACS/Tips_and_Hacks/MAST_tW_PDF_Updater/FilesForUpdates/Temp/ULB_text.txt", binmode => 'utf8');
|
||||
|
||||
|
|
Loading…
Reference in New Issue