Add files via upload
This commit is contained in:
parent
2be48dbf32
commit
2f19fdbf49
|
@ -0,0 +1,37 @@
|
|||
# Compile an NA-equivalent text from the Berean Greek data (inclusive).
# The compiled text becomes the main text of the OpenGNT project.

import re

inputFile = 'berean_tablesInclusive.csv'
outputFile = 'OGNT_v3.csv'

# Column clean-up: for every row with at least 15 tab-separated fields, keep
# fields 1, 8-10 and 12-14 and drop everything else.  Rows with fewer fields
# do not match the pattern and pass through unchanged.
COLUMN_FILTER = re.compile(
    r'^([^\n\t]*?\t)'
    r'[^\n\t]*?\t[^\n\t]*?\t[^\n\t]*?\t[^\n\t]*?\t[^\n\t]*?\t[^\n\t]*?\t'
    r'([^\n\t]*?\t[^\n\t]*?\t[^\n\t]*?\t)'
    r'[^\n\t]*?\t'
    r'([^\n\t]*?\t[^\n\t]*?\t[^\n\t]*?)'
    r'\t.*?$',
    flags=re.M,
)

# Whole-row removals, applied in this order after the column clean-up.  Every
# pattern consumes the row's trailing newline, so a final row that has no
# newline terminator is never removed.
ROW_FILTERS = tuple(
    re.compile(pattern, flags=re.M)
    for pattern in (
        # rows whose 2nd-4th remaining fields are all 0
        r'^[^\t\n]*?\t0\t0\t0\t.*?\n',
        # take away some of TR variants; those variants are reserved in footnotes
        r'^.*?{[^{}]*?}.*?\n',
        # take away some of BYZ variants; those variants are reserved in footnotes
        r'^.*?⧼[^⧼⧽]*?⧽.*?\n',
        # take away some of WH variants; those variants are reserved in footnotes
        r'^.*?\([^\(\)]*?\).*?\n',
        # take away Nestle 1904 variants; those variants are reserved in footnotes
        r'^.*?〈[^〈〉]*?〉.*?\n',
        # take away some of SBLGNT variants; those variants are reserved in footnotes
        # NOTE(review): this pattern is character-for-character the same as
        # the Nestle 1904 one above, so this pass removes nothing new.  The
        # two may originally have used different (visually similar) bracket
        # code points -- confirm against the marks used in the input data.
        r'^.*?〈[^〈〉]*?〉.*?\n',
    )
)


def compile_main_text(data):
    """Return the main-text table derived from the raw Berean table text.

    data: the full tab-separated table as a single string.

    The columns are reduced first (see COLUMN_FILTER); afterwards every row
    matched by one of ROW_FILTERS is deleted.  Returns the resulting string.
    """
    data = COLUMN_FILTER.sub(r'\1\2\3', data)
    for row_filter in ROW_FILTERS:
        data = row_filter.sub('', data)

    # lines below replace words in main text with variants, use for mapping purposes ONLY
    #data = re.sub('^([^\t\n]*?\t[^\t\n]*?\t[^\t\n]*?\t[^\t\n]*?\t)[^\t\n]*?\t([^\t\n]*?\t)([^\t\n+@$]+?)$', r'\1\3\t\2\3', data, flags=re.M)
    #data = re.sub('^([^\t\n]*?\t[^\t\n]*?\t[^\t\n]*?\t[^\t\n]*?\t[^\t\n]*?)\t.*?$', r'\1', data, flags=re.M)
    #data = re.sub('[*=]|', '', data)

    return data


def main():
    """Read inputFile, compile the main text and write it to outputFile."""
    # The table holds Greek text, so decode/encode explicitly as UTF-8 instead
    # of relying on the platform's default encoding.
    with open(inputFile, 'r', encoding='utf-8') as f:
        newData = f.read()

    newData = compile_main_text(newData)

    with open(outputFile, 'w', encoding='utf-8') as f:
        f.write(newData)


if __name__ == '__main__':
    main()
|
|
@ -0,0 +1,42 @@
|
|||
# Produce an unaccented copy of the accented Berean Greek table: accented and
# breathed Greek letters are replaced by their plain letters, a fixed set of
# punctuation/markup characters is removed, and one trailing space per line
# is dropped.

import re
import unicodedata

inputFile = 'berean_accented.csv'
outputFile = 'berean_unaccented.csv'

# Greek unicode characters: (accented forms, plain replacement)
ACCENT_GROUPS = (
    ('ἀἄᾄἂἆἁἅᾅἃάᾴὰᾶᾷᾳ', 'α'),
    ('ἈἌἎἉἍἋ', 'Α'),

    ('ἐἔἑἕἓέὲ', 'ε'),
    ('ἘἜἙἝἛ', 'Ε'),

    ('ἠἤᾔἢἦᾖᾐἡἥἣἧᾗᾑήῄὴῆῇῃ', 'η'),
    ('ἨἬἪἮἩἭἫ', 'Η'),

    ('ἰἴἶἱἵἳἷίὶῖϊΐῒ', 'ι'),
    ('ἸἼἹἽ', 'Ι'),

    ('ὀὄὂὁὅὃόὸ', 'ο'),
    ('ὈὌὉὍὋ', 'Ο'),

    ('ῥ', 'ρ'),
    ('Ῥ', 'Ρ'),

    ('ὐὔὒὖὑὕὓὗύὺῦϋΰῢ', 'υ'),
    ('ὙὝὟ', 'Υ'),

    ('ὠὤὢὦᾠὡὥὧᾧώῴὼῶῷῳ', 'ω'),
    ('ὨὬὪὮὩὭὯ', 'Ω'),
)

# Characters deleted outright (the backslash is part of the set).
PUNCTUATION = "-—,;:\\?.·'‘’‹›“”«»()[]{}⧼⧽〈〉*‿᾽⇔¦"

# A single space directly before a line break or at the very end of the text.
TRAILING_SPACE = re.compile(' $', flags=re.M)


def build_translation_table():
    """Build the str.translate table used by strip_accents.

    Maps each accented letter in ACCENT_GROUPS to its plain letter and each
    PUNCTUATION character to None (deletion).  Characters that are
    canonically equivalent to a listed one get the same treatment, so the
    result does not depend on which of the equivalent code points the input
    file happens to use.
    """
    table = {}
    for accented, plain in ACCENT_GROUPS:
        for ch in accented + unicodedata.normalize('NFC', accented):
            table[ord(ch)] = plain
    for ch in PUNCTUATION + unicodedata.normalize('NFC', PUNCTUATION):
        table[ord(ch)] = None

    # Add canonical equivalents of the listed characters, e.g. the oxia
    # vowels (U+1F71 ...) for the tonos vowels, Greek ano teleia U+0387 for
    # the middle dot, Greek question mark U+037E for ';' and the angle
    # brackets U+2329/U+232A for U+3008/U+3009.  The scanned range spans the
    # Greek, Greek Extended and Miscellaneous Technical blocks.
    for code_point in range(0x0370, 0x2400):
        if code_point in table:
            continue
        composed = unicodedata.normalize('NFC', chr(code_point))
        if len(composed) == 1 and ord(composed) in table:
            table[code_point] = table[ord(composed)]
    return table


TRANSLATION_TABLE = build_translation_table()


def strip_accents(text):
    """Return text with Greek diacritics and listed punctuation removed.

    text: any string.

    Letters and punctuation are handled in one translate pass; afterwards a
    single trailing space is removed from the end of each line.
    """
    return TRAILING_SPACE.sub('', text.translate(TRANSLATION_TABLE))


def main():
    """Read inputFile, strip accents/punctuation and write outputFile."""
    # The data is Greek text, so decode/encode explicitly as UTF-8 instead of
    # relying on the platform's default encoding.
    with open(inputFile, 'r', encoding='utf-8') as f:
        newData = f.read()

    newData = strip_accents(newData)

    with open(outputFile, 'w', encoding='utf-8') as f:
        f.write(newData)


if __name__ == '__main__':
    main()
|
Loading…
Reference in New Issue