68 lines
1.7 KiB
Bash
68 lines
1.7 KiB
Bash
#! /usr/bin/sed -E -i .bak -f
|
||
# Depending on your input, the following first command may not be necessary; please check.
|
||
s/$/ /g
|
||
# convert diaeresis
|
||
s/[ϊΐῒϋΰῢ]/*/g
|
||
# alphabet
|
||
s/[αΑἀἈἄἌᾄἂἆἎἁἉἅἍᾅἃἋάᾴὰᾶᾷᾳ]/a/g
|
||
s/[βΒ]/v/g
|
||
s/[γΓ]/g/g
|
||
s/[δΔ]/ð/g
|
||
s/[εΕἐἘἔἜἑἙἕἝἓἛέὲ]/e/g
|
||
s/[ζΖ]/z/g
|
||
s/[ηΗἠἨἤἬᾔἢἪἦἮᾖᾐἡἩἥἭἣἧᾗᾑήῄὴῆῇῃ]/iy/g
|
||
s/[θΘ]/θ/g
|
||
# iota in all listed forms maps to "iy"; plain capital Ι is included so that
# it is handled like the plain capitals of the other vowels (Α, Ε, Η, Ο, Υ)
s/[ιΙἰἸἴἼἶἱἹἵἽἳἷίὶῖ]/iy/g
|
||
s/[κΚ]/k/g
|
||
s/[λΛ]/l/g
|
||
s/[μΜ]/m/g
|
||
s/[νΝ]/n/g
|
||
s/[ξΞ]/ks/g
|
||
s/[οΟὀὈὄὌὂὁὉὅὍὃὋόὸ]/o/g
|
||
s/[πΠ]/p/g
|
||
s/[ρΡῥῬ]/r/g
|
||
s/ r/ rh/g
|
||
s/[σςΣ]/s/g
|
||
# turn s into z when it is immediately followed (no space in between) by one of v, g, ð, m, n, r
s/s([vgðmnr])/z\1/g
|
||
# check if you need the following command
|
||
# s/s ([vgðmnr])/z \1/g
|
||
# check the word at the end of a line and the word at the beginning of the next line, if necessary
|
||
s/[τΤ]/t/g
|
||
s/[υΥὐὔὒὖὑὙὕὝὓὗὟύὺῦ]/IY/g
|
||
s/[φΦ]/f/g
|
||
s/[χΧ]/kh/g
|
||
s/[ψΨ]/ps/g
|
||
# omega in all listed forms maps to "o"; plain capital Ω is included so that
# it is handled like the plain capitals of the other vowels (Α, Ε, Η, Ο, Υ)
s/[ωΩὠὤὬὢὦὮᾠὡὩὥὭὧὯᾧώῴὼῶῷῳ]/o/g
|
||
# diphthongs
|
||
s/aiy/e/g
|
||
s/eiy/iy/g
|
||
s/oiy/iy/g
|
||
s/IYiy/iy/g
|
||
s/oIY/u/g
|
||
# "a" followed by the upsilon placeholder IY becomes "af" when the next character is one of
# p, k, t, f, θ, s, z (k is listed twice in the class; the duplicate is harmless)
s/aIY([pktfkθsz])/af\1/g
|
||
s/aIY/av/g
|
||
s/eIY([pktfkθsz])/ef\1/g
|
||
s/eIY/ev/g
|
||
s/iyIY([pktfkθsz])/iyf\1/g
|
||
s/iyIY/iyv/g
|
||
# change IY to small letter
|
||
s/IY/iy/g
|
||
# consonant clusters
|
||
s/gg/ngG/g
|
||
# gkh and gks must be rewritten before the generic gk rule: "gk" is a prefix of
# both, so running gk first consumes it and the longer rules can never match.
# The uppercase G in "nGkh"/"nGks" is a placeholder that keeps the gk rule below
# from re-matching the freshly written output; it is lowered to g by the
# s/G/g/g command later in the script.
s/gkh/nGkh/g
s/gks/nGks/g
s/gk/ngG/g
|
||
# mp becomes mb unless it is followed by a space or one of the listed
# punctuation/markup characters. Inside a bracket expression a backslash is
# literal and "]" closes the list unless it comes first, so "]" is placed
# directly after "^" and no backslashes are used.
s/mp([^] .()[—,;·’⟦⟧⸂⸃⸄⸅⸀⸁12])/mb\1/g
|
||
# nt becomes nd unless it is followed by a space or one of the listed
# punctuation/markup characters. The captured following character is written
# back with \1 so it is not dropped. Inside a bracket expression a backslash is
# literal and "]" closes the list unless it comes first, so "]" is placed
# directly after "^" and no backslashes are used.
s/nt([^] .()[—,;·’⟦⟧⸂⸃⸄⸅⸀⸁12])/nd\1/g
|
||
# deal with letter gamma
|
||
s/ge/ye/g
|
||
s/giy/yiy/g
|
||
# change G to small letter
|
||
s/G/g/g
|
||
# restore diaeresis
|
||
# replace the "*" placeholder written by the diaeresis rule with "iy";
# the asterisk is escaped because a leading bare "*" is not a valid
# extended regular expression
s/\*/iy/g
|
||
s/ $//g
|
||
s/᾽//g
|
||
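# reminder: re-read the note above about checking the word at the end of a line against the word at the beginning of the next line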
|
||
# replace s( \r[0-9]+?\t[vgðmnr]) with z\1 if necessary
|