old-version-1.01
|
INSTALL,
Makefile,
Makefile.os2,
Makefile.unx,
alt-gos.rus,
alt-koi8.rus,
announcement,
example.alt.uu,
example.ko8.uu,
example.pho,
example.tex,
gos-alt.rus,
gos-koi8.rus,
hex-koi8.rus,
koi7-8.rus,
koi7nl-8.rus,
koi8-7.rus,
koi8-alt.rus,
koi8-gos.rus,
koi8-lc.rus,
koi8-phg.rus,
koi8-php.rus,
koi8-tex.rus,
order.txt,
paths.h,
phg-koi8.rus,
pho-8sim.rus,
pho-koi8.rus,
php-koi8.rus,
readme.doc,
reg_exp.c,
reg_exp.h,
reg_sub.c,
tex-koi8.rus,
translit.1,
translit.c,
translit.ps,
translit.tar.Z,
translit.tar.z.uu,
translit.txt,
translit.zip,
translit.zip.uu,
|
|
|
# Jan Labanowski, jkl@ccl.net, Jan. 10, 1992 # File lc_koi8.dat
# This is a transliteration data file for converting from various phonetic
# transliteration schemes to KOI-8 as used by RELCOM (GOST 19768-74).
# It is not possible to exactly represent phonetic transliteration
# since it is very flexible and frequently contradictory.
# This file is complicated (in my humble opinion) and it will take
# a substantial amount of time to process longer files on a slower
# computer. However, this is what you get, if you want something more
# or less general. If your phonetic transliteration is consistent
# and unequivocal, you will be much better off writing a specific
# transliteration file, e.g. the GOST 16876-71 transliteration file (phg) or
# Pokrovsky scheme (php).
# The English text should be enclosed in braces {}, while the Russian text
# stays outside the braces.
# To be used with the translit.c program by Jan Labanowski. For the format
# of this file, consult the translit documentation.
# ---- Global settings (read before the conversion table) ----
# The remark after the version number is introduced with '#', like every
# other annotated setting in this section.
1 # file version number
" " # string delimiters
[ ] # list delimiters
{ } # regular expression delimiters
#starting sequence
""
#ending sequence
""
# Two input sets follow, one line of six strings each. Set 1 is the default
# (Russian, all strings empty); set 2 is Latin text, whose 1st and 5th
# strings are the opening and closing brace.
# NOTE(review): the meaning of the remaining four strings is not visible in
# this file -- confirm against the translit documentation before editing.
2 # number of input SHIFT sequences
"" "" "" "" "" "" # no SHIFT-OUT/IN for Russian letters
"{" "" "" "" "}" "" # Latin text in braces {}
0 # number of output SHIFT sequences, two sets of input characters
# conversion table
# Columns of every rule: inp_set inp_seq out_set out_seq
# inp_set 1 is the Russian (default) input set, inp_set 2 is Latin text
# inside {}. Sequences are written as "string", [list] or {regular expression}.
# A negative out_set marks what the comments below call a "backstep" rule.
# NOTE(review): the exact difference between -1 and -2 is not visible in this
# file -- confirm against the translit documentation before changing them.
# Latin(ASCII) is embraced in {}
2 ["'A-Za-z] 0 ["'A-Za-z]
# Cyrillic letters
# If already converted to KOI8 by backstepping, send it to output
# (any single byte in the range 0x80-0xFF is copied through unchanged)
1 {([\0x80-\0xFF])} 0 {\1}
# Convert " followed by a capital letter to capital Tvyordyj znak and backstep
# otherwise " will be treated as a small tvyordyj znak
# The first two rules keep a " that touches a non-letter as a literal quote.
1 {"([^A-Za-z])} 0 {"\1} # " at the end
1 {([^A-Za-z])"} 0 {\1"} # " at the beginning
1 {"([A-Z])} -2 {\0xFF\1} # capital Tvyordyj znak
1 {Q[Hh]} 0 "\0xFF" # Some use it as Tvyordyj
1 "qh" 0 "\0xDF" # Some use it as tvyordyj
# \0x22 is the " character itself, written as a byte code
1 "\0x22" 0 "\0xDF" # tvyordyj znak
# Convert ' preceded by a capital letter to capital Myagkij znak and backstep
# otherwise ' will be treated as a small myagkij znak
1 "''" 0 "''" # two apostrophes (a double quote) pass through unchanged
1 {([^A-Za-z])'} 0 {\1'} # opening quote
# Myagkij znak
# Capital form: ' directly after SHCH, SH, CH, TCH or any single capital
# letter; the matched letters (group 1) are handed back in front of \0xF8.
1 {((S[Hh][Cc][Hh])|(S[Hh])|(C[Hh])|(T[Cc][Hh])|([A-Z]))'} -1 {\1\0xF8}
1 "Q" 0 "\0xF8"
1 "'" 0 "\0xD8" # myagkij znak
1 "q" 0 "\0xD8"
# Final -J / -j: a vowel part followed by Y, I or J and then a non-letter.
# Group numbering used by the output sequence:
#   1 = the whole vowel part (2 and 3 are its two alternatives)
#   4 = the Y/I/J that is replaced (5 and 6 are its two alternatives)
#   7 = the trailing non-letter, copied back after the new byte
1 {(([YIJ]?[EOUA])|([J]?[EOAUY]))((Y)|([IJ]))([^A-Za-z])} -1 {\1\0xEA\7} #-J
1 {(([yij]?[eoua])|([j]?[eoauy]))((y)|([ij]))([^A-Za-z])} -1 {\1\0xCA\7} #-j
# the story of ts versus c (the ts for c was a stupid idea of Library of
# Congress --- very, very stupid... T and S should be T and S, not C).
# These rules decide when the letter pair TS (or a plain C) stands for the
# single Cyrillic letter that the single-letter rule "C" emits (\0xE3/\0xC3).
#
# IN + TS/C: the middle byte is the code of N (\0xEE / \0xCE, the same bytes
# the single-letter rules "N" and "n" emit), not the code of M (\0xED / \0xCD).
1 "INTS" 0 "\0xE9\0xEE\0xE3" #INC
1 "INC" 0 "\0xE9\0xEE\0xE3" #INC
1 "ints" 0 "\0xC9\0xCE\0xC3" #inc
1 "inc" 0 "\0xC9\0xCE\0xC3" #inc
1 "CI" 0 "\0xE3\0xE9"
1 "ci" 0 "\0xC3\0xC9"
# AVIA + TS/C: the second byte is the code of V (\0xF7 / \0xD7, the same
# bytes the single-letter rules "V" and "v" emit), not the code of B
# (\0xE2 / \0xC2).
1 {AVIA(TS|C)} 0 "\0xE1\0xF7\0xE9\0xE1\0xE3" #aviac
1 {avia(ts|c)} 0 "\0xC1\0xD7\0xC9\0xC1\0xC3"
# TSI before I/Y/J/O: emit C + I and hand the following letter back
1 {tsi([iyjo])} -2 {\0xC3\0xC9\1} # ci
1 {TSI([IYJO])} -2 {\0xE3\0xE9\1} # ci
# TS before A: emit C and hand the A back
1 {T[Ss]([Aa])} -2 {\0xE3\1} # CA
1 {t[Ss]([Aa])} -2 {\0xC3\1} # ca
# TS after D or K: hand the D/K back, followed by C
1 {([DdKk])T[Ss]} -1 {\1\0xE3} # DC or KC
1 {([DdKk])t[Ss]} -1 {\1\0xC3} # dc or kc
# TS directly before a non-letter (end of word)
1 {TS([^A-Za-z])} -2 {\0xE3\1} # C
1 {ts([^A-Za-z])} -2 {\0xC3\1} # c
# Je --- people frequently write e instead of Je. E oborotnoje is
# frequently at the beginning of foreign origin words
# In this section \0xE5/\0xC5 are the bytes the single-letter rules "E"/"e"
# emit (Je), and \0xFC/\0xDC are the bytes the E[Hh]/"eh" rules emit
# (E oborotnoje).
1 "AER" 0 "\0xE1\0xFC\0xF2"
1 {([Aa])er} -1 {\1\0xDC\0xD2}
# letter + ' + optional I/Y/J + E: myagkij znak followed by Je
1 {([A-Za-z])'[IiYyJj]?E} -1 {\1\0xF8\0xE5} # Je
1 {([A-Za-z])'[IiYyJj]?e} -1 {\1\0xD8\0xC5} # je
# Capital Je
# E after a non-letter (group 1) and before one of the letter sequences
# listed in group 2; both groups are handed back around the new byte.
# The rule is continued over three lines with a trailing backslash.
1 {([^A-Za-z])E(([Mm][Uu]?[^A-Za-z])|([Mm][Ll])|([Ll][^EeIiLlYyJj'])\
|([Ll][YyIiJj]?[Ee][^A-Za-z])|([Rr][Uu])|([Ss][HhTtLl])|([Kk][Aa]))}
-1 {\1\0xE5\2} # Je
# Small je
# Same pattern as the capital rule, with a lower-case e
1 {([^A-Za-z])e(([Mm][Uu]?[^A-Za-z])|([Mm][Ll])|([Ll][^EeIiLlYyJj'])\
|([Ll][YyIiJj]?[Ee][^A-Za-z])|([Rr][Uu])|([Ss][HhTtLl])|([Kk][Aa]))}
-1 {\1\0xC5\2} # je
# Capital Eh
# E after a non-letter and before LEK/LEG or one of K L M N P R S T F
1 {([^A-Za-z])E(([Ll][Ee][KkGg])|([KLMNPRSTFklmnprstf]))} -1 {\1\0xFC\2} #Eh
# Small eh
1 {([^A-Za-z])e(([Ll][Ee][KkGg])|([KLMNPRSTFklmnprstf]))} -1 {\1\0xDC\2} #eh
# IE after one of i I O o P p U u F f Y y: the pair becomes a single Je
1 {([iIOoPpUuFfYy])i[Ee]} -1 {\1\0xC5} # ie->je
1 {([iIOoPpUuFfYy])I[Ee]} -1 {\1\0xE5} # Ie->Je
# Eh is e oborotnoje but not at the end of the word
# (EH followed by one or more non-letters is emitted as Je + Kha instead)
1 {E[Hh]([^A-Za-z]+)} 0 {\0xE5\0xE8\1}
1 {e[Hh]([^A-Za-z]+)} 0 {\0xC5\0xC8\1}
1 {E[Hh]} 0 "\0xFC" # E oborotnoje
1 "eh" 0 "\0xDC" # e oborotnoje
# Various I kratkoe
1 {J[Ii]} 0 "\0xEA" # I kratkoje
1 {J[Jj]} 0 "\0xEA"
1 "ji" 0 "\0xCA" # i kratkoje
1 "jj" 0 "\0xCA"
# SHCH
1 {s[Hh][Cc][Hh]} 0 "\0xDD"
1 "w" 0 "\0xDD"
1 {S[Hh][Cc][Hh]} 0 "\0xFD"
1 "W" 0 "\0xFD"
# Capital two-letter combinations. The special cases (RIUM, IUD, DIUS,
# DIAK/DIAG/DIAP, RIAL, KIA) are listed ahead of the generic Ju / Ja rules
# and keep the I as a separate letter (\0xE9) in those words.
1 {[YJ][Oo]} 0 "\0xB3" # capital Jo
1 {J[Ee]} 0 "\0xE5" # Je
# The pattern has a single group (M + the following non-letter), so the
# output must hand it back as \1; the lower-case twin further down has two
# groups and therefore uses \2.
# NOTE(review): the sibling rules whose output starts with converted bytes
# use out_set -2; confirm whether -1 is intended for this rule.
1 {RIU(M[^A-Za-z])} -1 {\0xF2\0xE9\0xF5\1} # IU
1 {([^A-Za-z])I([Uu][Dd])} -1 {\1\0xE9\2}
1 "DIUS" 0 "\0xE4\0xE9\0xF5\0xF3"
1 {[IYJ][Uu]} 0 "\0xE0" # Ju
1 {([Dd])I([Aa][KkGgPp])} -1 {\1\0xE9\2} # dia
1 "RIAL" 0 "\0xF2\0xE9\0xE1\0xEC" # rial
1 "KIA" 0 "\0xEB\0xE9\0xE1" # kia
1 {[IYJ][Aa]} 0 "\0xF1" # Ja
1 {Z[Hh]} 0 "\0xF6"
1 {K[Hh]} 0 "\0xE8"
1 {H[Hh]} 0 "\0xE8"
1 {C[Hh]} 0 "\0xFE"
1 {S[Hh]} 0 "\0xFB"
# Lower-case two-letter combinations, in the same order as the capital ones
1 "zh" 0 "\0xD6"
1 "kh" 0 "\0xC8"
1 "hh" 0 "\0xC8"
1 "ch" 0 "\0xDE"
1 "sh" 0 "\0xDB"
1 {[yj]o} 0 "\0xA3" #jo
1 "je" 0 "\0xC5" #je
1 {([Rr])iu([Mm][^A-Za-z])} -1 {\1\0xC9\0xD5\2} # iu
1 {([^A-Za-z])i(ud)} -1 {\1\0xC9\2}
1 "dius" 0 "\0xC4\0xC9\0xD5\0xD3"
1 {[iyj]u} 0 "\0xC0" #ju
1 {([Dd])ia([kgp])} -1 {\1\0xC9\0xC1\2} # dia
1 "rial" 0 "\0xD2\0xC9\0xC1\0xCC" # rial
1 "kia" 0 "\0xCB\0xC9\0xC1" # kia
1 {[iyj]a} 0 "\0xD1" #ja
# Single-letter fallbacks: one Latin letter to one output byte.
# Capital letters map to bytes in the range 0xE0-0xFF.
# There is no entry for Q or W here; both have their own rules earlier in
# the table (Q with the myagkij znak rules, W with the SHCH rules).
1 "A" 0 "\0xE1"
1 "B" 0 "\0xE2"
1 "V" 0 "\0xF7"
1 "G" 0 "\0xE7"
1 "D" 0 "\0xE4"
1 "Z" 0 "\0xFA"
1 "I" 0 "\0xE9"
1 "J" 0 "\0xEA" # I kratkoje
1 "K" 0 "\0xEB"
1 "L" 0 "\0xEC"
1 "M" 0 "\0xED"
1 "N" 0 "\0xEE"
1 "O" 0 "\0xEF"
1 "P" 0 "\0xF0"
1 "R" 0 "\0xF2"
1 "S" 0 "\0xF3"
1 "T" 0 "\0xF4"
1 "U" 0 "\0xF5"
1 "F" 0 "\0xE6"
1 "X" 0 "\0xE8" # Kha
1 "H" 0 "\0xE8" # Kha
1 "C" 0 "\0xE3"
1 "Y" 0 "\0xF9"
1 "E" 0 "\0xE5" #Je
# Lower-case letters: each byte is the capital letter's byte minus 0x20,
# i.e. in the range 0xC0-0xDF.
1 "a" 0 "\0xC1"
1 "b" 0 "\0xC2"
1 "v" 0 "\0xD7"
1 "g" 0 "\0xC7"
1 "d" 0 "\0xC4"
1 "z" 0 "\0xDA"
1 "i" 0 "\0xC9"
1 "j" 0 "\0xCA" # i kratkoje
1 "k" 0 "\0xCB"
1 "l" 0 "\0xCC"
1 "m" 0 "\0xCD"
1 "n" 0 "\0xCE"
1 "o" 0 "\0xCF"
1 "p" 0 "\0xD0"
1 "r" 0 "\0xD2"
1 "s" 0 "\0xD3"
1 "t" 0 "\0xD4"
1 "u" 0 "\0xD5"
1 "f" 0 "\0xC6"
1 "x" 0 "\0xC8" # kha
1 "h" 0 "\0xC8" # kha
1 "c" 0 "\0xC3"
1 "y" 0 "\0xD9"
1 "e" 0 "\0xC5" # je
|