old-version-1.02
|
Makefile,
Makefile.os2,
Makefile.unx,
alt-gos.rus,
alt-koi8.rus,
announcement,
example.alt,
example.ko8,
example.pho,
example.tex,
gos-alt.rus,
gos-koi8.rus,
hex-text.rus,
k8-tavtt.rus,
koi7-8.rus,
koi7nl-8.rus,
koi8-7.rus,
koi8-alt.rus,
koi8-gos.rus,
koi8-lc.rus,
koi8-phg.rus,
koi8-php.rus,
koi8-tex.rus,
order.txt,
paths.h,
phg-koi8.rus,
pho-8sim.rus,
pho-koi8.rus,
php-koi8.rus,
readme.doc,
reg_exp.c,
reg_exp.h,
reg_sub.c,
tex-koi8.rus,
translit.1,
translit.c,
translit.txt,
|
|
|
# Jan Labanowski, jkl@ccl.net, Jan. 10, 1992
# File koi8_tex.dat
# This is a transliteration data file for converting from KOI-8 as used
# by RELCOM (GOST 19768-74) to LaTeX
# The TeX transliteration sequences follow AMS cyrillic convention for
# WNCYR fonts with cyracc.def file
# To be used with translit.c program by Jan Labanowski. For a format of
# this file consult translit documentation
# Format header: file version number, then the delimiter pairs used below
1 file version number
" " # string delimiters
[ ] # list delimiters
{ } # regular expression delimiters
# starting sequence for LaTeX (kindly contributed by Oleg Zabluda,
# Wed May 26 22:20:38 1993). It defines \cyr, which the Cyrillic shift
# sequence further down relies on, and opens the document.
"\documentstyle{article}
\input cyracc.def
\font\tencyr=wncyr10
\def\cyr{\tencyr\cyracc}
\settowidth{\textwidth}{\cyr\hspace{90em}}
\pagestyle{empty}
\begin{document}
"
# ending sequence: closes the document opened by the starting sequence
"
\end{document}
"
0 # number of input SHIFT sequences, only one set of input characters
2 # number of output SHIFT sequences, two sets of output characters
# SHIFT-OUT SHIFT-IN
"" "" # shift sequences for set 1 (Latin): both are empty strings
"{\cyr " "}" # shift sequences for set 2: cyrillic enclosed in {\cyr ... }
# conversion table
# Columns of every entry: inp_set inp_seq out_set out_seq
# Characters which are not in ASCII (and DEL) and not in KOI8 are changed
# to a star: DEL (0x7F) and codes 0x80-0xBF, except 0xA3 and 0xB3, which
# have their own entries among the Cyrillic letters (yo).
0 [\0x7F-\0xA2\0xA4-\0xB2\0xB4-\0xBF] 0 "$\star$"
# dehyphenate words, e.g. con- (NL)cert is changed to concert(NL)
# Below is a complicated (?) regular expression. It joins a hyphenated
# word. It looks for one or more letters (saves them as substring 1)
# followed by a hyphen (which may be followed by zero or more spaces
# or tabs). The hyphen must be followed by a NewLine (characters 0A-0D hex
# are various new line sequences) and saves NewLine sequence. Then it looks
# for zero or more tabs and spaces (at the beginning of the line). Then it
# looks for the rest of the hyphenated word and saves it as substring 3.
# The word may have punctuation attached. Then it looks again for some spaces
# or tabs. The substitute string junks all sequences which were not within (),
# i.e., hyphen and spaces/tabs and inserts only substrings but in a different
# order. The 1 (word beginning) is followed by 3 (word end) and followed by
# the NewLine. The {\2\1\3} would be equally good. The string is then returned
# back for processing (output code is -1). Note that since input regular
# expression is very long, I chopped it into several lines by using \NL.
# If \ is followed by a white space, the \ and all white space which follow it
# is removed by the program. Be careful not to use "\white_space" in strings,
# lists or regular expressions. If you must, enter \ as a code (i.e., \0x5C).
# uncomment lines below if you want to dehyphenate
# 0 {([A-Za-z\0xA3\0xB3\0xC0-\0xFF]+)-[ \0x09]*([\0x0A-\0x0D]+)[ \0x09]*(\
# [A-Za-z\0xA3\0xB3\0xC0-\0xFF,.?;:")'`!]+)[ \0x09]}
# -1 {\1\3\2}
# All Latin letters are converted to the same letters but with the output
# set 1 (the Latin set, whose shift sequences are both empty)
0 [A-Za-z] 1 [A-Za-z] #Latin letters A-Z and a-z
# Add \\ before all NewLine sequences
# A NewLine sequence here is a 0x0A character with any run of 0x0B-0x0D
# characters on either side. The two runs are saved as substrings 1 and 2
# and written back around the 0x0A, after the two inserted backslashes.
0 {([\0x0B-\0x0D]*)\0x0A([\0x0B-\0x0D]*)} 0 {\\\\\1\0x0A\2}
# Convert all double spaces to protected LaTeX spaces. Note that the
# backslash is followed by a space here, and had to be entered as its code
# The input string must hold exactly two spaces: with only one space every
# ordinary gap between words would be replaced by two protected spaces.
0 "  " 0 "{\0x5C \0x5C }"
# Quote some special TeX characters
# these do not require going out of {\cyr ....}
# (all entries in this group are written with out_set 0)
0 "[" 0 "$[$"
0 "]" 0 "$]$"
0 "^" 0 "$\wedge$"
0 "{" 0 "$\lbrace$"
0 "}" 0 "$\rbrace$"
0 "~" 0 "$\sim$"
0 "\" 0 "$\backslash$"
0 "|" 0 "$\mid$"
0 "*" 0 "$\star$"
0 "<" 0 "$<$"
0 ">" 0 "$>$"
0 "$" 0 "\$"
0 "%" 0 "\%"
# these can be represented correctly only in Latin charset
# (all entries in this group are written with out_set 1, the Latin set)
0 "_" 1 "\_"
0 "&" 1 "\&"
0 "#" 1 "\#"
0 "@" 1 "@"
# Cyrillic letters (every entry uses out_set 2, i.e. the {\cyr ... } set)
# Two-letter combinations come first: T or t followed by S, Sh or Shch gets
# {\cydot} inserted between the two transliterations.
# NOTE(review): presumably this stops TeX from reading "Ts"/"ts" as the
# single letter C/c -- confirm against cyracc.def
0 "\0xF4\0xFD" 2 "T{\cydot}Shch" # to prevent C
0 "\0xF4\0xDD" 2 "T{\cydot}shch" # to prevent C
0 "\0xD4\0xFD" 2 "t{\cydot}Shch" # to prevent C
0 "\0xD4\0xDD" 2 "t{\cydot}shch" # to prevent C
0 "\0xF4\0xFB" 2 "T{\cydot}Sh" # to prevent C
0 "\0xF4\0xDB" 2 "T{\cydot}sh" # to prevent C
0 "\0xD4\0xFB" 2 "t{\cydot}Sh" # to prevent C
0 "\0xD4\0xDB" 2 "t{\cydot}sh" # to prevent C
0 "\0xF4\0xF3" 2 "T{\cydot}S" # to prevent C
0 "\0xF4\0xD3" 2 "T{\cydot}s" # to prevent C
0 "\0xD4\0xF3" 2 "t{\cydot}S" # to prevent c
0 "\0xD4\0xD3" 2 "t{\cydot}s" # to prevent c
# yo: the double quote cannot appear literally inside a string, so it is
# entered as its octal code (0o42) after the backslash
0 "\0xA3" 2 "\\0o42e" # small \"e (yo)
0 "\0xB3" 2 "\\0o42E" # capital \"E (Yo)
# Capital letters (codes 0xE0-0xFF)
0 "\0xE1" 2 "A"
0 "\0xE2" 2 "B"
0 "\0xF7" 2 "V"
0 "\0xE7" 2 "G"
0 "\0xE4" 2 "D"
0 "\0xE5" 2 "E"
0 "\0xF6" 2 "Zh"
0 "\0xFA" 2 "Z"
0 "\0xE9" 2 "I"
0 "\0xEA" 2 "{\u I}" # I kratkoje
0 "\0xEB" 2 "K"
0 "\0xEC" 2 "L"
0 "\0xED" 2 "M"
0 "\0xEE" 2 "N"
0 "\0xEF" 2 "O"
0 "\0xF0" 2 "P"
0 "\0xF2" 2 "R"
0 "\0xF3" 2 "S"
0 "\0xF4" 2 "T"
0 "\0xF5" 2 "U"
0 "\0xE6" 2 "F"
0 "\0xE8" 2 "Kh"
0 "\0xE3" 2 "C"
0 "\0xFE" 2 "Ch"
0 "\0xFB" 2 "Sh"
0 "\0xFD" 2 "Shch"
0 "\0xFF" 2 "{\Cdprime}" # Tverdyj znak
0 "\0xF9" 2 "Y"
0 "\0xF8" 2 "{\Cprime}" # Myagkij znak
0 "\0xFC" 2 "\`E"
0 "\0xE0" 2 "Yu"
0 "\0xF1" 2 "Ya"
# Small letters (codes 0xC0-0xDF), in the same order as the capitals
0 "\0xC1" 2 "a"
0 "\0xC2" 2 "b"
0 "\0xD7" 2 "v"
0 "\0xC7" 2 "g"
0 "\0xC4" 2 "d"
0 "\0xC5" 2 "e"
0 "\0xD6" 2 "zh"
0 "\0xDA" 2 "z"
0 "\0xC9" 2 "i"
0 "\0xCA" 2 "{\u i}" # i kratkoje
0 "\0xCB" 2 "k"
0 "\0xCC" 2 "l"
0 "\0xCD" 2 "m"
0 "\0xCE" 2 "n"
0 "\0xCF" 2 "o"
0 "\0xD0" 2 "p"
0 "\0xD2" 2 "r"
0 "\0xD3" 2 "s"
0 "\0xD4" 2 "t"
0 "\0xD5" 2 "u"
0 "\0xC6" 2 "f"
0 "\0xC8" 2 "kh"
0 "\0xC3" 2 "c"
0 "\0xDE" 2 "ch"
0 "\0xDB" 2 "sh"
0 "\0xDD" 2 "shch"
0 "\0xDF" 2 "{\cdprime}" # tverdyj znak
0 "\0xD9" 2 "y"
0 "\0xD8" 2 "{\cprime}" # myagkij znak
0 "\0xDC" 2 "\`e"
0 "\0xC0" 2 "yu"
0 "\0xD1" 2 "ya"
|