1
+ ---
2
+ authority_id : bgnpcgn
3
+ id : 2007
4
+ language : bak
5
+ source_script : Cyrl
6
+ destination_script : Latn
7
+ name : BASHKIR TABLE OF CORRESPONDENCES CYRILLIC-ROMAN BGN/PCGN 2007 Agreement
8
+ url : https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/829203/TABLE_OF_CORRESPONDENCES__FOR_BASHKIR.pdf
9
+ creation_date : 2007
10
+ confirmation_date : 2019
11
+ description : |
12
+ Bashkir is an official language within Respublika Bashkortostan, one of the
13
+ republics of the Russian Federation. It will normally be encountered in Cyrillic script, in
14
+ which case it should be romanized by means of the Cyrillic-Roman table of
15
+ correspondences given below.
16
+
17
+ notes :
18
+ - The letter w is used word initially and before a vowel. # 'and' or 'or' ?
19
+ - The letter sequence ye is used word initially and before a vowel. # 'and' or 'or' ?
20
+ - The letter w is used between or after vowels.
21
+ - The letter w is used after e, u, ö and ə.
22
+ - |
23
+ An inventory of letter-diacritic combinations, with their Unicode encoding,
24
+ in addition to the unmodified letters of the basic Roman script is:
25
+ Ğ (U+011E) ğ (U+011F)
26
+ Ź (U+0179) ź (U+017A)
27
+ Ë (U+00CB) ë (U+00EB)
28
+ Ñ (U+00D1) ñ (U+00F1)
29
+ Ö (U+00D6) ö (U+00F6)
30
+ Ś (U+015A) ś (U+015B)
31
+ Ü (U+00DC) ü (U+00FC)
32
+ Ç (U+00C7) ç (U+00E7)
33
+ Ş (U+015E) ş (U+015F)
34
+ Ə (U+018F) ə (U+0259)
35
+ - |
36
+ The Roman-script columns show only lowercase forms but, when applying the table,
37
+ uppercase and lowercase Roman letters as appropriate should be used.
38
+
39
+ tests :
40
+ # adopted from http://www.eki.ee/knab/lat/kblba.pdf
41
+ - source : Васйылға
42
+ expected : Wasyılğa
43
+ - source : Еҙем
44
+ expected : Yeźem
45
+ - source : Раевка
46
+ expected : Raevka
47
+ - source : Сәйетҡол
48
+ expected : Səyetqol
49
+ - source : Ауырғазы
50
+ expected : Awırğazı
51
+ - source : Бурһыҡтау
52
+ expected : Burhıqtaw
53
+ - source : Мәләүез
54
+ expected : Mələwez
55
+ - source : Ҡыҙылъяр
56
+ expected : Qıźılyar
57
+ # adopted from https://en.wikipedia.org/wiki/Bashkir_language#Grammar
58
+ - source : кемдең
59
+ expected : kemdeñ
60
+ - source : кем
61
+ expected : kem
62
+ - source : был
63
+ expected : bıl
64
+ - source : ошо
65
+ expected : oşo
66
+ - source : быларҙың
67
+ expected : bılarźıñ
68
+ - source : һеҙҙән
69
+ expected : heźźən
70
+ - source : һин
71
+ expected : hin
72
+ - source : һеҙҙең
73
+ expected : heźźeñ
74
+
75
+ map :
76
+ rules :
77
+ # note[1]
78
+ - pattern : \b\u0412(?=[АаЕеЁёИиОоӨөУуҮүЫыЭэӘәЮюЯя])
79
+ result : " W"
80
+ - pattern : \b\u0432(?=[АаЕеЁёИиОоӨөУуҮүЫыЭэӘәЮюЯя])
81
+ result : " w"
82
+ # note[2]
83
+ - pattern : \b\u0415
84
+ result : " Ye"
85
+ - pattern : \b\u0435
86
+ result : " ye"
87
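+ - pattern : (?=\b)\u0415(?<=[АаЕеЁёИиОоӨөУуҮүЫыЭэӘәЮюЯя]) # NOTE(review): the lookbehind follows \u0415 and so tests \u0415 itself (always in the class); this matches the same text as the plain word-initial \u0415 rule — confirm the intended condition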
88
+ result : " Ye"
89
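+ - pattern : (?=\b)\u0435(?<=[АаЕеЁёИиОоӨөУуҮүЫыЭэӘәЮюЯя]) # NOTE(review): the lookbehind follows \u0435 and so tests \u0435 itself (always in the class); this matches the same text as the plain word-initial \u0435 rule — confirm the intended condition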
90
+ result : " ye"
91
+
92
+ # note[3] # note[4]
93
+ - pattern : (?<=[АаЕеЁёИиОоӨөУуҮүЫыЭэӘәЮюЯя])[\u0423\u04AE]
94
+ result : W
95
+ - pattern : (?<=[АаЕеЁёИиОоӨөУуҮүЫыЭэӘәЮюЯя])[\u0443\u04AF]
96
+ result : w
97
+
98
+
99
+ characters :
100
+ ' \u0410 ' : ' A' # А
101
+ ' \u0411 ' : ' B' # Б
102
+ ' \u0412 ' : ' V' # В note[1]
103
+ ' \u0413 ' : ' G' # Г
104
+ ' \u0492 ' : " \u011E " # Ғ
105
+ ' \u0414 ' : ' D' # Д
106
+ ' \u0498 ' : " \u0179 " # Ҙ
107
+ ' \u0415 ' : ' E' # Е note[2]
108
+ ' \u0401 ' : ' Ë' # Ё
109
+ ' \u0416 ' : ' J' # Ж
110
+ ' \u0417 ' : ' Z' # З
111
+ ' \u0418 ' : ' I' # И
112
+ ' \u0419 ' : ' Y' # Й
113
+ ' \u041A ' : ' K' # К
114
+ ' \u04A0 ' : ' Q' # Ҡ
115
+ ' \u041B ' : ' L' # Л
116
+ ' \u041C ' : ' M' # М
117
+ ' \u041D ' : ' N' # Н
118
+ ' \u04A2 ' : ' Ñ' # Ң
119
+ ' \u041E ' : ' O' # О
120
+ ' \u04E8 ' : " Ö" # Ө
121
+ ' \u041F ' : ' P' # П
122
+ ' \u0420 ' : ' R' # Р
123
+ ' \u0421 ' : ' S' # С
124
+ ' \u04AA ' : ' Ś' # Ҫ
125
+ ' \u0422 ' : ' T' # Т
126
+ ' \u0423 ' : ' U' # У
127
+ ' \u04AE ' : ' Ü' # Ү note[3]
128
+ ' \u0424 ' : ' F' # Ф
129
+ ' \u0425 ' : ' X' # Х
130
+ ' \u04BA ' : ' H' # Һ
131
+ ' \u0426 ' : ' Ts' # Ц
132
+ ' \u0427 ' : ' Ç' # Ч
133
+ ' \u0428 ' : ' Ş' # Ш
134
+ ' \u0429 ' : ' Şç' # Щ
135
+ ' \u042A ' : ' ' # Ъ
136
+ ' \u042B ' : ' I' # Ы NOTE(review): same Latin output as И (\u0418) — confirm against the BGN/PCGN table
137
+ ' \u042C ' : ' ' # Ь
138
+ ' \u042D ' : ' E' # Э
139
+ ' \u04D8 ' : " \u018F " # Ә
140
+ ' \u042E ' : ' Yu' # Ю
141
+ ' \u042F ' : ' Ya' # Я
142
+
143
+ ' \u0430 ' : ' a' # а
144
+ ' \u0431 ' : ' b' # б
145
+ ' \u0432 ' : ' v' # в note[1]
146
+ ' \u0433 ' : ' g' # г
147
+ ' \u0493 ' : " \u011F " # ғ
148
+ ' \u0434 ' : ' d' # д
149
+ ' \u0499 ' : ' ź' # ҙ
150
+ ' \u0435 ' : ' e' # e note[2]
151
+ ' \u0451 ' : ' ë' # ё
152
+ ' \u0436 ' : ' j' # ж
153
+ ' \u0437 ' : ' z' # з
154
+ ' \u0438 ' : ' i' # и
155
+ ' \u0439 ' : ' y' # й
156
+ ' \u043A ' : ' k' # к
157
+ ' \u04A1 ' : ' q' # ҡ
158
+ ' \u043B ' : ' l' # л
159
+ ' \u043C ' : ' m' # м
160
+ ' \u043D ' : ' n' # н
161
+ ' \u04A3 ' : ' ñ' # ң
162
+ ' \u043E ' : ' o' # о
163
+ ' \u04E9 ' : " \u00F6 " # ө
164
+ ' \u043F ' : ' p' # п
165
+ ' \u0440 ' : ' r' # р
166
+ ' \u0441 ' : ' s' # с
167
+ ' \u04AB ' : ' ś' # ҫ
168
+ ' \u0442 ' : ' t' # т
169
+ ' \u0443 ' : ' u' # у
170
+ ' \u04AF ' : ' ü' # ү note[3]
171
+ ' \u0444 ' : ' f' # ф
172
+ ' \u0445 ' : ' x' # х
173
+ ' \u04BB ' : ' h' # һ
174
+ ' \u0446 ' : ' ts' # ц
175
+ ' \u0447 ' : ' ç' # ч
176
+ ' \u0448 ' : ' ş' # ш
177
+ ' \u0449 ' : ' şç' # щ
178
+ ' \u044A ' : ' ' # ъ
179
+ ' \u044B ' : " \u0131 " # ы
180
+ ' \u044C ' : ' ' # ь
181
+ ' \u044D ' : ' e' # э
182
+ ' \u04D9 ' : " \u0259 " # ә
183
+ ' \u044E ' : ' yu' # ю
184
+ ' \u044F ' : ' ya' # я
0 commit comments