This commit is contained in:
Grigoriev Oleg
2013-01-31 15:51:56 +04:00
parent 4019099928
commit 6ad2971ea4
910 changed files with 200029 additions and 0 deletions

293
data/countries.txt Normal file
View File

@ -0,0 +1,293 @@
[abkhazia]
map : 144:31
[afghanistan]
map : 160:40
[algeria]
map : 117:40
[armenia]
map : 147:33
[australia]
map : 206:76
[azerbaijan]
map : 149:33
[bahrain]
map : 152:43
[bali]
map : 194:65
[bangladesh]
map : 177:44
[bosnia]
map : 129:31
[bulgaria]
map : 129:31
[burma]
map : 181:47
[butane]
map : 178:42
[cambodia]
map : 187:52
[cameroon]
map : 124:56
[canada]
map : 48:21
[china]
map : 184:37
[croatia]
map : 127:30
[egypt]
map : 137:42
[england]
map : 115:25
[eritrea]
map : 143:50
[estland]
map : 134:22
[ethiopia]
map : 145:54
[fiji_islands]
map : 228:74
[finland]
map : 134:17
[france]
map : 119:29
[georgia]
map : 144:31
[germany]
map : 123:25
[goa]
map : 166:49
[greece]
map : 132:35
[guam]
map : 207:53
[guinea]
map : 108:52
[herzegovina]
map : 129:31
[hungary]
map : 130:28
[india]
map : 169:47
[indonesia]
map : 193:60
[iran]
map : 153:38
[iraq]
map : 147:38
[ireland]
map : 112:25
[israel]
map : 141:39
[italy]
map : 126:32
[jakarta]
map : 188:64
[japan]
map : 210:36
[kashmir]
map : 168:39
[kazakhstan]
map : 161:28
[kenya]
map : 143:59
[kuwait]
map : 150:40
[laos]
map : 186:48
[levant]
map : 141:38
[liberia]
map : 111:55
[libya]
map : 127:41
[lombok]
map : 195:65
[macedonia]
map : 132:32
[malaysia]
map : 186:57
[maldives]
map : 167:60
[mali]
map : 116:46
[malta]
map : 127:35
[mauritius]
map : 154:74
[mesopotamia]
map : 147:37
[moldova]
map : 136:28
[mongolia]
map : 187:27
[montenegro]
map : 131:32
[myanmar]
map : 182:47
[nepal]
map : 173:41
[nigeria]
map : 122:54
[north_africa]
map : 124:43
[north_korea]
map : 202:33
[north_philippines]
map : 198:49
[norway]
map : 122:19
[pakistan]
map : 163:41
[peru]
map : 66:65
[philippines]
map : 199:52
[poland]
map : 129:25
[rumania]
map : 135:29
[russia]
map : 170:25
[salvador]
map : 57:50
[scotland]
map : 114:21
[serbia]
map : 132:30
[sierra_leone]
map : 108:53
[singapore]
map : 186:58
[slovenia]
map : 128:28
[somalia]
map : 148:55
[south_korea]
map : 203:36
[sri_lanka]
map : 171:55
[sudan]
map : 137:50
[sumatra]
map : 183:58
[sweden]
map : 127:19
[syria]
map : 143:37
[taiwan]
map : 198:44
[tanzania]
map : 141:64
[thailand]
map : 185:49
[tibet]
map : 173:39
[turkey]
map : 139:34
[uganda]
map : 138:57
[ukraine]
map : 138:26
[usa]
map : 50:35
[vietnam]
map : 190:52
[wales]
map : 114:25

256
data/entities.txt Normal file
View File

@ -0,0 +1,256 @@
# HTML Entities.
# "&copy;" for example
quot : 34
amp : 38
apos : 39
lt : 60
gt : 62
nbsp : 160
iexcl : 161
cent : 162
pound : 163
curren : 164
yen : 165
brvbar : 166
sect : 167
uml : 168
copy : 169
ordf : 170
laquo : 171
not : 172
shy : 173
reg : 174
macr : 175
deg : 176
plusmn : 177
sup2 : 178
sup3 : 179
acute : 180
micro : 181
para : 182
middot : 183
cedil : 184
sup1 : 185
ordm : 186
raquo : 187
frac14 : 188
frac12 : 189
frac34 : 190
iquest : 191
Agrave : 192
Aacute : 193
Acirc : 194
Atilde : 195
Auml : 196
Aring : 197
AElig : 198
Ccedil : 199
Egrave : 200
Eacute : 201
Ecirc : 202
Euml : 203
Igrave : 204
Iacute : 205
Icirc : 206
Iuml : 207
ETH : 208
Ntilde : 209
Ograve : 210
Oacute : 211
Ocirc : 212
Otilde : 213
Ouml : 214
times : 215
Oslash : 216
Ugrave : 217
Uacute : 218
Ucirc : 219
Uuml : 220
Yacute : 221
THORN : 222
szlig : 223
agrave : 224
aacute : 225
acirc : 226
atilde : 227
auml : 228
aring : 229
aelig : 230
ccedil : 231
egrave : 232
eacute : 233
ecirc : 234
euml : 235
igrave : 236
iacute : 237
icirc : 238
iuml : 239
eth : 240
ntilde : 241
ograve : 242
oacute : 243
ocirc : 244
otilde : 245
ouml : 246
divide : 247
oslash : 248
ugrave : 249
uacute : 250
ucirc : 251
uuml : 252
yacute : 253
thorn : 254
yuml : 255
OElig : 338
oelig : 339
Scaron : 352
scaron : 353
Yuml : 376
fnof : 402
circ : 710
tilde : 732
Alpha : 913
Beta : 914
Gamma : 915
Delta : 916
Epsilon : 917
Zeta : 918
Eta : 919
Theta : 920
Iota : 921
Kappa : 922
Lambda : 923
Mu : 924
Nu : 925
Xi : 926
Omicron : 927
Pi : 928
Rho : 929
Sigma : 931
Tau : 932
Upsilon : 933
Phi : 934
Chi : 935
Psi : 936
Omega : 937
alpha : 945
beta : 946
gamma : 947
delta : 948
epsilon : 949
zeta : 950
eta : 951
theta : 952
iota : 953
kappa : 954
lambda : 955
mu : 956
nu : 957
xi : 958
omicron : 959
pi : 960
rho : 961
sigmaf : 962
sigma : 963
tau : 964
upsilon : 965
phi : 966
chi : 967
psi : 968
omega : 969
thetasym : 977
upsih : 978
piv : 982
ensp : 8194
emsp : 8195
thinsp : 8201
zwnj : 8204
zwj : 8205
lrm : 8206
rlm : 8207
ndash : 8211
mdash : 8212
lsquo : 8216
rsquo : 8217
sbquo : 8218
ldquo : 8220
rdquo : 8221
bdquo : 8222
dagger : 8224
Dagger : 8225
bull : 8226
hellip : 8230
permil : 8240
prime : 8242
Prime : 8243
lsaquo : 8249
rsaquo : 8250
oline : 8254
frasl : 8260
euro : 8364
image : 8465
weierp : 8472
real : 8476
trade : 8482
alefsym : 8501
larr : 8592
uarr : 8593
rarr : 8594
darr : 8595
harr : 8596
crarr : 8629
lArr : 8656
uArr : 8657
rArr : 8658
dArr : 8659
hArr : 8660
forall : 8704
part : 8706
exist : 8707
empty : 8709
nabla : 8711
isin : 8712
notin : 8713
ni : 8715
prod : 8719
sum : 8721
minus : 8722
lowast : 8727
radic : 8730
prop : 8733
infin : 8734
ang : 8736
and : 8743
or : 8744
cap : 8745
cup : 8746
int : 8747
there4 : 8756
sim : 8764
cong : 8773
asymp : 8776
ne : 8800
equiv : 8801
le : 8804
ge : 8805
sub : 8834
sup : 8835
nsub : 8836
sube : 8838
supe : 8839
oplus : 8853
otimes : 8855
perp : 8869
sdot : 8901
lceil : 8968
rceil : 8969
lfloor : 8970
rfloor : 8971
lang : 9001
rang : 9002
loz : 9674
spades : 9824
clubs : 9827
hearts : 9829
diams : 9830

181
data/languages.txt Normal file
View File

@ -0,0 +1,181 @@
[abkhazian]
[ainu]
[arabic]
[armenian]
[amharic]
[balinese]
[bamum]
[batak]
[bengali]
[buhid]
[buginese]
[bulgarian]
[burmese]
[celtic]
[cham]
[cherokee]
[chinese]
[cia-cia]
[coptic]
[cree]
[croatian]
[dravidian]
[ethiopian-semitic]
[english]
[french]
[georgian]
[german]
[greek]
[hanunoo]
[hebrew]
[hindi]
[italian]
[indian]
[japanese]
[javanese]
[kayah]
[khmer]
[komi]
[korean]
[kurd]
[laotian]
[lepcha]
[limbu]
[lisu]
[makassarese]
[maldivian]
[maltese]
[mandaic]
[mandar]
[marathi]
[mongolian]
[nko]
[okinawan]
[old-church-slavic]
[old-italic]
[oriya]
[palauan]
[palaun]
[pali]
[polish]
[punjabi]
[persian]
[pictish]
[primitive-irish]
[rejang]
[runic]
[russian]
[sami]
[sanskrit]
[santali]
[sasak]
[saurashtra]
[sinhalese]
[slovenian]
[sundanese]
[sylheti]
[syrian]
[tagalog]
[tamil]
[telugu]
[thai]
[tibetan]
[tigtinya]
[tuareg]
[turkish]
[ukrainian]
[uyghur]
[vai]
[vietnamese]
[yi]
[yiddish]

929
data/sections.txt Normal file
View File

@ -0,0 +1,929 @@
[control-character]
diap : 0000:001F
type :
languages :
countries :
[basic-latin]
diap : 0020:007F
type : alphabet
languages : english, german, french, italian, polish
countries : england, usa, germany, france, italy, poland
[latin-1-supplement]
diap : 0080:00FF
type :
languages :
countries :
[latin-extended-A]
diap : 0100:017F
type : alphabet
languages : celtic, sami, maltese, turkish
countries : scotland, wales, ireland, norway, finland, sweden, malta, turkey
[latin-extended-B]
diap : 0180:024F
type : alphabet
languages : slovenian, croatian
countries : slovenia, croatia, rumania, libya
[ipa-extensions]
diap : 0250:02AF
type : alphabet
languages :
countries :
[spacing-modifier-letters]
diap : 02B0:02FF
type : alphabet
languages :
countries :
[combining-diacritical-marks]
diap : 0300:036F
type : alphabet
languages :
countries :
[greek-coptic]
diap : 0370:03FF
type : alphabet
languages : greek, coptic
countries : greece
[cyrillic]
diap : 0400:04FF
type : alphabet
languages : russian, ukrainian, bulgarian
countries : russia, ukraine, bulgaria, serbia, macedonia, moldova
[cyrillic-supplement]
diap : 0500:052F
type : alphabet
languages : komi
countries : russia
[armenian]
diap : 0530:058F
type : alphabet
languages : armenian
countries : armenia
[hebrew]
diap : 0590:05FF
type : alphabet
languages : hebrew, yiddish
countries : israel
[arabic]
diap : 0600:06FF
type : alphabet
languages : arabic, persian, kurd
countries : algeria, bahrain, egypt, iraq, iran, kuwait, afghanistan, pakistan, india, fiji_islands
[syrian]
diap : 0700:074F
type : alphabet
languages : syrian, arabic
countries : algeria, bahrain, egypt, syria
[arabic-supplement]
diap : 0750:077F
type : alphabet
languages : arabic, persian, kurd
countries : algeria, bahrain, egypt, iraq, iran, kuwait, afghanistan, pakistan, india, fiji_islands
[thaana]
diap : 0780:07BF
type : alphabet
languages : maldivian
countries : maldives
[nko]
diap : 07C0:07FF
type : alphabet
languages : nko
countries : guinea, mali
[samaritan]
diap : 0800:083F
type : alphabet
languages :
countries :
[mandaic]
diap : 0840:085F
type : alphabet
languages : mandaic
countries :
[arabic-extended-a]
diap : 08A0:08FF
type : alphabet
languages :
countries :
[devanagari]
diap : 0900:097F
type : abugida
languages : sanskrit, hindi
countries : india, pakistan, fiji_islands, mauritius
[bengali]
diap : 0980:09FF
type : abugida
languages : bengali
countries : india, bangladesh, butane
[gurmukhi]
diap : 0A00:0A7F
type : abugida
languages : punjabi
countries : india, pakistan
[gujarati]
diap : 0A80:0AFF
type : abugida
languages :
countries : india, pakistan, uganda, tanzania, kenya
[oriya]
diap : 0B00:0B7F
type : abugida
languages : oriya
countries : india
[tamil]
diap : 0B80:0BFF
type : abugida
languages : tamil, sanskrit
countries : india, sri_lanka, singapore, malaysia, kenya
[telugu]
diap : 0C00:0C7F
type : abugida
languages : telugu
countries : india
[kannada]
diap : 0C80:0CFF
type : abugida
languages : dravidian
countries : india, goa
[malayalam]
diap : 0D00:0D7F
type : abugida
languages : dravidian
countries : india, goa
[sinhala]
diap : 0D80:0DFF
type : abugida
languages : sinhalese, sanskrit
countries : sri_lanka, india
[thai]
diap : 0E00:0E7F
type : abugida
languages : thai
countries : thailand, india
[lao]
diap : 0E80:0EFF
type : abugida
languages : laotian
countries : thailand, laos
[tibetan]
diap : 0F00:0FFF
type : abugida
languages : tibetan
countries : china, india, butane, nepal, pakistan
[myanmar]
diap : 1000:109F
type : abugida
languages : burmese
countries : myanmar, thailand, bangladesh, malaysia
[georgian]
diap : 10A0:10FF
type : alphabet
languages : georgian, abkhazian
countries : georgia, abkhazia
[hangul-jamo]
diap : 1100:11FF
type : abugida
languages : korean
countries : north_korea, south_korea, china, japan, indonesia
[ethiopic]
diap : 1200:137F
type : abugida
languages :
countries : ethiopia, eritrea, somalia, sudan, israel
[ethiopic-supplement]
diap : 1380:139F
type : abugida
languages :
countries :
[cherokee]
diap : 13A0:13FF
type : syllabary
languages : cherokee
countries : usa
[unified-canadian-aboriginal-syllabics]
diap : 1400:167F
type : abugida
languages : cree
countries : canada, usa
[ogham]
diap : 1680:169F
type : alphabet
languages : primitive-irish, pictish
countries : scotland, ireland, wales
[runic]
diap : 16A0:16FF
type : alphabet
languages : old-italic, runic
countries :
[tagalog]
diap : 1700:171F
type : abugida
languages : tagalog, runic
countries : north_philippines
[hanunoo]
diap : 1720:173F
type : abugida
languages : hanunoo
countries : philippines
[buhid]
diap : 1740:175F
type : abugida
languages : buhid
countries : philippines
[tagbanwa]
diap : 1760:177F
type : abugida
languages :
countries : philippines
[khmer]
diap : 1780:17FF
type : abugida
languages : khmer
countries : cambodia, vietnam, thailand, laos, china
[mongolian]
diap : 1800:18AF
type : alphabet
languages : mongolian
countries : china, mongolia, afghanistan
[unified-canadian-aboriginal-syllabics-extended]
diap : 18B0:18FF
type : abugida
languages : cree
countries : canada, usa
[limbu]
diap : 1900:194F
type : abugida
languages : limbu
countries : nepal, india, kashmir, pakistan
[tai-le]
diap : 1950:197F
type : abugida
languages :
countries : vietnam, laos, myanmar, thailand
[new-tai-lue]
diap : 1980:19DF
type : alphabet
languages :
countries : vietnam, laos, myanmar, thailand
[khmer-symbols]
diap : 19E0:19FF
type :
languages :
countries :
[buginese]
diap : 1A00:1A1F
type : abugida
languages : buginese, makassarese, mandar
countries : indonesia
[tai-tham]
diap : 1A20:1AAF
type : abugida
languages :
countries : thailand, burma, laos, cambodia, china, vietnam
[balinese]
diap : 1B00:1B7F
type : abugida
languages : balinese, sasak
countries : bali, lombok, indonesia
[sundanese]
diap : 1B80:1BBF
type : abugida
languages : sundanese
countries : indonesia, jakarta
[batak]
diap : 1BC0:1BFF
type : abugida
languages : batak
countries : indonesia
[lepcha]
diap : 1C00:1C4F
type : abugida
languages : lepcha
countries : india, butane, nepal
[ol-chiki]
diap : 1C50:1C7F
type : alphabet
languages : santali
countries : india, butane, nepal, bangladesh
[sundanese-supplement]
diap : 1CC0:1CCF
type : abugida
languages : sundanese
countries :
[vedic-extensions]
diap : 1CD0:1CFF
type :
languages :
countries :
[phonetic-extentions]
diap : 1D00:1D7F
type :
languages :
countries :
[phonetic-extentions-supplement]
diap : 1D80:1DBF
type :
languages :
countries :
[combining-diacritical-marks-supplement]
diap : 1DC0:1DFF
type :
languages :
countries :
[latin-extended-additional]
diap : 1E00:1EFF
type :
languages :
countries : england, usa, germany, france, italy, poland
[greek-extended]
diap : 1F00:1FFF
type :
languages :
countries : greece
[general-punctuation]
diap : 2000:206F
type :
languages :
countries :
[superscripts-and-subscripts]
diap : 2070:209F
type :
languages :
countries :
[currency-symbols]
diap : 20A0:20CF
type :
languages :
countries :
[combining-diacritical-marks-for-symbols]
diap : 20D0:20FF
type :
languages :
countries :
[letterlike-symbols]
diap : 2100:214F
type :
languages :
countries :
[number-forms]
diap : 2150:218F
type :
languages :
countries :
[arrows]
diap : 2190:21FF
type :
languages :
countries :
[mathematical-operators]
diap : 2200:22FF
type :
languages :
countries :
[miscellaneous-technical]
diap : 2300:23FF
type :
languages :
countries :
[control-pictures]
diap : 2400:243F
type :
languages :
countries :
[optical-character-recognition]
diap : 2440:245F
type :
languages :
countries :
[enclosed-alphanumerics]
diap : 2460:24FF
type :
languages :
countries :
[box-drawing]
diap : 2500:257F
type :
languages :
countries :
[block-elements]
diap : 2580:259F
type :
languages :
countries :
[geometric-shapes]
diap : 25A0:25FF
type :
languages :
countries :
[miscellaneous-symbols]
diap : 2600:26FF
type :
languages :
countries :
[dingbats]
diap : 2700:27BF
type :
languages :
countries :
[miscellaneous-mathematical-symbols-a]
diap : 27C0:27EF
type :
languages :
countries :
[suplemental-arrows-a]
diap : 27F0:27FF
type :
languages :
countries :
[braille-patterns]
diap : 2800:28FF
type :
languages :
countries :
[suplemental-arrows-b]
diap : 2900:297F
type :
languages :
countries :
[miscellaneous-mathematical-symbols-b]
diap : 2980:29FF
type :
languages :
countries :
[supplemental-mathematical-operators]
diap : 2A00:2AFF
type :
languages :
countries :
[miscellaneous-symbols-and-arrows]
diap : 2B00:2BFF
type :
languages :
countries :
[glagolitic]
diap : 2C00:2C5F
type : alphabet
languages : old-church-slavic
countries : russia, ukraine, poland, bulgaria, bosnia, herzegovina, croatia, montenegro, macedonia
[latin-extended-c]
diap : 2C60:2C7F
type : alphabet
languages :
countries : china, kazakhstan
[coptic]
diap : 2C80:2CFF
type : alphabet
languages : coptic
countries : egypt, canada, australia, usa
[georgian-supplement]
diap : 2D00:2D2F
type : alphabet
languages : georgian
countries : georgia, abkhazia
[tifinagh]
diap : 2D30:2D7F
type : abjad
languages : tuareg
countries : north_africa
[ethiopic-extended]
diap : 2D80:2DDF
type : abugida
languages : amharic, tigtinya
countries : ethiopia, eritrea, somalia, sudan, israel
[cyrillic-extended]
diap : 2DE0:2DFF
type : alphabet
languages : russian, ukrainian
countries : russia, ukraine, bulgaria, serbia, macedonia, moldova
[supplemental-punctuation]
diap : 2E00:2E7F
type :
languages :
countries :
[cjk-radicals-supplement]
diap : 2E80:2EFF
type :
languages : chinese, japanese, vietnamese, korean
countries : peru, laos, north_korea, china, cambodia, japan, guam, taiwan, south_korea, vietnam, somalia, sudan, israel, singapore, philippines, malaysia, indonesia, thailand
[kangxi-radicals]
diap : 2F00:2FDF
type :
languages :
countries : china
[ideographic-description-characters]
diap : 2FF0:2FFF
type :
languages :
countries :
[cjk-symbols-and-punctuation]
diap : 3000:303F
type :
languages :
countries :
[hiragana]
diap : 3040:309F
type : syllabary
languages : japanese, okinawan
countries : peru, north_korea, japan, guam, taiwan, south_korea
[katakana]
diap : 30A0:30FF
type : syllabary
languages : japanese, okinawan, ainu, palauan
countries : peru, north_korea, japan, guam, taiwan, south_korea
[bopomofo]
diap : 3100:312F
type : syllabary
languages : chinese
countries : taiwan
[hangul-compatibility-jamo]
diap : 3130:318F
type : syllabary
languages : chinese
countries : north_korea, south_korea, china, japan
[kanbun]
diap : 3190:319F
type : syllabary
languages :
countries : china
[bopomofo-extended]
diap : 31A0:31BF
type : syllabary
languages :
countries : taiwan
[cjk-strokes]
diap : 31C0:31EF
type : syllabary
languages : chinese
countries : peru, laos, north_korea, china, cambodia, japan, guam, taiwan, south_korea, vietnam, somalia, sudan, israel, singapore, philippines, malaysia, indonesia, thailand
[katakana-phonetic-extentions]
diap : 31F0:31FF
type : syllabary
languages : japanese, okinawan, ainu, palauan
countries : peru, north_korea, japan, guam, taiwan, south_korea
[enclosed-cjk-letters-and-months]
diap : 3200:32FF
type : syllabary
languages : chinese
countries : peru, laos, north_korea, china, cambodia, japan, guam, taiwan, south_korea, vietnam, somalia, sudan, israel, singapore, philippines, malaysia, indonesia, thailand
[cjk-compatibility]
diap : 3300:33FF
type : syllabary
languages : chinese, japanese, korean, vietnamese
countries : peru, laos, north_korea, china, cambodia, japan, guam, taiwan, south_korea, vietnam, somalia, sudan, israel, singapore, philippines, malaysia, indonesia, thailand
[cjk-unified-ideographs-extension-a]
diap : 3400:4DBF
type : syllabary
languages : chinese, japanese, korean, vietnamese
countries : peru, laos, north_korea, china, cambodia, japan, guam, taiwan, south_korea, vietnam, somalia, sudan, israel, singapore, philippines, malaysia, indonesia, thailand
[yijing-hexagram-symbols]
diap : 4DC0:4DFF
type : syllabary
languages :
countries : china
[cjk-unified-ideographs]
diap : 4E00:9FFF
type : syllabary
languages : chinese, japanese, korean, vietnamese
countries : peru, laos, north_korea, china, cambodia, japan, guam, taiwan, south_korea, vietnam, somalia, sudan, israel, singapore, philippines, malaysia, indonesia, thailand
[yi-syllables]
diap : A000:A48F
type : syllabary
languages : yi
countries : china
[yi-radicals]
diap : A490:A4CF
type : syllabary
languages : yi
countries : china
[old-lisu-alphabet]
diap : A500:A63F
type : syllabary
languages : vai
countries : liberia, sierra_leone
[ciryllic-extended-B]
diap : A640:A69F
type : alphabet
languages :
countries : russia, ukraine, bulgaria, serbia, macedonia, moldova
[bamum]
diap : A6A0:A6FF
type : semisyllabary
languages : bamum
countries : cameroon, nigeria
[modifier-tone-letters]
diap : A700:A71F
type :
languages :
countries :
[latin-extended-d]
diap : A720:A7FF
type : alphabet
languages :
countries : finland, estland, hungary, salvador
[syloty-nagri]
diap : A800:A82F
type : abugida
languages : sylheti, bengali
countries : bangladesh, india
[indian-numeric-character]
diap : A830:A83F
type :
languages :
countries :
[phags-pa]
diap : A840:A87F
type : abugida
languages : mongolian, sanskrit, tibetan, chinese, uyghur
countries : mongolia, india, china, tibet, kazakhstan
[saurashtra]
diap : A880:A8DF
type : alphabet
languages : saurashtra
countries : india
[devanagari-extended-characters]
diap : A8E0:A8FF
type : abugida
languages : marathi, indian, sanskrit, hindi
countries : india, pakistan, fiji_islands, mauritius
[kayah-li]
diap : A900:A92F
type : abugida
languages : kayah
countries : burma, thailand
[rejang]
diap : A930:A95F
type : abugida
languages : rejang
countries : sumatra, indonesia
[hangul]
diap : A960:A97F
type : alphabet
languages : korean
countries : north_korea, south_korea, china, japan, indonesia
[javanese-alphabet]
diap : A980:A9DF
type : abugida
languages : javanese, sundanese
countries : indonesia
[cham-alphabet]
diap : AA00:AA5F
type : abugida
languages : cham
countries : cambodia, vietnam, thailand, china
[burmese-alphabet-extention-a]
diap : AA60:AA7F
type : abugida
languages :
countries : myanmar, thailand, bangladesh, malaysia
[tai-viet-alphabet]
diap : AA80:AADF
type : abugida
languages :
countries : vietnam, thailand, laos, china
[meitei-language-extentions]
diap : AAE0:AAFF
type : abugida
languages :
countries : vietnam, thailand, laos, china
[ethiopic-extended-a]
diap : AB00:AB2F
type : abugida
languages : ethiopian-semitic
countries : ethiopia, sudan, eritrea, somalia, israel
[meitei-language]
diap : ABC0:ABFF
type :
languages :
countries : india, bangladesh, burma
[hangul-syllables]
diap : AC00:D7AF
type : alphabet
languages : korean, cia-cia
countries : north_korea, south_korea, china, japan, indonesia
[high-surrogates]
diap : D800:DB7F
type :
languages :
countries :
[high-private-use-surrogates]
diap : DB80:DBFF
type :
languages :
countries :
[low-surrogates]
diap : DC00:DFFF
type :
languages :
countries :
[private-use-area]
diap : E000:F8FF
type :
languages :
countries :
[cjk-compatibility-ideographs]
diap : F900:FAFF
type :
languages :
countries :
[alphabetic-presentation-forms]
diap : FB00:FB4F
type :
languages :
countries :
[arabic-presentation-forms-a]
diap : FB50:FDFF
type :
languages :
countries :
[variation-selectors]
diap : FE00:FE0F
type :
languages :
countries :
[vertical-forms]
diap : FE10:FE1F
type :
languages :
countries :
[combining-half-marks]
diap : FE20:FE2F
type :
languages :
countries :
[cjk-compatibility-forms]
diap : FE30:FE4F
type :
languages :
countries :
[small-form-variants]
diap : FE50:FE6F
type :
languages :
countries :
[arabic-presentation-forms-b]
diap : FE70:FEFF
type :
languages :
countries :
[halfwidth-and-fullwidth-forms]
diap : FF00:FFEF
type :
languages :
countries :
[specials]
diap : FFF0:FFFF
type :
languages :
countries :

42
data/sets.txt Normal file
View File

@ -0,0 +1,42 @@
# Character sets
# Наборы символов
# http://unicode-table.com/sets/
[popular]
set: ❤,♎,☀,★,☂,♞,☯,☭,☢,€,☎,⚑,❄,♫,✂
[new-year-and-christmas-symbols]
set: ☃,❄,❅,❆,★,☆,✪,✫,✯,⚝,⚪,⚫,⚹,✵,❉,❋,✺,✹,✸,✶,✷,✵,✴,✳,✲,✱,✧,✦,⍟,⊛
[chess-symbols]
set: ♔,♕,♖,♗,♘,♙,♚,♛,♜,♝,♞,♟
[hearts-symbols]
set: ❦, ❧, ☙, ❥, ❣, ♡, ♥, ❤, ➳, ღ
[signs-of-the-zodiac]
set: ♈, ♉, ♊, ♋, ♌, ♍, ♎, ♏, ♐, ♑, ♒, ♓
[suits-of-the-cards]
set: ♠, ♤, ♥, ♡, ♣, ♧, ♦, ♢
[currency-symbols]
set: ₳, ฿, ¢, ₡, ¢, ₢, ₵, ₫, €, £, £, ₤, ₣, ƒ, ₲, ₭, ₥, ₦, ₱, , $, ₮, ₩, ₩, ¥, ¥, ₴, ¤, ₰, ៛, ₪,₯, ₠, ₧, ₧, ௹, ﷼, ㍐, ৲, ৳, ₹
[office-accessory-symbols]
set: ✆, ☎, ☏, ✉, ☕, ✁, ✂, ✃, ✄, ✍, ✎, ✏, ✐, ✑, ✒, ⌦, ⌫, ⌧, ㍶, ℻
[arrows-symbols]
set: ←, ↑, →, ↓, ↔, ↕, ↖, ↗, ↘, ↙, ▲, ▼, ◀, ▶, ➔, ➘, ➙, ➚, ➛, ➜, ➝, ➞, ➟, ➠, ➡, ➢, ➣, ➤, ➥, ➦, ↪, ↩, ↚, ↛, ↜, ↝, ↞, ↟, ↠, ↡, ↢, ↣, ↤, ↦, ↥, ↧, ↨, ↫, ↬, ↭, ↮,↯, ↰,↱, ↲, ↴, ↳, ↵, ↶, ↷, ↸, ↹, ↺, ↻, ⟲, ⟳, ↼, ↽, ↾, ↿, ⇀, ⇁, ⇂, ⇃, ⇄, ⇅, ⇆, ⇇, ⇈, ⇉, ⇊, ⇋, ⇌, ⇍, ⇏, ⇏, ⇏, ⇏, ⇏, ⇏, ⇏, ⇕, ⇖, ⇗, ⇘, ⇙, ⇙, ⇳, ⇚, ⇛, ⇜, ⇝, ⇞, ⇟, ⇟, ⇟, ⇠, ⇡, ⇢, ⇣, ⇤, ⇥, ⇦, ⇨, ⇩, ⇪, ⇧, ⇫, ⇬, ⇭, ⇮, ⇯, ⇰, ⇱, ⇲, ⇴, ⇵, ⇶, ⇷, ⇸, ⇹, ⇺, ⇺, ⇻, ⇼,⇽, ⇾, ⇿, ⟰, ⟱, ⟴, ⟵, ⟶, ⟷, ⟸, ⟹, ⟽, ⟾, ⟺, ⟻, ⟼, ⟿, ⤀, ⤁, ⤅, ⤂, ⤃, ⤄, ⤆, ⤇, ⤈, ⤉, ⤊, ⤋, ⤌, ⤍, ⤎, ⤏, ⤐, ⤑, ⤒, ⤓, ⤔, ⤕, ⤖, ⤗, ⤘, ⤙, ⤙, ⤚, ⤛, ⤜, ⤝, ⤞, ⤡, ⤢, ⤣, ⤤, ⤥, ⤦, ⤧, ⤨, ⤩, ⤪, ⤭, ⤮, ⤯, ⤰, ⤱, ⤲, ⤳, ⤻, ⤸, ⤾, ⤿, ⤺, ⤼, ⤽, ⤴, ⤵, ⤶, ⤷, ⤹, ⥀, ⥁, ⥂, ⥃, ⥄, ⥅, ⥆, ⥇, ⥈, ⥉, ⥒, ⥓, ⥔, ⥕, ⥖, ⥗, ⥘, ⥙, ⥚, ⥛, ⥜, ⥝, ⥞, ⥟, ⥠, ⥡, ⥢, ⥣, ⥤, ⥥, ⥦, ⥧, ⥨, ⥩, ⥪, ⥫, ⥬, ⥭, ⥮, ⥯, ⥰, ⥱, ⥲, ⥳, ⥴, ⥵, ⥶, ⥷, ⥸, ⥹, ⥺, ⥻, ➧, ➨, ➩, ➪, ➫, ➬, ➭, ➮, ➯, ➱, ➲, ➳, ➴, ➵, ➶, ➷, ➸, ➹, ➺, ➻, ➼, ➽, ➾, ⬅, ⬆, ⬇, ⏎, ⬎, ⬏, ⬐, ⬑, ☈, ☇, ⍃, ⍄, ⍇, ⍈, ⍐, ⍗, ⍌, ⍓, ⍍, ⍔, ⍏, ⍖, ⍅, ⍆, ⬈, ⬉, ⬊, ⬋, ⬌, ⬍, ⬀, ⬁, ⬂, ⬃, ⬄
[special-symbols]
set: «, », ❝, ❞, ×, ⦂, , , , , -, , ⎯, —, ―, ~, , •, °, %, ‰, ‱, &, ⅋, §, ÷, ±, ‼, ¡, ¿, ⸮, ⁇, ⁉, ⁈, ‽, ⸘, ¼, ½, ¾, ², ³, ⅕, ⅙, ⅛, ©, ®, ™, ℠, ℻, ℅, ℁, ⅍, ¶, ⁋, ≠,√, , ∛, ∜, ∞, β, Φ, Σ, €, ₤, , ♀, ♂, ⚢, ⚣, ⌘, ♲, ♻, ☺, ★, ↑, ↓
[sea-symbols]
set: ♆, ≈, ~, ☼, ≋, ⚓, ⎈, ⚑, ⚐, ⚙
[music-symbols]
set: ☊, ♪, ♫, ♯, ▶, ◼, ♬, ♭, ♮,
[flowers-symbols]
set: ⚜,✥, ✤, ✻, ✼, ✽, ✾, ❀, ✿, ❁, ❃, ❇, ❈, ❉, ❊, ❋, ⚘

10
data/specs.txt Normal file
View File

@ -0,0 +1,10 @@
# \n -> 10
0: 0
a: 7
b: 8
t: 9
n: 10
v: 11
f: 12
r: 13

10
data/types.txt Normal file
View File

@ -0,0 +1,10 @@
[abjad]
[abugida]
[alphabet]
[syllabary]
[semisyllabary]