source: Base/UTF8.php

Last change on this file was 1, checked in by george, 15 years ago
  • Přidáno: Výchozí verze systému.
File size: 11.4 KB
Line 
1<?php
2/*
3 ISO8859-2 <-> UTF-8 conversion functions designed for use in PHP-GTK apps
4 Adam Rambousek - rambousek@volny.cz
5
6 version history:
7 1.03 --- 12/02/2002
8 * added Win1257 support
9 1.02 --- 30/11/2001
10 * added ISO8859-1 support
11 1.01
12 * added Win1250 support
13 1.00
14
15 string ToUTF8(string string [, string charset])
16 string FromUTF8(string string [, string charset])
17
18 supported charsets: name of charset you must use in script
19 ISO8859-2: iso2 (this is the default charset, you don't have to specify it)
20 Windows1250: win1250
21 ISO8859-1: iso1
22 Windows1257: win1257
23
24 example: $new_string=ToUTF8($some_string,"win1250");
25*/
26
27
/*
 Translation tables used by ToUTF8() and FromUTF8().

 $CharTable[<charset name>][<byte value>] is the UTF-8 encoding (two or three
 bytes) of the character that <byte value> represents in that single-byte
 charset. Bytes below 0x80 are plain ASCII in every supported charset and are
 therefore not listed.

 Charset names: 'iso2' (ISO8859-2), 'win1250' (Windows1250),
 'iso1' (ISO8859-1), 'win1257' (Windows1257).

 Byte values that a Windows code page leaves unassigned map to the C1 control
 character with the same number (U+0080..U+009F), the same convention the
 win1250 table already followed. Unassigned bytes outside that range map to
 U+FFFD REPLACEMENT CHARACTER. Every value is a well-formed UTF-8 sequence.
*/

$CharTable = array(
  'iso2' => array(
    0x80=>"\xc2\x80",
    0x81=>"\xc2\x81",
    0x82=>"\xc2\x82",
    0x83=>"\xc2\x83",
    0x84=>"\xc2\x84",
    0x85=>"\xc2\x85",
    0x86=>"\xc2\x86",
    0x87=>"\xc2\x87",
    0x88=>"\xc2\x88",
    0x89=>"\xc2\x89",
    0x8A=>"\xc2\x8a",
    0x8B=>"\xc2\x8b",
    0x8C=>"\xc2\x8c",
    0x8D=>"\xc2\x8d",
    0x8E=>"\xc2\x8e",
    0x8F=>"\xc2\x8f",
    0x90=>"\xc2\x90",
    0x91=>"\xc2\x91",
    0x92=>"\xc2\x92",
    0x93=>"\xc2\x93",
    0x94=>"\xc2\x94",
    0x95=>"\xc2\x95",
    0x96=>"\xc2\x96",
    0x97=>"\xc2\x97",
    0x98=>"\xc2\x98",
    0x99=>"\xc2\x99",
    0x9A=>"\xc2\x9a",
    0x9B=>"\xc2\x9b",
    0x9C=>"\xc2\x9c",
    0x9D=>"\xc2\x9d",
    0x9E=>"\xc2\x9e",
    0x9F=>"\xc2\x9f",
    0xA0=>"\xc2\xa0",
    0xA1=>"\xc4\x84",
    0xA2=>"\xcb\x98",
    0xA3=>"\xc5\x81",
    0xA4=>"\xc2\xa4",
    0xA5=>"\xc4\xbd",
    0xA6=>"\xc5\x9a",
    0xA7=>"\xc2\xa7",
    0xA8=>"\xc2\xa8",
    0xA9=>"\xc5\xa0",
    0xAA=>"\xc5\x9e",
    0xAB=>"\xc5\xa4",
    0xAC=>"\xc5\xb9",
    0xAD=>"\xc2\xad",
    0xAE=>"\xc5\xbd",
    0xAF=>"\xc5\xbb",
    0xB0=>"\xc2\xb0",
    0xB1=>"\xc4\x85",
    0xB2=>"\xcb\x9b",
    0xB3=>"\xc5\x82",
    0xB4=>"\xc2\xb4",
    0xB5=>"\xc4\xbe",
    0xB6=>"\xc5\x9b",
    0xB7=>"\xcb\x87",
    0xB8=>"\xc2\xb8",
    0xB9=>"\xc5\xa1",
    0xBA=>"\xc5\x9f",
    0xBB=>"\xc5\xa5",
    0xBC=>"\xc5\xba",
    0xBD=>"\xcb\x9d",
    0xBE=>"\xc5\xbe",
    0xBF=>"\xc5\xbc",
    0xC0=>"\xc5\x94",
    0xC1=>"\xc3\x81",
    0xC2=>"\xc3\x82",
    0xC3=>"\xc4\x82",
    0xC4=>"\xc3\x84",
    0xC5=>"\xc4\xb9",
    0xC6=>"\xc4\x86",
    0xC7=>"\xc3\x87",
    0xC8=>"\xc4\x8c",
    0xC9=>"\xc3\x89",
    0xCA=>"\xc4\x98",
    0xCB=>"\xc3\x8b",
    0xCC=>"\xc4\x9a",
    0xCD=>"\xc3\x8d",
    0xCE=>"\xc3\x8e",
    0xCF=>"\xc4\x8e",
    0xD0=>"\xc4\x90",
    0xD1=>"\xc5\x83",
    0xD2=>"\xc5\x87",
    0xD3=>"\xc3\x93",
    0xD4=>"\xc3\x94",
    0xD5=>"\xc5\x90",
    0xD6=>"\xc3\x96",
    0xD7=>"\xc3\x97",
    0xD8=>"\xc5\x98",
    0xD9=>"\xc5\xae",
    0xDA=>"\xc3\x9a",
    0xDB=>"\xc5\xb0",
    0xDC=>"\xc3\x9c",
    0xDD=>"\xc3\x9d",
    0xDE=>"\xc5\xa2",
    0xDF=>"\xc3\x9f",
    0xE0=>"\xc5\x95",
    0xE1=>"\xc3\xa1",
    0xE2=>"\xc3\xa2",
    0xE3=>"\xc4\x83",
    0xE4=>"\xc3\xa4",
    0xE5=>"\xc4\xba",
    0xE6=>"\xc4\x87",
    0xE7=>"\xc3\xa7",
    0xE8=>"\xc4\x8d",
    0xE9=>"\xc3\xa9",
    0xEA=>"\xc4\x99",
    0xEB=>"\xc3\xab",
    0xEC=>"\xc4\x9b",
    0xED=>"\xc3\xad",
    0xEE=>"\xc3\xae",
    0xEF=>"\xc4\x8f",
    0xF0=>"\xc4\x91",
    0xF1=>"\xc5\x84",
    0xF2=>"\xc5\x88",
    0xF3=>"\xc3\xb3",
    0xF4=>"\xc3\xb4",
    0xF5=>"\xc5\x91",
    0xF6=>"\xc3\xb6",
    0xF7=>"\xc3\xb7",
    0xF8=>"\xc5\x99",
    0xF9=>"\xc5\xaf",
    0xFA=>"\xc3\xba",
    0xFB=>"\xc5\xb1",
    0xFC=>"\xc3\xbc",
    0xFD=>"\xc3\xbd",
    0xFE=>"\xc5\xa3",
    0xFF=>"\xcb\x99"
  ),
  'win1250' => array(
    0x80=>"\xe2\x82\xac", // EURO SIGN (was wrongly mapped to the C1 control U+0080)
    0x81=>"\xc2\x81",
    0x82=>"\xe2\x80\x9a",
    0x83=>"\xc2\x83",
    0x84=>"\xe2\x80\x9e",
    0x85=>"\xe2\x80\xa6",
    0x86=>"\xe2\x80\xa0",
    0x87=>"\xe2\x80\xa1",
    0x88=>"\xc2\x88",
    0x89=>"\xe2\x80\xb0",
    0x8a=>"\xc5\xa0",
    0x8b=>"\xe2\x80\xb9",
    0x8c=>"\xc5\x9a",
    0x8d=>"\xc5\xa4",
    0x8e=>"\xc5\xbd",
    0x8f=>"\xc5\xb9",
    0x90=>"\xc2\x90",
    0x91=>"\xe2\x80\x98",
    0x92=>"\xe2\x80\x99",
    0x93=>"\xe2\x80\x9c",
    0x94=>"\xe2\x80\x9d",
    0x95=>"\xe2\x80\xa2",
    0x96=>"\xe2\x80\x93",
    0x97=>"\xe2\x80\x94",
    0x98=>"\xc2\x98", // unassigned (was a duplicate of 0x91's left single quote)
    0x99=>"\xe2\x84\xa2",
    0x9a=>"\xc5\xa1",
    0x9b=>"\xe2\x80\xba",
    0x9c=>"\xc5\x9b",
    0x9d=>"\xc5\xa5",
    0x9e=>"\xc5\xbe",
    0x9f=>"\xc5\xba",
    0xa0=>"\xc2\xa0",
    0xa1=>"\xcb\x87",
    0xa2=>"\xcb\x98",
    0xa3=>"\xc5\x81",
    0xa4=>"\xc2\xa4",
    0xa5=>"\xc4\x84",
    0xa6=>"\xc2\xa6",
    0xa7=>"\xc2\xa7",
    0xa8=>"\xc2\xa8",
    0xa9=>"\xc2\xa9",
    0xaa=>"\xc5\x9e",
    0xab=>"\xc2\xab",
    0xac=>"\xc2\xac",
    0xad=>"\xc2\xad",
    0xae=>"\xc2\xae",
    0xaf=>"\xc5\xbb",
    0xb0=>"\xc2\xb0",
    0xb1=>"\xc2\xb1",
    0xb2=>"\xcb\x9b",
    0xb3=>"\xc5\x82",
    0xb4=>"\xc2\xb4",
    0xb5=>"\xc2\xb5",
    0xb6=>"\xc2\xb6",
    0xb7=>"\xc2\xb7",
    0xb8=>"\xc2\xb8",
    0xb9=>"\xc4\x85",
    0xba=>"\xc5\x9f",
    0xbb=>"\xc2\xbb",
    0xbc=>"\xc4\xbd",
    0xbd=>"\xcb\x9d",
    0xbe=>"\xc4\xbe",
    0xbf=>"\xc5\xbc",
    0xc0=>"\xc5\x94",
    0xc1=>"\xc3\x81",
    0xc2=>"\xc3\x82",
    0xc3=>"\xc4\x82",
    0xc4=>"\xc3\x84",
    0xc5=>"\xc4\xb9",
    0xc6=>"\xc4\x86",
    0xc7=>"\xc3\x87",
    0xc8=>"\xc4\x8c",
    0xc9=>"\xc3\x89",
    0xca=>"\xc4\x98",
    0xcb=>"\xc3\x8b",
    0xcc=>"\xc4\x9a",
    0xcd=>"\xc3\x8d",
    0xce=>"\xc3\x8e",
    0xcf=>"\xc4\x8e",
    0xd0=>"\xc4\x90",
    0xd1=>"\xc5\x83",
    0xd2=>"\xc5\x87",
    0xd3=>"\xc3\x93",
    0xd4=>"\xc3\x94",
    0xd5=>"\xc5\x90",
    0xd6=>"\xc3\x96",
    0xd7=>"\xc3\x97",
    0xd8=>"\xc5\x98",
    0xd9=>"\xc5\xae",
    0xda=>"\xc3\x9a",
    0xdb=>"\xc5\xb0",
    0xdc=>"\xc3\x9c",
    0xdd=>"\xc3\x9d",
    0xde=>"\xc5\xa2",
    0xdf=>"\xc3\x9f",
    0xe0=>"\xc5\x95",
    0xe1=>"\xc3\xa1",
    0xe2=>"\xc3\xa2",
    0xe3=>"\xc4\x83",
    0xe4=>"\xc3\xa4",
    0xe5=>"\xc4\xba",
    0xe6=>"\xc4\x87",
    0xe7=>"\xc3\xa7",
    0xe8=>"\xc4\x8d",
    0xe9=>"\xc3\xa9",
    0xea=>"\xc4\x99",
    0xeb=>"\xc3\xab",
    0xec=>"\xc4\x9b",
    0xed=>"\xc3\xad",
    0xee=>"\xc3\xae",
    0xef=>"\xc4\x8f",
    0xf0=>"\xc4\x91",
    0xf1=>"\xc5\x84",
    0xf2=>"\xc5\x88",
    0xf3=>"\xc3\xb3",
    0xf4=>"\xc3\xb4",
    0xf5=>"\xc5\x91",
    0xf6=>"\xc3\xb6",
    0xf7=>"\xc3\xb7",
    0xf8=>"\xc5\x99",
    0xf9=>"\xc5\xaf",
    0xfa=>"\xc3\xba",
    0xfb=>"\xc5\xb1",
    0xfc=>"\xc3\xbc",
    0xfd=>"\xc3\xbd",
    0xfe=>"\xc5\xa3",
    0xff=>"\xcb\x99"
  ),
  'iso1' => array(
    // 0x80-0x9F are the C1 control characters U+0080..U+009F (previously missing).
    0x80=>"\xc2\x80",
    0x81=>"\xc2\x81",
    0x82=>"\xc2\x82",
    0x83=>"\xc2\x83",
    0x84=>"\xc2\x84",
    0x85=>"\xc2\x85",
    0x86=>"\xc2\x86",
    0x87=>"\xc2\x87",
    0x88=>"\xc2\x88",
    0x89=>"\xc2\x89",
    0x8A=>"\xc2\x8a",
    0x8B=>"\xc2\x8b",
    0x8C=>"\xc2\x8c",
    0x8D=>"\xc2\x8d",
    0x8E=>"\xc2\x8e",
    0x8F=>"\xc2\x8f",
    0x90=>"\xc2\x90",
    0x91=>"\xc2\x91",
    0x92=>"\xc2\x92",
    0x93=>"\xc2\x93",
    0x94=>"\xc2\x94",
    0x95=>"\xc2\x95",
    0x96=>"\xc2\x96",
    0x97=>"\xc2\x97",
    0x98=>"\xc2\x98",
    0x99=>"\xc2\x99",
    0x9A=>"\xc2\x9a",
    0x9B=>"\xc2\x9b",
    0x9C=>"\xc2\x9c",
    0x9D=>"\xc2\x9d",
    0x9E=>"\xc2\x9e",
    0x9F=>"\xc2\x9f",
    0xA0=>"\xc2\xa0",
    0xA1=>"\xc2\xa1",
    0xA2=>"\xc2\xa2",
    0xA3=>"\xc2\xa3",
    0xA4=>"\xc2\xa4",
    0xA5=>"\xc2\xa5",
    0xA6=>"\xc2\xa6",
    0xA7=>"\xc2\xa7",
    0xA8=>"\xc2\xa8",
    0xA9=>"\xc2\xa9",
    0xAA=>"\xc2\xaa",
    0xAB=>"\xc2\xab",
    0xAC=>"\xc2\xac",
    0xAD=>"\xc2\xad",
    0xAE=>"\xc2\xae",
    0xAF=>"\xc2\xaf",
    0xB0=>"\xc2\xb0",
    0xB1=>"\xc2\xb1",
    0xB2=>"\xc2\xb2",
    0xB3=>"\xc2\xb3",
    0xB4=>"\xc2\xb4",
    0xB5=>"\xc2\xb5",
    0xB6=>"\xc2\xb6",
    0xB7=>"\xc2\xb7",
    0xB8=>"\xc2\xb8",
    0xB9=>"\xc2\xb9",
    0xBA=>"\xc2\xba",
    0xBB=>"\xc2\xbb",
    0xBC=>"\xc2\xbc",
    0xBD=>"\xc2\xbd",
    0xBE=>"\xc2\xbe",
    0xBF=>"\xc2\xbf",
    0xC0=>"\xc3\x80",
    0xC1=>"\xc3\x81",
    0xC2=>"\xc3\x82",
    0xC3=>"\xc3\x83",
    0xC4=>"\xc3\x84",
    0xC5=>"\xc3\x85",
    0xC6=>"\xc3\x86",
    0xC7=>"\xc3\x87",
    0xC8=>"\xc3\x88",
    0xC9=>"\xc3\x89",
    0xCA=>"\xc3\x8a",
    0xCB=>"\xc3\x8b",
    0xCC=>"\xc3\x8c",
    0xCD=>"\xc3\x8d",
    0xCE=>"\xc3\x8e",
    0xCF=>"\xc3\x8f",
    0xD0=>"\xc3\x90",
    0xD1=>"\xc3\x91",
    0xD2=>"\xc3\x92",
    0xD3=>"\xc3\x93",
    0xD4=>"\xc3\x94",
    0xD5=>"\xc3\x95",
    0xD6=>"\xc3\x96",
    0xD7=>"\xc3\x97",
    0xD8=>"\xc3\x98",
    0xD9=>"\xc3\x99",
    0xDA=>"\xc3\x9a",
    0xDB=>"\xc3\x9b",
    0xDC=>"\xc3\x9c",
    0xDD=>"\xc3\x9d",
    0xDE=>"\xc3\x9e",
    0xDF=>"\xc3\x9f",
    0xE0=>"\xc3\xa0",
    0xE1=>"\xc3\xa1",
    0xE2=>"\xc3\xa2",
    0xE3=>"\xc3\xa3",
    0xE4=>"\xc3\xa4",
    0xE5=>"\xc3\xa5",
    0xE6=>"\xc3\xa6",
    0xE7=>"\xc3\xa7",
    0xE8=>"\xc3\xa8",
    0xE9=>"\xc3\xa9",
    0xEA=>"\xc3\xaa",
    0xEB=>"\xc3\xab",
    0xEC=>"\xc3\xac",
    0xED=>"\xc3\xad",
    0xEE=>"\xc3\xae",
    0xEF=>"\xc3\xaf",
    0xF0=>"\xc3\xb0",
    0xF1=>"\xc3\xb1",
    0xF2=>"\xc3\xb2",
    0xF3=>"\xc3\xb3",
    0xF4=>"\xc3\xb4",
    0xF5=>"\xc3\xb5",
    0xF6=>"\xc3\xb6",
    0xF7=>"\xc3\xb7",
    0xF8=>"\xc3\xb8",
    0xF9=>"\xc3\xb9",
    0xFA=>"\xc3\xba",
    0xFB=>"\xc3\xbb",
    0xFC=>"\xc3\xbc",
    0xFD=>"\xc3\xbd",
    0xFE=>"\xc3\xbe",
    0xFF=>"\xc3\xbf" // LATIN SMALL LETTER Y WITH DIAERESIS (previously missing)
  ),
  'win1257' => array(
    // Unassigned bytes used to map to "\xc0\x4", which is not valid UTF-8.
    0x80=>"\xe2\x82\xac",
    0x81=>"\xc2\x81", // unassigned
    0x82=>"\xe2\x80\x9a",
    0x83=>"\xc2\x83", // unassigned
    0x84=>"\xe2\x80\x9e",
    0x85=>"\xe2\x80\xa6",
    0x86=>"\xe2\x80\xa0",
    0x87=>"\xe2\x80\xa1",
    0x88=>"\xc2\x88", // unassigned
    0x89=>"\xe2\x80\xb0",
    0x8A=>"\xc2\x8a", // unassigned
    0x8B=>"\xe2\x80\xb9",
    0x8C=>"\xc2\x8c", // unassigned
    0x8D=>"\xc2\xa8",
    0x8E=>"\xcb\x87",
    0x8F=>"\xc2\xb8",
    0x90=>"\xc2\x90", // unassigned
    0x91=>"\xe2\x80\x98",
    0x92=>"\xe2\x80\x99",
    0x93=>"\xe2\x80\x9c",
    0x94=>"\xe2\x80\x9d",
    0x95=>"\xe2\x80\xa2",
    0x96=>"\xe2\x80\x93",
    0x97=>"\xe2\x80\x94",
    0x98=>"\xc2\x98", // unassigned
    0x99=>"\xe2\x84\xa2",
    0x9A=>"\xc2\x9a", // unassigned
    0x9B=>"\xe2\x80\xba",
    0x9C=>"\xc2\x9c", // unassigned
    0x9D=>"\xc2\xaf",
    0x9E=>"\xcb\x9b",
    0x9F=>"\xc2\x9f", // unassigned
    0xA0=>"\xc2\xa0",
    0xA1=>"\xef\xbf\xbd", // unassigned -> U+FFFD REPLACEMENT CHARACTER
    0xA2=>"\xc2\xa2",
    0xA3=>"\xc2\xa3",
    0xA4=>"\xc2\xa4",
    0xA5=>"\xef\xbf\xbd", // unassigned -> U+FFFD REPLACEMENT CHARACTER
    0xA6=>"\xc2\xa6",
    0xA7=>"\xc2\xa7",
    0xA8=>"\xc3\x98",
    0xA9=>"\xc2\xa9",
    0xAA=>"\xc5\x96",
    0xAB=>"\xc2\xab",
    0xAC=>"\xc2\xac",
    0xAD=>"\xc2\xad",
    0xAE=>"\xc2\xae",
    0xAF=>"\xc3\x86",
    0xB0=>"\xc2\xb0",
    0xB1=>"\xc2\xb1",
    0xB2=>"\xc2\xb2",
    0xB3=>"\xc2\xb3",
    0xB4=>"\xc2\xb4",
    0xB5=>"\xc2\xb5",
    0xB6=>"\xc2\xb6",
    0xB7=>"\xc2\xb7",
    0xB8=>"\xc3\xb8",
    0xB9=>"\xc2\xb9",
    0xBA=>"\xc5\x97",
    0xBB=>"\xc2\xbb",
    0xBC=>"\xc2\xbc",
    0xBD=>"\xc2\xbd",
    0xBE=>"\xc2\xbe",
    0xBF=>"\xc3\xa6",
    0xC0=>"\xc4\x84",
    0xC1=>"\xc4\xae",
    0xC2=>"\xc4\x80",
    0xC3=>"\xc4\x86",
    0xC4=>"\xc3\x84",
    0xC5=>"\xc3\x85",
    0xC6=>"\xc4\x98",
    0xC7=>"\xc4\x92",
    0xC8=>"\xc4\x8c",
    0xC9=>"\xc3\x89",
    0xCA=>"\xc5\xb9",
    0xCB=>"\xc4\x96",
    0xCC=>"\xc4\xa2",
    0xCD=>"\xc4\xb6",
    0xCE=>"\xc4\xaa",
    0xCF=>"\xc4\xbb",
    0xD0=>"\xc5\xa0",
    0xD1=>"\xc5\x83",
    0xD2=>"\xc5\x85",
    0xD3=>"\xc3\x93",
    0xD4=>"\xc5\x8c",
    0xD5=>"\xc3\x95",
    0xD6=>"\xc3\x96",
    0xD7=>"\xc3\x97",
    0xD8=>"\xc5\xb2",
    0xD9=>"\xc5\x81",
    0xDA=>"\xc5\x9a",
    0xDB=>"\xc5\xaa",
    0xDC=>"\xc3\x9c",
    0xDD=>"\xc5\xbb",
    0xDE=>"\xc5\xbd",
    0xDF=>"\xc3\x9f",
    0xE0=>"\xc4\x85",
    0xE1=>"\xc4\xaf",
    0xE2=>"\xc4\x81",
    0xE3=>"\xc4\x87",
    0xE4=>"\xc3\xa4",
    0xE5=>"\xc3\xa5",
    0xE6=>"\xc4\x99",
    0xE7=>"\xc4\x93",
    0xE8=>"\xc4\x8d",
    0xE9=>"\xc3\xa9",
    0xEA=>"\xc5\xba",
    0xEB=>"\xc4\x97",
    0xEC=>"\xc4\xa3",
    0xED=>"\xc4\xb7",
    0xEE=>"\xc4\xab",
    0xEF=>"\xc4\xbc",
    0xF0=>"\xc5\xa1",
    0xF1=>"\xc5\x84",
    0xF2=>"\xc5\x86",
    0xF3=>"\xc3\xb3",
    0xF4=>"\xc5\x8d",
    0xF5=>"\xc3\xb5",
    0xF6=>"\xc3\xb6",
    0xF7=>"\xc3\xb7",
    0xF8=>"\xc5\xb3",
    0xF9=>"\xc5\x82",
    0xFA=>"\xc5\x9b",
    0xFB=>"\xc5\xab",
    0xFC=>"\xc3\xbc",
    0xFD=>"\xc5\xbc",
    0xFE=>"\xc5\xbe",
    0xFF=>"\xcb\x99"
  ),
);
522
523
/**
 * Converts a string from a single-byte charset to UTF-8.
 *
 * Bytes below 0x80 are ASCII and are copied through unchanged. Every other
 * byte is replaced by the UTF-8 sequence stored for it in the global
 * $CharTable under the requested charset. A byte that has no table entry
 * (which includes every high byte when the charset name is unknown) is
 * dropped from the output, as before, but no longer triggers an
 * undefined-index notice/warning.
 *
 * @param string $String  Text encoded in the source charset.
 * @param string $Charset Key into $CharTable selecting the source charset
 *                        (defaults to 'iso2').
 * @return string The text encoded as UTF-8.
 */
function ToUTF8($String, $Charset = 'iso2')
{
    global $CharTable;

    // Resolve the per-charset map once; an unknown charset behaves like an empty map.
    $Map = isset($CharTable[$Charset]) ? $CharTable[$Charset] : array();

    $Result = '';
    $Length = strlen($String); // hoisted: the input never changes inside the loop
    for ($I = 0; $I < $Length; $I++) {
        $Char = $String[$I];
        $Code = ord($Char);
        if ($Code < 0x80) {
            $Result .= $Char;
        } elseif (isset($Map[$Code])) {
            $Result .= $Map[$Code];
        }
        // else: unmapped high byte, intentionally skipped
    }
    return $Result;
}
539
/**
 * Converts a UTF-8 string to a single-byte charset.
 *
 * ASCII bytes are copied through unchanged. Each multi-byte UTF-8 sequence
 * (a lead byte followed by its continuation bytes) is looked up among the
 * values of the global $CharTable for the requested charset and replaced by
 * the byte whose entry matches. When several bytes share the same UTF-8
 * value, the first one in the table wins.
 *
 * A sequence that has no counterpart in the target charset (or any sequence,
 * when the charset name is unknown) is replaced by '?'. Previously such
 * sequences produced a NUL byte, and a sequence at the very end of the input
 * was silently lost.
 *
 * @param string $String  UTF-8 encoded text.
 * @param string $Charset Key into $CharTable selecting the target charset
 *                        (defaults to 'iso2').
 * @return string The text encoded in the target charset.
 */
function FromUTF8($String, $Charset = 'iso2')
{
    global $CharTable;

    // Resolve the per-charset map once; an unknown charset behaves like an empty map.
    $Map = isset($CharTable[$Charset]) ? $CharTable[$Charset] : array();

    $Result = '';
    $Sequence = ''; // bytes of the multi-byte character currently being collected
    $Length = strlen($String);
    for ($I = 0; $I < $Length; $I++) {
        $Char = $String[$I];
        $Code = ord($Char);

        if (($Code & 0xC0) === 0x80) {
            // Continuation byte (10xxxxxx): extend the pending sequence.
            $Sequence .= $Char;
            continue;
        }

        // A lead byte or an ASCII byte ends whatever sequence was pending.
        if ($Sequence !== '') {
            $Byte = array_search($Sequence, $Map, true);
            $Result .= ($Byte === false) ? '?' : chr($Byte);
            $Sequence = '';
        }

        if ($Code & 0x80) {
            $Sequence = $Char; // lead byte (11xxxxxx) starts a new sequence
        } else {
            $Result .= $Char;  // plain ASCII
        }
    }

    // Flush a sequence that runs up to the end of the input.
    if ($Sequence !== '') {
        $Byte = array_search($Sequence, $Map, true);
        $Result .= ($Byte === false) ? '?' : chr($Byte);
    }

    return $Result;
}
565
566?>
Note: See TracBrowser for help on using the repository browser.