source: Common/UTF8.php

Last change on this file was 14, checked in by chronos, 3 years ago
  • Modified: Updated files to newer version.
File size: 11.5 KB
Line 
1<?php
2/*
3 ISO8859-2 <-> UTF-8 conversion functions designed for use in PHP-GTK apps
4 Adam Rambousek - rambousek@volny.cz
5
6 version history:
7 1.03 --- 12/02/2002
8 * added Win1257 support
9 1.02 --- 30/11/2001
10 * added ISO8859-1 support
11 1.01
12 * added Win1250 support
13 1.00
14
15 string Encoding::ToUTF8string(string $String [, string $Charset])
16 string Encoding::FromUTF8(string $String [, string $Charset])
17
18 supported charsets: name of charset you must use in script
19 ISO8859-2: iso2 (this is the default charset, you don't have to specify it)
20 Windows1250: win1250
21 ISO8859-1: iso1
22 Windows1257: win1257
23
24 example: $Encoding = new Encoding(); $new_string = $Encoding->ToUTF8string($some_string, "win1250");
25*/
26
27
28/*
29 translation tables - one array per supported charset, where the key is the byte value of a
30 character in that charset and the value is its UTF-8 byte sequence (two or three bytes)
31*/
32
/**
 * Converts strings between UTF-8 and a few single-byte charsets.
 *
 * Supported charset names: 'iso2' (ISO-8859-2, the default), 'win1250',
 * 'iso1' (ISO-8859-1) and 'win1257'. Bytes below 0x80 are treated as ASCII
 * and are always copied through unchanged.
 */
class Encoding
{
    /** UTF-8 encoding of U+FFFD, emitted for source bytes that have no mapping. */
    const UTF8_REPLACEMENT = "\xef\xbf\xbd";

    /** Byte emitted for UTF-8 sequences that the target charset cannot represent. */
    const BYTE_REPLACEMENT = '?';

    /**
     * Translation tables: charset name => (byte value 0x80-0xFF => UTF-8 byte sequence).
     * Byte values that a charset leaves unassigned have no entry.
     *
     * Declared explicitly so that assigning it in the constructor does not
     * create a dynamic property (deprecated since PHP 8.2).
     *
     * @var array<string, array<int, string>>
     */
    public $CharTable;

    /**
     * Builds the translation tables for all supported charsets.
     */
    public function __construct()
    {
        // Positions 0xC0-0xFF are identical in ISO-8859-2 and Windows-1250.
        $CentralEuropeanUpper = [
            0xC0 => "\xc5\x94", 0xC1 => "\xc3\x81", 0xC2 => "\xc3\x82", 0xC3 => "\xc4\x82",
            0xC4 => "\xc3\x84", 0xC5 => "\xc4\xb9", 0xC6 => "\xc4\x86", 0xC7 => "\xc3\x87",
            0xC8 => "\xc4\x8c", 0xC9 => "\xc3\x89", 0xCA => "\xc4\x98", 0xCB => "\xc3\x8b",
            0xCC => "\xc4\x9a", 0xCD => "\xc3\x8d", 0xCE => "\xc3\x8e", 0xCF => "\xc4\x8e",
            0xD0 => "\xc4\x90", 0xD1 => "\xc5\x83", 0xD2 => "\xc5\x87", 0xD3 => "\xc3\x93",
            0xD4 => "\xc3\x94", 0xD5 => "\xc5\x90", 0xD6 => "\xc3\x96", 0xD7 => "\xc3\x97",
            0xD8 => "\xc5\x98", 0xD9 => "\xc5\xae", 0xDA => "\xc3\x9a", 0xDB => "\xc5\xb0",
            0xDC => "\xc3\x9c", 0xDD => "\xc3\x9d", 0xDE => "\xc5\xa2", 0xDF => "\xc3\x9f",
            0xE0 => "\xc5\x95", 0xE1 => "\xc3\xa1", 0xE2 => "\xc3\xa2", 0xE3 => "\xc4\x83",
            0xE4 => "\xc3\xa4", 0xE5 => "\xc4\xba", 0xE6 => "\xc4\x87", 0xE7 => "\xc3\xa7",
            0xE8 => "\xc4\x8d", 0xE9 => "\xc3\xa9", 0xEA => "\xc4\x99", 0xEB => "\xc3\xab",
            0xEC => "\xc4\x9b", 0xED => "\xc3\xad", 0xEE => "\xc3\xae", 0xEF => "\xc4\x8f",
            0xF0 => "\xc4\x91", 0xF1 => "\xc5\x84", 0xF2 => "\xc5\x88", 0xF3 => "\xc3\xb3",
            0xF4 => "\xc3\xb4", 0xF5 => "\xc5\x91", 0xF6 => "\xc3\xb6", 0xF7 => "\xc3\xb7",
            0xF8 => "\xc5\x99", 0xF9 => "\xc5\xaf", 0xFA => "\xc3\xba", 0xFB => "\xc5\xb1",
            0xFC => "\xc3\xbc", 0xFD => "\xc3\xbd", 0xFE => "\xc5\xa3", 0xFF => "\xcb\x99",
        ];

        $Iso2Lower = [
            0x80 => "\xc2\x80", 0x81 => "\xc2\x81", 0x82 => "\xc2\x82", 0x83 => "\xc2\x83",
            0x84 => "\xc2\x84", 0x85 => "\xc2\x85", 0x86 => "\xc2\x86", 0x87 => "\xc2\x87",
            0x88 => "\xc2\x88", 0x89 => "\xc2\x89", 0x8A => "\xc2\x8a", 0x8B => "\xc2\x8b",
            0x8C => "\xc2\x8c", 0x8D => "\xc2\x8d", 0x8E => "\xc2\x8e", 0x8F => "\xc2\x8f",
            0x90 => "\xc2\x90", 0x91 => "\xc2\x91", 0x92 => "\xc2\x92", 0x93 => "\xc2\x93",
            0x94 => "\xc2\x94", 0x95 => "\xc2\x95", 0x96 => "\xc2\x96", 0x97 => "\xc2\x97",
            0x98 => "\xc2\x98", 0x99 => "\xc2\x99", 0x9A => "\xc2\x9a", 0x9B => "\xc2\x9b",
            0x9C => "\xc2\x9c", 0x9D => "\xc2\x9d", 0x9E => "\xc2\x9e", 0x9F => "\xc2\x9f",
            0xA0 => "\xc2\xa0", 0xA1 => "\xc4\x84", 0xA2 => "\xcb\x98", 0xA3 => "\xc5\x81",
            0xA4 => "\xc2\xa4", 0xA5 => "\xc4\xbd", 0xA6 => "\xc5\x9a", 0xA7 => "\xc2\xa7",
            0xA8 => "\xc2\xa8", 0xA9 => "\xc5\xa0", 0xAA => "\xc5\x9e", 0xAB => "\xc5\xa4",
            0xAC => "\xc5\xb9", 0xAD => "\xc2\xad", 0xAE => "\xc5\xbd", 0xAF => "\xc5\xbb",
            0xB0 => "\xc2\xb0", 0xB1 => "\xc4\x85", 0xB2 => "\xcb\x9b", 0xB3 => "\xc5\x82",
            0xB4 => "\xc2\xb4", 0xB5 => "\xc4\xbe", 0xB6 => "\xc5\x9b", 0xB7 => "\xcb\x87",
            0xB8 => "\xc2\xb8", 0xB9 => "\xc5\xa1", 0xBA => "\xc5\x9f", 0xBB => "\xc5\xa5",
            0xBC => "\xc5\xba", 0xBD => "\xcb\x9d", 0xBE => "\xc5\xbe", 0xBF => "\xc5\xbc",
        ];

        $Win1250Lower = [
            // 0x80 is the euro sign in Windows-1250 (was wrongly mapped to U+0080).
            0x80 => "\xe2\x82\xac", 0x81 => "\xc2\x81", 0x82 => "\xe2\x80\x9a", 0x83 => "\xc2\x83",
            0x84 => "\xe2\x80\x9e", 0x85 => "\xe2\x80\xa6", 0x86 => "\xe2\x80\xa0", 0x87 => "\xe2\x80\xa1",
            0x88 => "\xc2\x88", 0x89 => "\xe2\x80\xb0", 0x8A => "\xc5\xa0", 0x8B => "\xe2\x80\xb9",
            0x8C => "\xc5\x9a", 0x8D => "\xc5\xa4", 0x8E => "\xc5\xbd", 0x8F => "\xc5\xb9",
            0x90 => "\xc2\x90", 0x91 => "\xe2\x80\x98", 0x92 => "\xe2\x80\x99", 0x93 => "\xe2\x80\x9c",
            0x94 => "\xe2\x80\x9d", 0x95 => "\xe2\x80\xa2", 0x96 => "\xe2\x80\x93", 0x97 => "\xe2\x80\x94",
            // 0x98 is unassigned; mapped to the C1 control like the other unassigned
            // positions here (it used to duplicate the value of 0x91).
            0x98 => "\xc2\x98", 0x99 => "\xe2\x84\xa2", 0x9A => "\xc5\xa1", 0x9B => "\xe2\x80\xba",
            0x9C => "\xc5\x9b", 0x9D => "\xc5\xa5", 0x9E => "\xc5\xbe", 0x9F => "\xc5\xba",
            0xA0 => "\xc2\xa0", 0xA1 => "\xcb\x87", 0xA2 => "\xcb\x98", 0xA3 => "\xc5\x81",
            0xA4 => "\xc2\xa4", 0xA5 => "\xc4\x84", 0xA6 => "\xc2\xa6", 0xA7 => "\xc2\xa7",
            0xA8 => "\xc2\xa8", 0xA9 => "\xc2\xa9", 0xAA => "\xc5\x9e", 0xAB => "\xc2\xab",
            0xAC => "\xc2\xac", 0xAD => "\xc2\xad", 0xAE => "\xc2\xae", 0xAF => "\xc5\xbb",
            0xB0 => "\xc2\xb0", 0xB1 => "\xc2\xb1", 0xB2 => "\xcb\x9b", 0xB3 => "\xc5\x82",
            0xB4 => "\xc2\xb4", 0xB5 => "\xc2\xb5", 0xB6 => "\xc2\xb6", 0xB7 => "\xc2\xb7",
            0xB8 => "\xc2\xb8", 0xB9 => "\xc4\x85", 0xBA => "\xc5\x9f", 0xBB => "\xc2\xbb",
            0xBC => "\xc4\xbd", 0xBD => "\xcb\x9d", 0xBE => "\xc4\xbe", 0xBF => "\xc5\xbc",
        ];

        // Unassigned Windows-1257 positions (0x81, 0x83, 0x88, 0x8A, 0x8C, 0x90, 0x98,
        // 0x9A, 0x9C, 0x9F, 0xA1, 0xA5) are intentionally absent. They used to map
        // to "\xc0\x4", which is not valid UTF-8.
        $Win1257 = [
            0x80 => "\xe2\x82\xac", 0x82 => "\xe2\x80\x9a", 0x84 => "\xe2\x80\x9e", 0x85 => "\xe2\x80\xa6",
            0x86 => "\xe2\x80\xa0", 0x87 => "\xe2\x80\xa1", 0x89 => "\xe2\x80\xb0", 0x8B => "\xe2\x80\xb9",
            0x8D => "\xc2\xa8", 0x8E => "\xcb\x87", 0x8F => "\xc2\xb8",
            0x91 => "\xe2\x80\x98", 0x92 => "\xe2\x80\x99", 0x93 => "\xe2\x80\x9c", 0x94 => "\xe2\x80\x9d",
            0x95 => "\xe2\x80\xa2", 0x96 => "\xe2\x80\x93", 0x97 => "\xe2\x80\x94", 0x99 => "\xe2\x84\xa2",
            0x9B => "\xe2\x80\xba", 0x9D => "\xc2\xaf", 0x9E => "\xcb\x9b",
            0xA0 => "\xc2\xa0", 0xA2 => "\xc2\xa2", 0xA3 => "\xc2\xa3", 0xA4 => "\xc2\xa4",
            0xA6 => "\xc2\xa6", 0xA7 => "\xc2\xa7", 0xA8 => "\xc3\x98", 0xA9 => "\xc2\xa9",
            0xAA => "\xc5\x96", 0xAB => "\xc2\xab", 0xAC => "\xc2\xac", 0xAD => "\xc2\xad",
            0xAE => "\xc2\xae", 0xAF => "\xc3\x86",
            0xB0 => "\xc2\xb0", 0xB1 => "\xc2\xb1", 0xB2 => "\xc2\xb2", 0xB3 => "\xc2\xb3",
            0xB4 => "\xc2\xb4", 0xB5 => "\xc2\xb5", 0xB6 => "\xc2\xb6", 0xB7 => "\xc2\xb7",
            0xB8 => "\xc3\xb8", 0xB9 => "\xc2\xb9", 0xBA => "\xc5\x97", 0xBB => "\xc2\xbb",
            0xBC => "\xc2\xbc", 0xBD => "\xc2\xbd", 0xBE => "\xc2\xbe", 0xBF => "\xc3\xa6",
            0xC0 => "\xc4\x84", 0xC1 => "\xc4\xae", 0xC2 => "\xc4\x80", 0xC3 => "\xc4\x86",
            0xC4 => "\xc3\x84", 0xC5 => "\xc3\x85", 0xC6 => "\xc4\x98", 0xC7 => "\xc4\x92",
            0xC8 => "\xc4\x8c", 0xC9 => "\xc3\x89", 0xCA => "\xc5\xb9", 0xCB => "\xc4\x96",
            0xCC => "\xc4\xa2", 0xCD => "\xc4\xb6", 0xCE => "\xc4\xaa", 0xCF => "\xc4\xbb",
            0xD0 => "\xc5\xa0", 0xD1 => "\xc5\x83", 0xD2 => "\xc5\x85", 0xD3 => "\xc3\x93",
            0xD4 => "\xc5\x8c", 0xD5 => "\xc3\x95", 0xD6 => "\xc3\x96", 0xD7 => "\xc3\x97",
            0xD8 => "\xc5\xb2", 0xD9 => "\xc5\x81", 0xDA => "\xc5\x9a", 0xDB => "\xc5\xaa",
            0xDC => "\xc3\x9c", 0xDD => "\xc5\xbb", 0xDE => "\xc5\xbd", 0xDF => "\xc3\x9f",
            0xE0 => "\xc4\x85", 0xE1 => "\xc4\xaf", 0xE2 => "\xc4\x81", 0xE3 => "\xc4\x87",
            0xE4 => "\xc3\xa4", 0xE5 => "\xc3\xa5", 0xE6 => "\xc4\x99", 0xE7 => "\xc4\x93",
            0xE8 => "\xc4\x8d", 0xE9 => "\xc3\xa9", 0xEA => "\xc5\xba", 0xEB => "\xc4\x97",
            0xEC => "\xc4\xa3", 0xED => "\xc4\xb7", 0xEE => "\xc4\xab", 0xEF => "\xc4\xbc",
            0xF0 => "\xc5\xa1", 0xF1 => "\xc5\x84", 0xF2 => "\xc5\x86", 0xF3 => "\xc3\xb3",
            0xF4 => "\xc5\x8d", 0xF5 => "\xc3\xb5", 0xF6 => "\xc3\xb6", 0xF7 => "\xc3\xb7",
            0xF8 => "\xc5\xb3", 0xF9 => "\xc5\x82", 0xFA => "\xc5\x9b", 0xFB => "\xc5\xab",
            0xFC => "\xc3\xbc", 0xFD => "\xc5\xbc", 0xFE => "\xc5\xbe", 0xFF => "\xcb\x99",
        ];

        $this->CharTable = [
            // The lower and upper halves have disjoint keys, so "+" simply joins them.
            'iso2' => $Iso2Lower + $CentralEuropeanUpper,
            'win1250' => $Win1250Lower + $CentralEuropeanUpper,
            'iso1' => self::BuildLatin1Table(),
            'win1257' => $Win1257,
        ];
    }

    /**
     * Converts a string from a single-byte charset to UTF-8.
     *
     * Bytes that have no mapping in the charset are replaced with U+FFFD
     * (previously they raised an "undefined array key" warning and were dropped).
     *
     * @param string $String  Text encoded in $Charset.
     * @param string $Charset One of the keys of $CharTable ('iso2', 'win1250', 'iso1', 'win1257').
     * @return string The text encoded as UTF-8.
     */
    public function ToUTF8string(string $String, string $Charset = 'iso2'): string
    {
        // Pure ASCII needs no table at all, whatever charset name was given.
        if (!self::HasHighBytes($String))
        {
            return $String;
        }

        $Table = $this->GetCharsetTable($Charset);
        $Result = '';
        $Length = strlen($String);
        for ($I = 0; $I < $Length; $I++)
        {
            $Code = ord($String[$I]);
            if ($Code < 0x80)
            {
                $Result .= $String[$I];
            } else
            {
                $Result .= $Table[$Code] ?? self::UTF8_REPLACEMENT;
            }
        }
        return $Result;
    }

    /**
     * Converts a UTF-8 string to a single-byte charset.
     *
     * The input is split into sequences made of one lead byte (0xC0-0xFF) plus the
     * continuation bytes that follow it; each sequence is looked up in the table
     * of the target charset. Sequences without a mapping become '?' (previously a
     * NUL byte). A sequence at the very end of the string is converted too
     * (previously it was silently dropped).
     *
     * @param string $String  UTF-8 encoded text.
     * @param string $Charset One of the keys of $CharTable ('iso2', 'win1250', 'iso1', 'win1257').
     * @return string The text encoded in $Charset.
     */
    public function FromUTF8(string $String, string $Charset = 'iso2'): string
    {
        // Pure ASCII needs no table at all, whatever charset name was given.
        if (!self::HasHighBytes($String))
        {
            return $String;
        }

        $Table = $this->GetCharsetTable($Charset);
        $Result = '';
        $Sequence = ''; // bytes of the multi-byte character currently being collected
        $Length = strlen($String);
        for ($I = 0; $I < $Length; $I++)
        {
            $Byte = $String[$I];
            $Code = ord($Byte);
            if ($Code < 0x80)
            {
                // ASCII byte: it ends any pending sequence and is copied as is.
                $Result .= self::DecodeSequence($Sequence, $Table) . $Byte;
                $Sequence = '';
            } else if ($Code & 0x40)
            {
                // Lead byte: it ends any pending sequence and starts a new one.
                $Result .= self::DecodeSequence($Sequence, $Table);
                $Sequence = $Byte;
            } else
            {
                // Continuation byte.
                $Sequence .= $Byte;
            }
        }
        // Flush the sequence that was still pending when the input ended.
        return $Result . self::DecodeSequence($Sequence, $Table);
    }

    /**
     * Returns the translation table of a charset.
     *
     * An unknown charset name raises an E_USER_WARNING and yields an empty table,
     * so every non-ASCII character is then replaced by the substitution character.
     *
     * @param string $Charset Charset name.
     * @return array<int, string> Byte value => UTF-8 byte sequence.
     */
    private function GetCharsetTable(string $Charset): array
    {
        if (!isset($this->CharTable[$Charset]) || !is_array($this->CharTable[$Charset]))
        {
            trigger_error('Unsupported charset "' . $Charset . '"', E_USER_WARNING);
            return [];
        }
        return $this->CharTable[$Charset];
    }

    /**
     * Maps one collected UTF-8 sequence to its single-byte equivalent.
     *
     * @param string             $Sequence Collected bytes; may be empty when nothing is pending.
     * @param array<int, string> $Table    Byte value => UTF-8 byte sequence.
     * @return string Empty string for an empty sequence, the mapped byte, or '?' when unmapped.
     */
    private static function DecodeSequence(string $Sequence, array $Table): string
    {
        if ($Sequence === '')
        {
            return '';
        }
        // Strict search, so the result is either an integer key or false.
        $Code = array_search($Sequence, $Table, true);
        return $Code === false ? self::BYTE_REPLACEMENT : chr($Code);
    }

    /**
     * Tells whether the string contains at least one byte in the range 0x80-0xFF.
     *
     * @param string $String Arbitrary bytes.
     * @return bool
     */
    private static function HasHighBytes(string $String): bool
    {
        return preg_match('/[\x80-\xFF]/', $String) === 1;
    }

    /**
     * Builds the ISO-8859-1 table.
     *
     * Every ISO-8859-1 byte equals its Unicode code point, so the two-byte UTF-8
     * form can be computed. This also covers 0x80-0x9F and 0xFF, which the
     * former hand-written table lacked.
     *
     * @return array<int, string> Byte value => UTF-8 byte sequence.
     */
    private static function BuildLatin1Table(): array
    {
        $Table = [];
        for ($Code = 0x80; $Code <= 0xFF; $Code++)
        {
            $Table[$Code] = chr(0xC0 | ($Code >> 6)) . chr(0x80 | ($Code & 0x3F));
        }
        return $Table;
    }
}
Note: See TracBrowser for help on using the repository browser.