| 1 | <?php
|
|---|
| 2 | /**
|
|---|
| 3 | *
|
|---|
| 4 | * @package utf
|
|---|
| 5 | * @version $Id$
|
|---|
| 6 | * @copyright (c) 2006 phpBB Group
|
|---|
| 7 | * @license http://opensource.org/licenses/gpl-license.php GNU Public License
|
|---|
| 8 | *
|
|---|
| 9 | */
|
|---|
| 10 |
|
|---|
| 11 | /**
|
|---|
| 12 | */
|
|---|
// Stop right away when the IN_PHPBB constant is not defined
if (defined('IN_PHPBB') === false)
{
	exit;
}

// Switch LC_CTYPE to the "C" locale to enforce ASCII only string handling
setlocale(LC_CTYPE, 'C');
|
|---|
| 20 |
|
|---|
| 21 | /**
|
|---|
| 22 | * UTF-8 tools
|
|---|
| 23 | *
|
|---|
| 24 | * Whenever possible, these functions will try to use PHP's built-in functions or
|
|---|
| 25 | * extensions, otherwise they will default to custom routines.
|
|---|
| 26 | *
|
|---|
| 27 | * @package utf
|
|---|
| 28 | */
|
|---|
| 29 |
|
|---|
// utf8_encode()/utf8_decode() used to be provided by the xml extension only,
// but PHP 7.2 and later ship them with the core. Checking for the functions
// themselves (instead of the extension) makes sure the fallbacks below are
// never declared on top of a built-in, which would be a fatal error.
if (!function_exists('utf8_encode'))
{
	/**
	* Implementation of PHP's native utf8_encode for people without XML support
	* This function exploits some nice things that ISO-8859-1 and UTF-8 have in common
	*
	* @param string $str ISO-8859-1 encoded data
	* @return string UTF-8 encoded data
	*/
	function utf8_encode($str)
	{
		$out = '';
		for ($i = 0, $len = strlen($str); $i < $len; $i++)
		{
			$letter = $str[$i];
			$num = ord($letter);
			if ($num < 0x80)
			{
				// ASCII bytes are copied unchanged
				$out .= $letter;
			}
			else if ($num < 0xC0)
			{
				// 0x80..0xBF: lead byte C2, the source byte is reused as trail byte
				$out .= "\xC2" . $letter;
			}
			else
			{
				// 0xC0..0xFF: lead byte C3, trail byte is the source byte minus 0x40
				$out .= "\xC3" . chr($num - 64);
			}
		}
		return $out;
	}
}

if (!function_exists('utf8_decode'))
{
	/**
	* Implementation of PHP's native utf8_decode for people without XML support
	*
	* Two byte sequences whose value fits into one byte are decoded, every
	* other multibyte sequence (including a two byte sequence that is cut off
	* at the end of the input) is replaced by a single '?'.
	*
	* @param string $str UTF-8 encoded data
	* @return string ISO-8859-1 encoded data
	*/
	function utf8_decode($str)
	{
		$pos = 0;
		$len = strlen($str);
		$ret = '';

		while ($pos < $len)
		{
			// The high nibble of the lead byte tells how long the sequence is
			$ord = ord($str[$pos]) & 0xF0;
			if ($ord === 0xC0 || $ord === 0xD0)
			{
				if ($pos + 1 >= $len)
				{
					// Lead byte without its trail byte: do not read past the end
					$ret .= '?';
					break;
				}

				$charval = ((ord($str[$pos]) & 0x1F) << 6) | (ord($str[$pos + 1]) & 0x3F);
				$pos += 2;
				$ret .= (($charval < 256) ? chr($charval) : '?');
			}
			else if ($ord === 0xE0)
			{
				// Three byte sequences never fit into a single byte
				$ret .= '?';
				$pos += 3;
			}
			else if ($ord === 0xF0)
			{
				// Four byte sequences never fit into a single byte
				$ret .= '?';
				$pos += 4;
			}
			else
			{
				// Everything else is copied through byte by byte
				$ret .= $str[$pos];
				++$pos;
			}
		}
		return $ret;
	}
}
|
|---|
| 102 |
|
|---|
// mbstring is old and has its functions around for older versions of PHP.
// If mbstring is not loaded, we go into native mode.
|
|---|
| 105 | if (extension_loaded('mbstring'))
|
|---|
| 106 | {
|
|---|
| 107 | mb_internal_encoding('UTF-8');
|
|---|
| 108 |
|
|---|
| 109 | /**
|
|---|
| 110 | * UTF-8 aware alternative to strrpos
|
|---|
| 111 | * Find position of last occurrence of a char in a string
|
|---|
| 112 | *
|
|---|
| 113 | * Notes:
|
|---|
| 114 | * - offset for mb_strrpos was added in 5.2.0, we emulate if it is lower
|
|---|
| 115 | */
|
|---|
| 116 | if (version_compare(PHP_VERSION, '5.2.0', '>='))
|
|---|
| 117 | {
|
|---|
/**
* UTF-8 aware alternative to strrpos
*
* Wrapper around mb_strrpos() which returns false for an empty haystack
* instead of passing it on to mb_strrpos().
*
* @param string $str haystack
* @param string $needle needle
* @param integer $offset (optional) handed to mb_strrpos() when given
* @return mixed result of mb_strrpos(), false for an empty haystack
* @ignore
*/
function utf8_strrpos($str, $needle, $offset = null)
{
	// Emulate behaviour of strrpos rather than raising warning.
	// empty() on its own would also reject the perfectly valid haystack '0'.
	if ($str !== '0' && empty($str))
	{
		return false;
	}

	if (is_null($offset))
	{
		return mb_strrpos($str, $needle);
	}

	return mb_strrpos($str, $needle, $offset);
}
|
|---|
| 139 | }
|
|---|
| 140 | else
|
|---|
| 141 | {
|
|---|
/**
* UTF-8 aware alternative to strrpos
*
* Variant that does not hand an offset to mb_strrpos(): when an offset is
* given, the haystack is cut with mb_substr() first and the offset is added
* to the position found in the remainder.
*
* @param string $str haystack
* @param string $needle needle
* @param integer $offset (optional) offset in characters
* @return mixed integer position or false if nothing was found
* @ignore
*/
function utf8_strrpos($str, $needle, $offset = null)
{
	// offset for mb_strrpos was added in 5.2.0
	if (is_null($offset))
	{
		// Emulate behaviour of strrpos rather than raising warning.
		// empty() on its own would also reject the perfectly valid haystack '0'.
		if ($str !== '0' && empty($str))
		{
			return false;
		}

		return mb_strrpos($str, $needle);
	}

	if (!is_int($offset))
	{
		trigger_error('utf8_strrpos expects parameter 3 to be long', E_USER_ERROR);
		return false;
	}

	// NOTE(review): for a negative offset mb_substr() keeps the tail of the
	// string, so adding the offset back does not give a position in the full
	// string - confirm whether callers ever pass negative offsets here.
	$str = mb_substr($str, $offset);

	if (false !== ($pos = mb_strrpos($str, $needle)))
	{
		return $pos + $offset;
	}

	return false;
}
|
|---|
| 177 | }
|
|---|
| 178 |
|
|---|
/**
* UTF-8 aware alternative to strpos
*
* Delegates to mb_strpos(); the offset is only forwarded when one was given.
*
* @param string $str haystack
* @param string $needle needle
* @param integer $offset (optional) offset handed to mb_strpos()
* @return mixed whatever mb_strpos() returns
* @ignore
*/
function utf8_strpos($str, $needle, $offset = null)
{
	return ($offset === null) ? mb_strpos($str, $needle) : mb_strpos($str, $needle, $offset);
}
|
|---|
| 194 |
|
|---|
/**
* UTF-8 aware alternative to strtolower
*
* @param string $str string to convert
* @return string result of mb_strtolower()
* @ignore
*/
function utf8_strtolower($str)
{
	$lowered = mb_strtolower($str);

	return $lowered;
}
|
|---|
| 203 |
|
|---|
/**
* UTF-8 aware alternative to strtoupper
*
* @param string $str string to convert
* @return string result of mb_strtoupper()
* @ignore
*/
function utf8_strtoupper($str)
{
	$raised = mb_strtoupper($str);

	return $raised;
}
|
|---|
| 212 |
|
|---|
/**
* UTF-8 aware alternative to substr
*
* Delegates to mb_substr(); the length is only forwarded when one was given.
*
* @param string $str source string
* @param integer $offset offset handed to mb_substr()
* @param integer $length (optional) length handed to mb_substr()
* @return string whatever mb_substr() returns
* @ignore
*/
function utf8_substr($str, $offset, $length = null)
{
	if ($length !== null)
	{
		return mb_substr($str, $offset, $length);
	}

	return mb_substr($str, $offset);
}
|
|---|
| 228 |
|
|---|
/**
* Return the length (in characters) of a UTF-8 string
*
* @param string $text UTF-8 string
* @return integer result of mb_strlen() with the encoding fixed to utf-8
* @ignore
*/
function utf8_strlen($text)
{
	$char_count = mb_strlen($text, 'utf-8');

	return $char_count;
}
|
|---|
| 237 | }
|
|---|
| 238 | else
|
|---|
| 239 | {
|
|---|
/**
* UTF-8 aware alternative to strrpos
* Find position of last occurrence of a string
*
* Without an offset the haystack is split at the needle; the character
* length of everything in front of the last needle is the position. With an
* offset the search is repeated on the remainder returned by utf8_substr()
* and the offset is added to the result.
*
* @author Harry Fuecks
* @param string $str haystack
* @param string $needle needle
* @param integer $offset (optional) offset (from left)
* @return mixed integer position or FALSE on failure
*/
function utf8_strrpos($str, $needle, $offset = null)
{
	if (!is_null($offset))
	{
		if (!is_int($offset))
		{
			trigger_error('utf8_strrpos expects parameter 3 to be long', E_USER_ERROR);
			return false;
		}

		// Look at the remainder only, then shift the hit back by the offset
		$remainder = utf8_substr($str, $offset);
		$found = utf8_strrpos($remainder, $needle);

		return ($found !== false) ? $found + $offset : false;
	}

	$pieces = explode($needle, $str);

	if (sizeof($pieces) <= 1)
	{
		// The needle does not occur at all
		return false;
	}

	// Throw away what follows the last match and glue the rest back together
	array_pop($pieces);

	return utf8_strlen(join($needle, $pieces));
}
|
|---|
| 284 |
|
|---|
/**
* UTF-8 aware alternative to strpos
* Find position of first occurrence of a string
*
* Without an offset the haystack is split at the needle and the character
* length of the first piece is the position. With an offset the search is
* repeated on the remainder returned by utf8_substr() and the offset is
* added to the result.
*
* @author Harry Fuecks
* @param string $str haystack
* @param string $needle needle
* @param integer $offset offset in characters (from left)
* @return mixed integer position or FALSE on failure
*/
function utf8_strpos($str, $needle, $offset = null)
{
	if (!is_null($offset))
	{
		if (!is_int($offset))
		{
			trigger_error('utf8_strpos: Offset must be an integer', E_USER_ERROR);
			return false;
		}

		// Look at the remainder only, then shift the hit back by the offset
		$remainder = utf8_substr($str, $offset);
		$found = utf8_strpos($remainder, $needle);

		return ($found !== false) ? $found + $offset : false;
	}

	$pieces = explode($needle, $str);

	// More than one piece means the needle was found at least once
	return (sizeof($pieces) > 1) ? utf8_strlen($pieces[0]) : false;
}
|
|---|
| 324 |
|
|---|
/**
* UTF-8 aware alternative to strtolower
* Make a string lowercase
*
* ASCII letters are handled by strtolower(), the multibyte upper case
* characters listed in the table below are replaced via strtr().
*
* Note: The concept of a character's "case" only exists in some alphabets
* such as Latin, Greek, Cyrillic, Armenian and archaic Georgian - it does
* not exist in the Chinese alphabet, for example. See Unicode Standard
* Annex #21: Case Mappings
*
* @param string $string UTF-8 string
* @return string string in lowercase
*/
function utf8_strtolower($string)
{
	// upper case byte sequence => lower case byte sequence
	static $upper_to_lower = array(
		"\xC3\x80" => "\xC3\xA0", "\xC3\x81" => "\xC3\xA1",
		"\xC3\x82" => "\xC3\xA2", "\xC3\x83" => "\xC3\xA3", "\xC3\x84" => "\xC3\xA4", "\xC3\x85" => "\xC3\xA5",
		"\xC3\x86" => "\xC3\xA6", "\xC3\x87" => "\xC3\xA7", "\xC3\x88" => "\xC3\xA8", "\xC3\x89" => "\xC3\xA9",
		"\xC3\x8A" => "\xC3\xAA", "\xC3\x8B" => "\xC3\xAB", "\xC3\x8C" => "\xC3\xAC", "\xC3\x8D" => "\xC3\xAD",
		"\xC3\x8E" => "\xC3\xAE", "\xC3\x8F" => "\xC3\xAF", "\xC3\x90" => "\xC3\xB0", "\xC3\x91" => "\xC3\xB1",
		"\xC3\x92" => "\xC3\xB2", "\xC3\x93" => "\xC3\xB3", "\xC3\x94" => "\xC3\xB4", "\xC3\x95" => "\xC3\xB5",
		"\xC3\x96" => "\xC3\xB6", "\xC3\x98" => "\xC3\xB8", "\xC3\x99" => "\xC3\xB9", "\xC3\x9A" => "\xC3\xBA",
		"\xC3\x9B" => "\xC3\xBB", "\xC3\x9C" => "\xC3\xBC", "\xC3\x9D" => "\xC3\xBD", "\xC3\x9E" => "\xC3\xBE",
		"\xC4\x80" => "\xC4\x81", "\xC4\x82" => "\xC4\x83", "\xC4\x84" => "\xC4\x85", "\xC4\x86" => "\xC4\x87",
		"\xC4\x88" => "\xC4\x89", "\xC4\x8A" => "\xC4\x8B", "\xC4\x8C" => "\xC4\x8D", "\xC4\x8E" => "\xC4\x8F",
		"\xC4\x90" => "\xC4\x91", "\xC4\x92" => "\xC4\x93", "\xC4\x96" => "\xC4\x97", "\xC4\x98" => "\xC4\x99",
		"\xC4\x9A" => "\xC4\x9B", "\xC4\x9C" => "\xC4\x9D", "\xC4\x9E" => "\xC4\x9F", "\xC4\xA0" => "\xC4\xA1",
		"\xC4\xA2" => "\xC4\xA3", "\xC4\xA4" => "\xC4\xA5", "\xC4\xA6" => "\xC4\xA7", "\xC4\xA8" => "\xC4\xA9",
		"\xC4\xAA" => "\xC4\xAB", "\xC4\xAE" => "\xC4\xAF", "\xC4\xB4" => "\xC4\xB5", "\xC4\xB6" => "\xC4\xB7",
		"\xC4\xB9" => "\xC4\xBA", "\xC4\xBB" => "\xC4\xBC", "\xC4\xBD" => "\xC4\xBE", "\xC5\x81" => "\xC5\x82",
		"\xC5\x83" => "\xC5\x84", "\xC5\x85" => "\xC5\x86", "\xC5\x87" => "\xC5\x88", "\xC5\x8A" => "\xC5\x8B",
		"\xC5\x8C" => "\xC5\x8D", "\xC5\x90" => "\xC5\x91", "\xC5\x94" => "\xC5\x95", "\xC5\x96" => "\xC5\x97",
		"\xC5\x98" => "\xC5\x99", "\xC5\x9A" => "\xC5\x9B", "\xC5\x9C" => "\xC5\x9D", "\xC5\x9E" => "\xC5\x9F",
		"\xC5\xA0" => "\xC5\xA1", "\xC5\xA2" => "\xC5\xA3", "\xC5\xA4" => "\xC5\xA5", "\xC5\xA6" => "\xC5\xA7",
		"\xC5\xA8" => "\xC5\xA9", "\xC5\xAA" => "\xC5\xAB", "\xC5\xAC" => "\xC5\xAD", "\xC5\xAE" => "\xC5\xAF",
		"\xC5\xB0" => "\xC5\xB1", "\xC5\xB2" => "\xC5\xB3", "\xC5\xB4" => "\xC5\xB5", "\xC5\xB6" => "\xC5\xB7",
		"\xC5\xB8" => "\xC3\xBF", "\xC5\xB9" => "\xC5\xBA", "\xC5\xBB" => "\xC5\xBC", "\xC5\xBD" => "\xC5\xBE",
		"\xC6\xA0" => "\xC6\xA1", "\xC6\xAF" => "\xC6\xB0", "\xC8\x98" => "\xC8\x99", "\xC8\x9A" => "\xC8\x9B",
		"\xCE\x86" => "\xCE\xAC", "\xCE\x88" => "\xCE\xAD", "\xCE\x89" => "\xCE\xAE", "\xCE\x8A" => "\xCE\xAF",
		"\xCE\x8C" => "\xCF\x8C", "\xCE\x8E" => "\xCF\x8D", "\xCE\x8F" => "\xCF\x8E", "\xCE\x91" => "\xCE\xB1",
		"\xCE\x92" => "\xCE\xB2", "\xCE\x93" => "\xCE\xB3", "\xCE\x94" => "\xCE\xB4", "\xCE\x95" => "\xCE\xB5",
		"\xCE\x96" => "\xCE\xB6", "\xCE\x97" => "\xCE\xB7", "\xCE\x98" => "\xCE\xB8", "\xCE\x99" => "\xCE\xB9",
		"\xCE\x9A" => "\xCE\xBA", "\xCE\x9B" => "\xCE\xBB", "\xCE\x9C" => "\xCE\xBC", "\xCE\x9D" => "\xCE\xBD",
		"\xCE\x9E" => "\xCE\xBE", "\xCE\x9F" => "\xCE\xBF", "\xCE\xA0" => "\xCF\x80", "\xCE\xA1" => "\xCF\x81",
		"\xCE\xA3" => "\xCF\x83", "\xCE\xA4" => "\xCF\x84", "\xCE\xA5" => "\xCF\x85", "\xCE\xA6" => "\xCF\x86",
		"\xCE\xA7" => "\xCF\x87", "\xCE\xA8" => "\xCF\x88", "\xCE\xA9" => "\xCF\x89", "\xCE\xAA" => "\xCF\x8A",
		"\xCE\xAB" => "\xCF\x8B", "\xD0\x81" => "\xD1\x91", "\xD0\x82" => "\xD1\x92", "\xD0\x83" => "\xD1\x93",
		"\xD0\x84" => "\xD1\x94", "\xD0\x85" => "\xD1\x95", "\xD0\x86" => "\xD1\x96", "\xD0\x87" => "\xD1\x97",
		"\xD0\x88" => "\xD1\x98", "\xD0\x89" => "\xD1\x99", "\xD0\x8A" => "\xD1\x9A", "\xD0\x8B" => "\xD1\x9B",
		"\xD0\x8C" => "\xD1\x9C", "\xD0\x8E" => "\xD1\x9E", "\xD0\x8F" => "\xD1\x9F", "\xD0\x90" => "\xD0\xB0",
		"\xD0\x91" => "\xD0\xB1", "\xD0\x92" => "\xD0\xB2", "\xD0\x93" => "\xD0\xB3", "\xD0\x94" => "\xD0\xB4",
		"\xD0\x95" => "\xD0\xB5", "\xD0\x96" => "\xD0\xB6", "\xD0\x97" => "\xD0\xB7", "\xD0\x98" => "\xD0\xB8",
		"\xD0\x99" => "\xD0\xB9", "\xD0\x9A" => "\xD0\xBA", "\xD0\x9B" => "\xD0\xBB", "\xD0\x9C" => "\xD0\xBC",
		"\xD0\x9D" => "\xD0\xBD", "\xD0\x9E" => "\xD0\xBE", "\xD0\x9F" => "\xD0\xBF", "\xD0\xA0" => "\xD1\x80",
		"\xD0\xA1" => "\xD1\x81", "\xD0\xA2" => "\xD1\x82", "\xD0\xA3" => "\xD1\x83", "\xD0\xA4" => "\xD1\x84",
		"\xD0\xA5" => "\xD1\x85", "\xD0\xA6" => "\xD1\x86", "\xD0\xA7" => "\xD1\x87", "\xD0\xA8" => "\xD1\x88",
		"\xD0\xA9" => "\xD1\x89", "\xD0\xAA" => "\xD1\x8A", "\xD0\xAB" => "\xD1\x8B", "\xD0\xAC" => "\xD1\x8C",
		"\xD0\xAD" => "\xD1\x8D", "\xD0\xAE" => "\xD1\x8E", "\xD0\xAF" => "\xD1\x8F", "\xD2\x90" => "\xD2\x91",
		"\xE1\xB8\x82" => "\xE1\xB8\x83", "\xE1\xB8\x8A" => "\xE1\xB8\x8B", "\xE1\xB8\x9E" => "\xE1\xB8\x9F", "\xE1\xB9\x80" => "\xE1\xB9\x81",
		"\xE1\xB9\x96" => "\xE1\xB9\x97", "\xE1\xB9\xA0" => "\xE1\xB9\xA1", "\xE1\xB9\xAA" => "\xE1\xB9\xAB", "\xE1\xBA\x80" => "\xE1\xBA\x81",
		"\xE1\xBA\x82" => "\xE1\xBA\x83", "\xE1\xBA\x84" => "\xE1\xBA\x85", "\xE1\xBB\xB2" => "\xE1\xBB\xB3"
	);

	// Lower the ASCII part first, then translate the listed multibyte characters
	$ascii_lowered = strtolower($string);

	return strtr($ascii_lowered, $upper_to_lower);
}
|
|---|
| 389 |
|
|---|
/**
* UTF-8 aware alternative to strtoupper
* Make a string uppercase
*
* ASCII letters are handled by strtoupper(), the multibyte lower case
* characters listed in the table below are replaced via strtr().
*
* Note: The concept of a character's "case" only exists in some alphabets
* such as Latin, Greek, Cyrillic, Armenian and archaic Georgian - it does
* not exist in the Chinese alphabet, for example. See Unicode Standard
* Annex #21: Case Mappings
*
* @param string $string UTF-8 string
* @return string string in uppercase
*/
function utf8_strtoupper($string)
{
	// lower case byte sequence => upper case byte sequence
	static $lower_to_upper = array(
		"\xC3\xA0" => "\xC3\x80", "\xC3\xA1" => "\xC3\x81",
		"\xC3\xA2" => "\xC3\x82", "\xC3\xA3" => "\xC3\x83", "\xC3\xA4" => "\xC3\x84", "\xC3\xA5" => "\xC3\x85",
		"\xC3\xA6" => "\xC3\x86", "\xC3\xA7" => "\xC3\x87", "\xC3\xA8" => "\xC3\x88", "\xC3\xA9" => "\xC3\x89",
		"\xC3\xAA" => "\xC3\x8A", "\xC3\xAB" => "\xC3\x8B", "\xC3\xAC" => "\xC3\x8C", "\xC3\xAD" => "\xC3\x8D",
		"\xC3\xAE" => "\xC3\x8E", "\xC3\xAF" => "\xC3\x8F", "\xC3\xB0" => "\xC3\x90", "\xC3\xB1" => "\xC3\x91",
		"\xC3\xB2" => "\xC3\x92", "\xC3\xB3" => "\xC3\x93", "\xC3\xB4" => "\xC3\x94", "\xC3\xB5" => "\xC3\x95",
		"\xC3\xB6" => "\xC3\x96", "\xC3\xB8" => "\xC3\x98", "\xC3\xB9" => "\xC3\x99", "\xC3\xBA" => "\xC3\x9A",
		"\xC3\xBB" => "\xC3\x9B", "\xC3\xBC" => "\xC3\x9C", "\xC3\xBD" => "\xC3\x9D", "\xC3\xBE" => "\xC3\x9E",
		"\xC3\xBF" => "\xC5\xB8", "\xC4\x81" => "\xC4\x80", "\xC4\x83" => "\xC4\x82", "\xC4\x85" => "\xC4\x84",
		"\xC4\x87" => "\xC4\x86", "\xC4\x89" => "\xC4\x88", "\xC4\x8B" => "\xC4\x8A", "\xC4\x8D" => "\xC4\x8C",
		"\xC4\x8F" => "\xC4\x8E", "\xC4\x91" => "\xC4\x90", "\xC4\x93" => "\xC4\x92", "\xC4\x97" => "\xC4\x96",
		"\xC4\x99" => "\xC4\x98", "\xC4\x9B" => "\xC4\x9A", "\xC4\x9D" => "\xC4\x9C", "\xC4\x9F" => "\xC4\x9E",
		"\xC4\xA1" => "\xC4\xA0", "\xC4\xA3" => "\xC4\xA2", "\xC4\xA5" => "\xC4\xA4", "\xC4\xA7" => "\xC4\xA6",
		"\xC4\xA9" => "\xC4\xA8", "\xC4\xAB" => "\xC4\xAA", "\xC4\xAF" => "\xC4\xAE", "\xC4\xB5" => "\xC4\xB4",
		"\xC4\xB7" => "\xC4\xB6", "\xC4\xBA" => "\xC4\xB9", "\xC4\xBC" => "\xC4\xBB", "\xC4\xBE" => "\xC4\xBD",
		"\xC5\x82" => "\xC5\x81", "\xC5\x84" => "\xC5\x83", "\xC5\x86" => "\xC5\x85", "\xC5\x88" => "\xC5\x87",
		"\xC5\x8B" => "\xC5\x8A", "\xC5\x8D" => "\xC5\x8C", "\xC5\x91" => "\xC5\x90", "\xC5\x95" => "\xC5\x94",
		"\xC5\x97" => "\xC5\x96", "\xC5\x99" => "\xC5\x98", "\xC5\x9B" => "\xC5\x9A", "\xC5\x9D" => "\xC5\x9C",
		"\xC5\x9F" => "\xC5\x9E", "\xC5\xA1" => "\xC5\xA0", "\xC5\xA3" => "\xC5\xA2", "\xC5\xA5" => "\xC5\xA4",
		"\xC5\xA7" => "\xC5\xA6", "\xC5\xA9" => "\xC5\xA8", "\xC5\xAB" => "\xC5\xAA", "\xC5\xAD" => "\xC5\xAC",
		"\xC5\xAF" => "\xC5\xAE", "\xC5\xB1" => "\xC5\xB0", "\xC5\xB3" => "\xC5\xB2", "\xC5\xB5" => "\xC5\xB4",
		"\xC5\xB7" => "\xC5\xB6", "\xC5\xBA" => "\xC5\xB9", "\xC5\xBC" => "\xC5\xBB", "\xC5\xBE" => "\xC5\xBD",
		"\xC6\xA1" => "\xC6\xA0", "\xC6\xB0" => "\xC6\xAF", "\xC8\x99" => "\xC8\x98", "\xC8\x9B" => "\xC8\x9A",
		"\xCE\xAC" => "\xCE\x86", "\xCE\xAD" => "\xCE\x88", "\xCE\xAE" => "\xCE\x89", "\xCE\xAF" => "\xCE\x8A",
		"\xCE\xB1" => "\xCE\x91", "\xCE\xB2" => "\xCE\x92", "\xCE\xB3" => "\xCE\x93", "\xCE\xB4" => "\xCE\x94",
		"\xCE\xB5" => "\xCE\x95", "\xCE\xB6" => "\xCE\x96", "\xCE\xB7" => "\xCE\x97", "\xCE\xB8" => "\xCE\x98",
		"\xCE\xB9" => "\xCE\x99", "\xCE\xBA" => "\xCE\x9A", "\xCE\xBB" => "\xCE\x9B", "\xCE\xBC" => "\xCE\x9C",
		"\xCE\xBD" => "\xCE\x9D", "\xCE\xBE" => "\xCE\x9E", "\xCE\xBF" => "\xCE\x9F", "\xCF\x80" => "\xCE\xA0",
		"\xCF\x81" => "\xCE\xA1", "\xCF\x83" => "\xCE\xA3", "\xCF\x84" => "\xCE\xA4", "\xCF\x85" => "\xCE\xA5",
		"\xCF\x86" => "\xCE\xA6", "\xCF\x87" => "\xCE\xA7", "\xCF\x88" => "\xCE\xA8", "\xCF\x89" => "\xCE\xA9",
		"\xCF\x8A" => "\xCE\xAA", "\xCF\x8B" => "\xCE\xAB", "\xCF\x8C" => "\xCE\x8C", "\xCF\x8D" => "\xCE\x8E",
		"\xCF\x8E" => "\xCE\x8F", "\xD0\xB0" => "\xD0\x90", "\xD0\xB1" => "\xD0\x91", "\xD0\xB2" => "\xD0\x92",
		"\xD0\xB3" => "\xD0\x93", "\xD0\xB4" => "\xD0\x94", "\xD0\xB5" => "\xD0\x95", "\xD0\xB6" => "\xD0\x96",
		"\xD0\xB7" => "\xD0\x97", "\xD0\xB8" => "\xD0\x98", "\xD0\xB9" => "\xD0\x99", "\xD0\xBA" => "\xD0\x9A",
		"\xD0\xBB" => "\xD0\x9B", "\xD0\xBC" => "\xD0\x9C", "\xD0\xBD" => "\xD0\x9D", "\xD0\xBE" => "\xD0\x9E",
		"\xD0\xBF" => "\xD0\x9F", "\xD1\x80" => "\xD0\xA0", "\xD1\x81" => "\xD0\xA1", "\xD1\x82" => "\xD0\xA2",
		"\xD1\x83" => "\xD0\xA3", "\xD1\x84" => "\xD0\xA4", "\xD1\x85" => "\xD0\xA5", "\xD1\x86" => "\xD0\xA6",
		"\xD1\x87" => "\xD0\xA7", "\xD1\x88" => "\xD0\xA8", "\xD1\x89" => "\xD0\xA9", "\xD1\x8A" => "\xD0\xAA",
		"\xD1\x8B" => "\xD0\xAB", "\xD1\x8C" => "\xD0\xAC", "\xD1\x8D" => "\xD0\xAD", "\xD1\x8E" => "\xD0\xAE",
		"\xD1\x8F" => "\xD0\xAF", "\xD1\x91" => "\xD0\x81", "\xD1\x92" => "\xD0\x82", "\xD1\x93" => "\xD0\x83",
		"\xD1\x94" => "\xD0\x84", "\xD1\x95" => "\xD0\x85", "\xD1\x96" => "\xD0\x86", "\xD1\x97" => "\xD0\x87",
		"\xD1\x98" => "\xD0\x88", "\xD1\x99" => "\xD0\x89", "\xD1\x9A" => "\xD0\x8A", "\xD1\x9B" => "\xD0\x8B",
		"\xD1\x9C" => "\xD0\x8C", "\xD1\x9E" => "\xD0\x8E", "\xD1\x9F" => "\xD0\x8F", "\xD2\x91" => "\xD2\x90",
		"\xE1\xB8\x83" => "\xE1\xB8\x82", "\xE1\xB8\x8B" => "\xE1\xB8\x8A", "\xE1\xB8\x9F" => "\xE1\xB8\x9E", "\xE1\xB9\x81" => "\xE1\xB9\x80",
		"\xE1\xB9\x97" => "\xE1\xB9\x96", "\xE1\xB9\xA1" => "\xE1\xB9\xA0", "\xE1\xB9\xAB" => "\xE1\xB9\xAA", "\xE1\xBA\x81" => "\xE1\xBA\x80",
		"\xE1\xBA\x83" => "\xE1\xBA\x82", "\xE1\xBA\x85" => "\xE1\xBA\x84", "\xE1\xBB\xB3" => "\xE1\xBB\xB2"
	);

	// Raise the ASCII part first, then translate the listed multibyte characters
	$ascii_raised = strtoupper($string);

	return strtr($ascii_raised, $lower_to_upper);
}
|
|---|
| 454 |
|
|---|
/**
* UTF-8 aware alternative to substr
* Return part of a string given character offset (and optionally length)
*
* The substring is extracted with a single anchored regular expression: an
* uncaptured group skips $offset characters and a captured group collects
* the requested characters.
*
* Note arguments: compared to substr - if offset or length are
* not integers, this version will not complain but rather massages them
* into an integer.
*
* Note on returned values: substr documentation states false can be
* returned in some cases (e.g. offset > string length)
* mb_substr never returns false, it will return an empty string instead.
* This adopts the mb_substr approach
*
* Note on implementation: PCRE only supports repetitions of less than
* 65536, in order to accept up to MAXINT values for offset and length,
* we'll repeat a group of 65535 characters when needed.
*
* Note on implementation: calculating the number of characters in the
* string is a relatively expensive operation, so we only carry it out when
* necessary. It isn't necessary for +ve offsets and no specified length
*
* @author Chris Smith<chris@jalakai.co.uk>
* @param string $str
* @param integer $offset number of UTF-8 characters offset (from left)
* @param integer $length (optional) length in UTF-8 characters from offset
* @return string the requested part, or an empty string if nothing matches
*/
function utf8_substr($str, $offset, $length = NULL)
{
	// generates E_NOTICE
	// for PHP4 objects, but not PHP5 objects
	$str = (string) $str;
	$offset = (int) $offset;
	if (!is_null($length))
	{
		// a null length means "up to the end" and must stay null
		$length = (int) $length;
	}

	// handle trivial cases: nothing requested, or a negative length that
	// reaches back further than the (negative) offset
	if ($length === 0 || ($offset < 0 && $length < 0 && $length < $offset))
	{
		return '';
	}

	// normalise negative offsets (we could use a tail
	// anchored pattern, but they are horribly slow!)
	if ($offset < 0)
	{
		// the character count is needed here, see the implementation notes
		$strlen = utf8_strlen($str);
		$offset = $strlen + $offset;
		if ($offset < 0)
		{
			// reaching back beyond the start means starting at the start
			$offset = 0;
		}
	}

	// $op: pattern that skips the offset, $lp: pattern that captures the result
	$op = '';
	$lp = '';

	// establish a pattern for offset, a
	// non-captured group equal in length to offset
	if ($offset > 0)
	{
		// split the offset into full 65535 blocks plus a remainder
		$ox = (int) ($offset / 65535);
		$oy = $offset % 65535;

		if ($ox)
		{
			$op = '(?:.{65535}){' . $ox . '}';
		}

		$op = '^(?:' . $op . '.{' . $oy . '})';
	}
	else
	{
		// offset == 0; just anchor the pattern
		$op = '^';
	}

	// establish a pattern for length
	if (is_null($length))
	{
		// the rest of the string
		$lp = '(.*)$';
	}
	else
	{
		if (!isset($strlen))
		{
			// not computed for the offset above, so count the characters now
			$strlen = utf8_strlen($str);
		}

		// another trivial case
		if ($offset > $strlen)
		{
			return '';
		}

		if ($length > 0)
		{
			// reduce any length that would
			// go past the end of the string
			$length = min($strlen - $offset, $length);

			// split the length into full 65535 blocks plus a remainder
			$lx = (int) ($length / 65535);
			$ly = $length % 65535;

			// positive length requires a captured group
			// of length characters
			if ($lx)
			{
				$lp = '(?:.{65535}){' . $lx . '}';
			}
			$lp = '(' . $lp . '.{'. $ly . '})';
		}
		else if ($length < 0)
		{
			// more characters to cut off the end than remain after the offset
			if ($length < ($offset - $strlen))
			{
				return '';
			}

			// split the number of characters to drop into blocks plus remainder
			$lx = (int)((-$length) / 65535);
			$ly = (-$length) % 65535;

			// negative length requires ... capture everything
			// except a group of -length characters
			// anchored at the tail-end of the string
			if ($lx)
			{
				$lp = '(?:.{65535}){' . $lx . '}';
			}
			$lp = '(.*)(?:' . $lp . '.{' . $ly . '})$';
		}
	}

	// u: treat pattern and subject as UTF-8, s: let the dot match newlines too
	if (!preg_match('#' . $op . $lp . '#us', $str, $match))
	{
		return '';
	}

	// the first captured group holds the requested part
	return $match[1];
}
|
|---|
| 600 |
|
|---|
/**
* Return the length (in characters) of a UTF-8 string
*
* @param string $text UTF-8 string
* @return integer Length (in chars) of given string
*/
function utf8_strlen($text)
{
	// utf8_decode() leaves a single byte (possibly '?') for every multibyte
	// character, so the byte length of its result is the character count
	$single_bytes = utf8_decode($text);

	return strlen($single_bytes);
}
|
|---|
| 612 | }
|
|---|
| 613 |
|
|---|
/**
* UTF-8 aware alternative to str_split
* Convert a string to an array
*
* @author Harry Fuecks
* @param string $str UTF-8 encoded
* @param int $split_len number of characters per chunk
* @return mixed array of chunks (the last one may be shorter); an array
*		holding the unchanged string when it is not longer than one chunk;
*		false if $split_len is not an integer greater than zero
*/
function utf8_str_split($str, $split_len = 1)
{
	if (!is_int($split_len) || $split_len < 1)
	{
		return false;
	}

	$len = utf8_strlen($str);
	if ($len <= $split_len)
	{
		// Nothing to split (this also covers the empty string)
		return array($str);
	}

	// A greedy .{1,n} yields full chunks followed by one shorter remainder.
	// With the s modifier the dot matches every character, so a remainder
	// containing a NUL byte is no longer dropped.
	preg_match_all('/.{1,' . $split_len . '}/us', $str, $ar);
	return $ar[0];
}
|
|---|
| 639 |
|
|---|
/**
* UTF-8 aware alternative to strspn
* Find length of initial segment matching the mask
*
* The mask is a plain list of characters: it is escaped before it is placed
* into the character class, so characters such as ], ^, - or \ are matched
* literally.
*
* @author Harry Fuecks
* @param string $str UTF-8 string to examine
* @param string $mask characters that are allowed in the initial segment
* @param integer $start (optional) offset handed to utf8_substr()
* @param integer $length (optional) length handed to utf8_substr()
* @return integer number of leading characters of $str contained in $mask
*/
function utf8_strspn($str, $mask, $start = null, $length = null)
{
	$mask = (string) $mask;
	if ($mask === '')
	{
		// Nothing can match an empty mask, and '[]' is not a valid pattern
		return 0;
	}

	if ($start !== null || $length !== null)
	{
		// When only a length was given, start at the beginning of the string
		$str = utf8_substr($str, ($start === null) ? 0 : $start, $length);
	}

	preg_match('/^[' . preg_quote($mask, '/') . ']+/u', $str, $matches);

	if (isset($matches[0]))
	{
		return utf8_strlen($matches[0]);
	}

	return 0;
}
|
|---|
| 662 |
|
|---|
/**
* UTF-8 aware alternative to ucfirst
* Make a string's first character uppercase
*
* @author Harry Fuecks
* @param string $str UTF-8 string
* @return string with first character as upper case (if applicable)
*/
function utf8_ucfirst($str)
{
	$char_count = utf8_strlen($str);

	if ($char_count == 0)
	{
		return '';
	}

	if ($char_count == 1)
	{
		// A single character can be converted as a whole
		return utf8_strtoupper($str);
	}

	// Separate the first character from the rest and only convert the former
	preg_match('/^(.{1})(.*)$/us', $str, $matches);

	return utf8_strtoupper($matches[1]) . $matches[2];
}
|
|---|
| 689 |
|
|---|
/**
* Recode a string to UTF-8
*
* The conversion is attempted with iconv first, then with mbstring (only for a
* fixed list of encodings), then with the recode extension. The first backend
* that yields a non-empty result wins. If none does, the transcoder functions
* shipped in includes/utf/data/ are loaded on demand and used instead.
*
* An encoding name that nothing recognises raises an E_USER_ERROR
* ("Unknown encoding"); a missing transcoder file raises an E_USER_ERROR too.
*
* @param string $string Original string
* @param string $encoding Original encoding (it is lowercased here)
* @return string The string, encoded in UTF-8. The input is handed back untouched
*				if the encoding is utf-8 or if $string is not a string or is empty.
*/
function utf8_recode($string, $encoding)
{
	$encoding = strtolower($encoding);

	// Already UTF-8, not a string at all, or nothing to convert
	if ($encoding == 'utf-8' || !is_string($string) || empty($string))
	{
		return $string;
	}

	switch ($encoding)
	{
		// we force iso-8859-1 to be cp1252
		case 'iso-8859-1':
			$encoding = 'cp1252';
		break;

		// convert iso-8859-8-i to iso-8859-8
		case 'iso-8859-8-i':
			$encoding = 'iso-8859-8';
			$string = hebrev($string);
		break;
	}

	// First, try iconv()
	if (function_exists('iconv'))
	{
		$converted = @iconv($encoding, 'utf-8', $string);

		if (!empty($converted))
		{
			return $converted;
		}
	}

	// Try the mb_string extension
	// mbstring is nasty on PHP4, we must make *sure* that we send a good encoding
	$mbstring_whitelist = array(
		'iso-8859-1',
		'iso-8859-2',
		'iso-8859-4',
		'iso-8859-7',
		'iso-8859-9',
		'iso-8859-15',
		'windows-1251',
		'windows-1252',
		'cp1252',
		'shift_jis',
		'euc-kr',
		'big5',
		'gb2312',
	);

	if (function_exists('mb_convert_encoding') && in_array($encoding, $mbstring_whitelist))
	{
		$converted = @mb_convert_encoding($string, 'utf-8', $encoding);

		if (!empty($converted))
		{
			return $converted;
		}
	}

	// Try the recode extension
	if (function_exists('recode_string'))
	{
		$converted = @recode_string($encoding . '..utf-8', $string);

		if (!empty($converted))
		{
			return $converted;
		}
	}

	// If nothing works, check if we have a custom transcoder available.
	// Only plain encoding names (lowercase letters, digits, "_", " ", "-") are accepted.
	if (!preg_match('#^[a-z0-9_ \\-]+$#', $encoding))
	{
		trigger_error('Unknown encoding: ' . $encoding, E_USER_ERROR);
	}

	global $phpbb_root_path, $phpEx;

	// The two files holding the bundled transcoder functions
	$basic_file = $phpbb_root_path . 'includes/utf/data/recode_basic.' . $phpEx;
	$cjk_file = $phpbb_root_path . 'includes/utf/data/recode_cjk.' . $phpEx;

	// iso-8859-* character encoding
	if (preg_match('/iso[_ -]?8859[_ -]?(\\d+)/', $encoding, $match))
	{
		if (in_array($match[1], array('1', '2', '4', '7', '8', '9', '15')))
		{
			if (!function_exists('iso_8859_' . $match[1]))
			{
				if (!file_exists($basic_file))
				{
					trigger_error('Basic reencoder file is missing', E_USER_ERROR);
				}
				include($basic_file);
			}
			return call_user_func('iso_8859_' . $match[1], $string);
		}

		trigger_error('Unknown encoding: ' . $encoding, E_USER_ERROR);
	}

	// CP/WIN character encoding
	if (preg_match('/(?:cp|windows)[_\- ]?(\\d+)/', $encoding, $match))
	{
		if ($match[1] == '932')
		{
			// Code page 932 is not handled here; it is picked up by the SJIS check below
		}
		else if (in_array($match[1], array('1250', '1251', '1252', '1254', '1255', '1256', '1257', '874')))
		{
			if (!function_exists('cp' . $match[1]))
			{
				if (!file_exists($basic_file))
				{
					trigger_error('Basic reencoder file is missing', E_USER_ERROR);
				}
				include($basic_file);
			}
			return call_user_func('cp' . $match[1], $string);
		}
		else
		{
			trigger_error('Unknown encoding: ' . $encoding, E_USER_ERROR);
		}
	}

	// TIS-620
	if (preg_match('/tis[_ -]?620/', $encoding))
	{
		if (!function_exists('tis_620'))
		{
			if (!file_exists($basic_file))
			{
				trigger_error('Basic reencoder file is missing', E_USER_ERROR);
			}
			include($basic_file);
		}
		return tis_620($string);
	}

	// SJIS, EUC_KR, BIG-5 and GB2312 all live in the CJK file and are tried in this order:
	// transcoder function => pattern recognising the encoding name
	$cjk_transcoders = array(
		'sjis'		=> '/sjis(?:[_ -]?win)?|(?:cp|ibm)[_ -]?932|shift[_ -]?jis/',
		'euc_kr'	=> '/euc[_ -]?kr/',
		'big5'		=> '/big[_ -]?5/',
		'gb2312'	=> '/gb[_ -]?2312/',
	);

	foreach ($cjk_transcoders as $transcoder => $name_pattern)
	{
		if (!preg_match($name_pattern, $encoding))
		{
			continue;
		}

		if (!function_exists($transcoder))
		{
			if (!file_exists($cjk_file))
			{
				trigger_error('CJK reencoder file is missing', E_USER_ERROR);
			}
			include($cjk_file);
		}
		return $transcoder($string);
	}

	// No backend knows this encoding. There is no fallback: nothing is returned past this point.
	trigger_error('Unknown encoding: ' . $encoding, E_USER_ERROR);
}
|
|---|
| 914 |
|
|---|
/**
* Replace all UTF-8 chars that are not in ASCII with their NCR
*
* Every multibyte sequence found in the text is handed to
* utf8_encode_ncr_callback(); single-byte (ASCII) characters are left alone.
*
* @param string $text UTF-8 string in NFC
* @return string ASCII string using NCRs for non-ASCII chars
*/
function utf8_encode_ncr($text)
{
	// A lead byte (0xC2-0xF4) followed by one to three continuation bytes (0x80-0xBF)
	$multibyte_sequence = '#[\\xC2-\\xF4][\\x80-\\xBF]{1,3}#';

	return preg_replace_callback($multibyte_sequence, 'utf8_encode_ncr_callback', $text);
}
|
|---|
| 925 |
|
|---|
/**
* Callback used in utf8_encode_ncr()
*
* Wraps whatever utf8_ord() returns for the matched sequence into the
* decimal NCR notation "&#...;". Attention, $m is an array
*
* @param array $m 0-based numerically indexed array passed by preg_replace_callback()
* @return string The NCR built from the matched sequence
*/
function utf8_encode_ncr_callback($m)
{
	// $m[0] holds the complete matched multibyte sequence
	$code_point = utf8_ord($m[0]);

	return '&#' . $code_point . ';';
}
|
|---|
| 938 |
|
|---|
/**
* Converts a UTF-8 char to its UNICODE code point
*
* The byte length of $chr decides how it is decoded; the bytes themselves are
* not validated.
*
* @param string $chr UTF-8 char
* @return integer UNICODE code point. If $chr is not 1 to 4 bytes long it is
*				returned unchanged instead.
*/
function utf8_ord($chr)
{
	$byte_count = strlen($chr);

	// Single byte: the byte value is the code point
	if ($byte_count == 1)
	{
		return ord($chr);
	}

	// Payload bits carried by the lead byte, indexed by sequence length
	$lead_masks = array(2 => 0x1F, 3 => 0x0F, 4 => 0x07);

	if (!isset($lead_masks[$byte_count]))
	{
		// Empty or longer than 4 bytes: hand the input back untouched
		return $chr;
	}

	$code_point = ord($chr[0]) & $lead_masks[$byte_count];

	// Each following byte contributes its low 6 bits
	for ($i = 1; $i < $byte_count; $i++)
	{
		$code_point = ($code_point << 6) | (ord($chr[$i]) & 0x3F);
	}

	return $code_point;
}
|
|---|
| 969 |
|
|---|
/**
* Converts a UNICODE code point to a UTF-8 char
*
* The code point is encoded as is; no check is made that it denotes an
* existing character.
*
* @param int $cp UNICODE code point
* @return string UTF-8 char (1 to 4 bytes)
*/
function utf8_chr($cp)
{
	// Pick the lead byte marker and the number of continuation bytes
	if ($cp > 0xFFFF)
	{
		$lead_marker = 0xF0;
		$tail_bytes = 3;
	}
	else if ($cp > 0x7FF)
	{
		$lead_marker = 0xE0;
		$tail_bytes = 2;
	}
	else if ($cp > 0x7F)
	{
		$lead_marker = 0xC0;
		$tail_bytes = 1;
	}
	else
	{
		// Fits into a single byte
		return chr($cp);
	}

	// The lead byte carries the topmost bits ...
	$utf8 = chr($lead_marker | ($cp >> (6 * $tail_bytes)));

	// ... and every continuation byte the next lower group of 6 bits
	while ($tail_bytes-- > 0)
	{
		$utf8 .= chr(0x80 | (($cp >> (6 * $tail_bytes)) & 0x3F));
	}

	return $utf8;
}
|
|---|
| 995 |
|
|---|
/**
* Convert Numeric Character References to UTF-8 chars
*
* Recognised are decimal references with 1 to 6 digits and hexadecimal
* references ("x" followed by 1 to 5 hex digits), matched case-insensitively.
*
* Notes:
*	- we do not convert NCRs recursively, if you pass &#38;#38; it will return &#38;
*	- we DO NOT check for the existence of the Unicode characters, therefore an entity may be converted to an inexistent codepoint
*
* @param string $text String to convert, encoded in UTF-8 (no normal form required)
* @return string UTF-8 string where NCRs have been replaced with the actual chars
*/
function utf8_decode_ncr($text)
{
	// Group 1 captures the number, including the leading "x" of hexadecimal references
	$ncr_pattern = '/&#([0-9]{1,6}|x[0-9A-F]{1,5});/i';

	return preg_replace_callback($ncr_pattern, 'utf8_decode_ncr_callback', $text);
}
|
|---|
| 1010 |
|
|---|
/**
* Callback used in utf8_decode_ncr()
*
* Takes a NCR (in decimal or hexadecimal) and returns a UTF-8 char. Attention, $m is an array.
* It will ignore most of invalid NCRs, but not all!
*
* @param array $m 0-based numerically indexed array passed by preg_replace_callback()
* @return string UTF-8 char
*/
function utf8_decode_ncr_callback($m)
{
	// A leading "x" (in either case) marks a hexadecimal reference
	$is_hex = !strncasecmp($m[1], 'x', 1);

	if ($is_hex)
	{
		// Drop the "x" and convert the remaining hex digits
		return utf8_chr(hexdec(substr($m[1], 1)));
	}

	// Decimal reference: the digits are passed on as captured
	return utf8_chr($m[1]);
}
|
|---|
| 1026 |
|
|---|
/**
* Case folds a unicode string as per Unicode 5.0, section 3.13
*
* The replacement tables are read from includes/utf/data/ the first time they
* are needed and kept in a static array for later calls. The common table is
* always applied; it is followed by the full table if $option is 'full' and
* by the simple table for any other value.
*
* @param string $text text to be case folded
* @param string $option determines how we will fold the cases
* @return string case folded text
*/
function utf8_case_fold($text, $option = 'full')
{
	static $uniarray = array();
	global $phpbb_root_path, $phpEx;

	$use_full = ($option === 'full');

	// Key of the second table: f(ull) or s(imple)
	$second_table = ($use_full) ? 'f' : 's';

	// common is always set
	if (!isset($uniarray['c']))
	{
		$uniarray['c'] = include($phpbb_root_path . 'includes/utf/data/case_fold_c.' . $phpEx);
	}

	// the second table is only loaded if this call needs it
	if (!isset($uniarray[$second_table]))
	{
		if ($use_full)
		{
			// full replaces a character with multiple characters
			$uniarray['f'] = include($phpbb_root_path . 'includes/utf/data/case_fold_f.' . $phpEx);
		}
		else
		{
			// simple replaces a character with another character
			$uniarray['s'] = include($phpbb_root_path . 'includes/utf/data/case_fold_s.' . $phpEx);
		}
	}

	// common is always replaced first, then the selected table is applied to the result
	$folded = strtr($text, $uniarray['c']);

	return strtr($folded, $uniarray[$second_table]);
}
|
|---|
| 1073 |
|
|---|
| 1074 | /**
|
|---|
| 1075 | * Takes the input and does a "special" case fold. It does minor normalization
|
|---|
| 1076 | * and returns NFKC compatable text
|
|---|
| 1077 | *
|
|---|
| 1078 | * @param string $text text to be case folded
|
|---|
| 1079 | * @param string $option determines how we will fold the cases
|
|---|
| 1080 | * @return string case folded text
|
|---|
| 1081 | */
|
|---|
| 1082 | function utf8_case_fold_nfkc($text, $option = 'full')
|
|---|
| 1083 | {
|
|---|
| 1084 | static $fc_nfkc_closure = array(
|
|---|
| 1085 | "\xCD\xBA" => "\x20\xCE\xB9",
|
|---|
| 1086 | "\xCF\x92" => "\xCF\x85",
|
|---|
| 1087 | "\xCF\x93" => "\xCF\x8D",
|
|---|
| 1088 | "\xCF\x94" => "\xCF\x8B",
|
|---|
| 1089 | "\xCF\xB2" => "\xCF\x83",
|
|---|
| 1090 | "\xCF\xB9" => "\xCF\x83",
|
|---|
| 1091 | "\xE1\xB4\xAC" => "\x61",
|
|---|
| 1092 | "\xE1\xB4\xAD" => "\xC3\xA6",
|
|---|
| 1093 | "\xE1\xB4\xAE" => "\x62",
|
|---|
| 1094 | "\xE1\xB4\xB0" => "\x64",
|
|---|
| 1095 | "\xE1\xB4\xB1" => "\x65",
|
|---|
| 1096 | "\xE1\xB4\xB2" => "\xC7\x9D",
|
|---|
| 1097 | "\xE1\xB4\xB3" => "\x67",
|
|---|
| 1098 | "\xE1\xB4\xB4" => "\x68",
|
|---|
| 1099 | "\xE1\xB4\xB5" => "\x69",
|
|---|
| 1100 | "\xE1\xB4\xB6" => "\x6A",
|
|---|
| 1101 | "\xE1\xB4\xB7" => "\x6B",
|
|---|
| 1102 | "\xE1\xB4\xB8" => "\x6C",
|
|---|
| 1103 | "\xE1\xB4\xB9" => "\x6D",
|
|---|
| 1104 | "\xE1\xB4\xBA" => "\x6E",
|
|---|
| 1105 | "\xE1\xB4\xBC" => "\x6F",
|
|---|
| 1106 | "\xE1\xB4\xBD" => "\xC8\xA3",
|
|---|
| 1107 | "\xE1\xB4\xBE" => "\x70",
|
|---|
| 1108 | "\xE1\xB4\xBF" => "\x72",
|
|---|
| 1109 | "\xE1\xB5\x80" => "\x74",
|
|---|
| 1110 | "\xE1\xB5\x81" => "\x75",
|
|---|
| 1111 | "\xE1\xB5\x82" => "\x77",
|
|---|
| 1112 | "\xE2\x82\xA8" => "\x72\x73",
|
|---|
| 1113 | "\xE2\x84\x82" => "\x63",
|
|---|
| 1114 | "\xE2\x84\x83" => "\xC2\xB0\x63",
|
|---|
| 1115 | "\xE2\x84\x87" => "\xC9\x9B",
|
|---|
| 1116 | "\xE2\x84\x89" => "\xC2\xB0\x66",
|
|---|
| 1117 | "\xE2\x84\x8B" => "\x68",
|
|---|
| 1118 | "\xE2\x84\x8C" => "\x68",
|
|---|
| 1119 | "\xE2\x84\x8D" => "\x68",
|
|---|
| 1120 | "\xE2\x84\x90" => "\x69",
|
|---|
| 1121 | "\xE2\x84\x91" => "\x69",
|
|---|
| 1122 | "\xE2\x84\x92" => "\x6C",
|
|---|
| 1123 | "\xE2\x84\x95" => "\x6E",
|
|---|
| 1124 | "\xE2\x84\x96" => "\x6E\x6F",
|
|---|
| 1125 | "\xE2\x84\x99" => "\x70",
|
|---|
| 1126 | "\xE2\x84\x9A" => "\x71",
|
|---|
| 1127 | "\xE2\x84\x9B" => "\x72",
|
|---|
| 1128 | "\xE2\x84\x9C" => "\x72",
|
|---|
| 1129 | "\xE2\x84\x9D" => "\x72",
|
|---|
| 1130 | "\xE2\x84\xA0" => "\x73\x6D",
|
|---|
| 1131 | "\xE2\x84\xA1" => "\x74\x65\x6C",
|
|---|
| 1132 | "\xE2\x84\xA2" => "\x74\x6D",
|
|---|
| 1133 | "\xE2\x84\xA4" => "\x7A",
|
|---|
| 1134 | "\xE2\x84\xA8" => "\x7A",
|
|---|
| 1135 | "\xE2\x84\xAC" => "\x62",
|
|---|
| 1136 | "\xE2\x84\xAD" => "\x63",
|
|---|
| 1137 | "\xE2\x84\xB0" => "\x65",
|
|---|
| 1138 | "\xE2\x84\xB1" => "\x66",
|
|---|
| 1139 | "\xE2\x84\xB3" => "\x6D",
|
|---|
| 1140 | "\xE2\x84\xBB" => "\x66\x61\x78",
|
|---|
| 1141 | "\xE2\x84\xBE" => "\xCE\xB3",
|
|---|
| 1142 | "\xE2\x84\xBF" => "\xCF\x80",
|
|---|
| 1143 | "\xE2\x85\x85" => "\x64",
|
|---|
| 1144 | "\xE3\x89\x90" => "\x70\x74\x65",
|
|---|
| 1145 | "\xE3\x8B\x8C" => "\x68\x67",
|
|---|
| 1146 | "\xE3\x8B\x8E" => "\x65\x76",
|
|---|
| 1147 | "\xE3\x8B\x8F" => "\x6C\x74\x64",
|
|---|
| 1148 | "\xE3\x8D\xB1" => "\x68\x70\x61",
|
|---|
| 1149 | "\xE3\x8D\xB3" => "\x61\x75",
|
|---|
| 1150 | "\xE3\x8D\xB5" => "\x6F\x76",
|
|---|
| 1151 | "\xE3\x8D\xBA" => "\x69\x75",
|
|---|
| 1152 | "\xE3\x8E\x80" => "\x70\x61",
|
|---|
| 1153 | "\xE3\x8E\x81" => "\x6E\x61",
|
|---|
| 1154 | "\xE3\x8E\x82" => "\xCE\xBC\x61",
|
|---|
| 1155 | "\xE3\x8E\x83" => "\x6D\x61",
|
|---|
| 1156 | "\xE3\x8E\x84" => "\x6B\x61",
|
|---|
| 1157 | "\xE3\x8E\x85" => "\x6B\x62",
|
|---|
| 1158 | "\xE3\x8E\x86" => "\x6D\x62",
|
|---|
| 1159 | "\xE3\x8E\x87" => "\x67\x62",
|
|---|
| 1160 | "\xE3\x8E\x8A" => "\x70\x66",
|
|---|
| 1161 | "\xE3\x8E\x8B" => "\x6E\x66",
|
|---|
| 1162 | "\xE3\x8E\x8C" => "\xCE\xBC\x66",
|
|---|
| 1163 | "\xE3\x8E\x90" => "\x68\x7A",
|
|---|
| 1164 | "\xE3\x8E\x91" => "\x6B\x68\x7A",
|
|---|
| 1165 | "\xE3\x8E\x92" => "\x6D\x68\x7A",
|
|---|
| 1166 | "\xE3\x8E\x93" => "\x67\x68\x7A",
|
|---|
| 1167 | "\xE3\x8E\x94" => "\x74\x68\x7A",
|
|---|
| 1168 | "\xE3\x8E\xA9" => "\x70\x61",
|
|---|
| 1169 | "\xE3\x8E\xAA" => "\x6B\x70\x61",
|
|---|
| 1170 | "\xE3\x8E\xAB" => "\x6D\x70\x61",
|
|---|
| 1171 | "\xE3\x8E\xAC" => "\x67\x70\x61",
|
|---|
| 1172 | "\xE3\x8E\xB4" => "\x70\x76",
|
|---|
| 1173 | "\xE3\x8E\xB5" => "\x6E\x76",
|
|---|
| 1174 | "\xE3\x8E\xB6" => "\xCE\xBC\x76",
|
|---|
| 1175 | "\xE3\x8E\xB7" => "\x6D\x76",
|
|---|
| 1176 | "\xE3\x8E\xB8" => "\x6B\x76",
|
|---|
| 1177 | "\xE3\x8E\xB9" => "\x6D\x76",
|
|---|
| 1178 | "\xE3\x8E\xBA" => "\x70\x77",
|
|---|
| 1179 | "\xE3\x8E\xBB" => "\x6E\x77",
|
|---|
| 1180 | "\xE3\x8E\xBC" => "\xCE\xBC\x77",
|
|---|
| 1181 | "\xE3\x8E\xBD" => "\x6D\x77",
|
|---|
| 1182 | "\xE3\x8E\xBE" => "\x6B\x77",
|
|---|
| 1183 | "\xE3\x8E\xBF" => "\x6D\x77",
|
|---|
| 1184 | "\xE3\x8F\x80" => "\x6B\xCF\x89",
|
|---|
| 1185 | "\xE3\x8F\x81" => "\x6D\xCF\x89",
|
|---|
| 1186 | "\xE3\x8F\x83" => "\x62\x71",
|
|---|
| 1187 | "\xE3\x8F\x86" => "\x63\xE2\x88\x95\x6B\x67",
|
|---|
| 1188 | "\xE3\x8F\x87" => "\x63\x6F\x2E",
|
|---|
| 1189 | "\xE3\x8F\x88" => "\x64\x62",
|
|---|
| 1190 | "\xE3\x8F\x89" => "\x67\x79",
|
|---|
| 1191 | "\xE3\x8F\x8B" => "\x68\x70",
|
|---|
| 1192 | "\xE3\x8F\x8D" => "\x6B\x6B",
|
|---|
| 1193 | "\xE3\x8F\x8E" => "\x6B\x6D",
|
|---|
| 1194 | "\xE3\x8F\x97" => "\x70\x68",
|
|---|
| 1195 | "\xE3\x8F\x99" => "\x70\x70\x6D",
|
|---|
| 1196 | "\xE3\x8F\x9A" => "\x70\x72",
|
|---|
| 1197 | "\xE3\x8F\x9C" => "\x73\x76",
|
|---|
| 1198 | "\xE3\x8F\x9D" => "\x77\x62",
|
|---|
| 1199 | "\xE3\x8F\x9E" => "\x76\xE2\x88\x95\x6D",
|
|---|
| 1200 | "\xE3\x8F\x9F" => "\x61\xE2\x88\x95\x6D",
|
|---|
| 1201 | "\xF0\x9D\x90\x80" => "\x61",
|
|---|
| 1202 | "\xF0\x9D\x90\x81" => "\x62",
|
|---|
| 1203 | "\xF0\x9D\x90\x82" => "\x63",
|
|---|
| 1204 | "\xF0\x9D\x90\x83" => "\x64",
|
|---|
| 1205 | "\xF0\x9D\x90\x84" => "\x65",
|
|---|
| 1206 | "\xF0\x9D\x90\x85" => "\x66",
|
|---|
| 1207 | "\xF0\x9D\x90\x86" => "\x67",
|
|---|
| 1208 | "\xF0\x9D\x90\x87" => "\x68",
|
|---|
| 1209 | "\xF0\x9D\x90\x88" => "\x69",
|
|---|
| 1210 | "\xF0\x9D\x90\x89" => "\x6A",
|
|---|
| 1211 | "\xF0\x9D\x90\x8A" => "\x6B",
|
|---|
| 1212 | "\xF0\x9D\x90\x8B" => "\x6C",
|
|---|
| 1213 | "\xF0\x9D\x90\x8C" => "\x6D",
|
|---|
| 1214 | "\xF0\x9D\x90\x8D" => "\x6E",
|
|---|
| 1215 | "\xF0\x9D\x90\x8E" => "\x6F",
|
|---|
| 1216 | "\xF0\x9D\x90\x8F" => "\x70",
|
|---|
| 1217 | "\xF0\x9D\x90\x90" => "\x71",
|
|---|
| 1218 | "\xF0\x9D\x90\x91" => "\x72",
|
|---|
| 1219 | "\xF0\x9D\x90\x92" => "\x73",
|
|---|
| 1220 | "\xF0\x9D\x90\x93" => "\x74",
|
|---|
| 1221 | "\xF0\x9D\x90\x94" => "\x75",
|
|---|
| 1222 | "\xF0\x9D\x90\x95" => "\x76",
|
|---|
| 1223 | "\xF0\x9D\x90\x96" => "\x77",
|
|---|
| 1224 | "\xF0\x9D\x90\x97" => "\x78",
|
|---|
| 1225 | "\xF0\x9D\x90\x98" => "\x79",
|
|---|
| 1226 | "\xF0\x9D\x90\x99" => "\x7A",
|
|---|
| 1227 | "\xF0\x9D\x90\xB4" => "\x61",
|
|---|
| 1228 | "\xF0\x9D\x90\xB5" => "\x62",
|
|---|
| 1229 | "\xF0\x9D\x90\xB6" => "\x63",
|
|---|
| 1230 | "\xF0\x9D\x90\xB7" => "\x64",
|
|---|
| 1231 | "\xF0\x9D\x90\xB8" => "\x65",
|
|---|
| 1232 | "\xF0\x9D\x90\xB9" => "\x66",
|
|---|
| 1233 | "\xF0\x9D\x90\xBA" => "\x67",
|
|---|
| 1234 | "\xF0\x9D\x90\xBB" => "\x68",
|
|---|
| 1235 | "\xF0\x9D\x90\xBC" => "\x69",
|
|---|
| 1236 | "\xF0\x9D\x90\xBD" => "\x6A",
|
|---|
| 1237 | "\xF0\x9D\x90\xBE" => "\x6B",
|
|---|
| 1238 | "\xF0\x9D\x90\xBF" => "\x6C",
|
|---|
| 1239 | "\xF0\x9D\x91\x80" => "\x6D",
|
|---|
| 1240 | "\xF0\x9D\x91\x81" => "\x6E",
|
|---|
| 1241 | "\xF0\x9D\x91\x82" => "\x6F",
|
|---|
| 1242 | "\xF0\x9D\x91\x83" => "\x70",
|
|---|
| 1243 | "\xF0\x9D\x91\x84" => "\x71",
|
|---|
| 1244 | "\xF0\x9D\x91\x85" => "\x72",
|
|---|
| 1245 | "\xF0\x9D\x91\x86" => "\x73",
|
|---|
| 1246 | "\xF0\x9D\x91\x87" => "\x74",
|
|---|
| 1247 | "\xF0\x9D\x91\x88" => "\x75",
|
|---|
| 1248 | "\xF0\x9D\x91\x89" => "\x76",
|
|---|
| 1249 | "\xF0\x9D\x91\x8A" => "\x77",
|
|---|
| 1250 | "\xF0\x9D\x91\x8B" => "\x78",
|
|---|
| 1251 | "\xF0\x9D\x91\x8C" => "\x79",
|
|---|
| 1252 | "\xF0\x9D\x91\x8D" => "\x7A",
|
|---|
| 1253 | "\xF0\x9D\x91\xA8" => "\x61",
|
|---|
| 1254 | "\xF0\x9D\x91\xA9" => "\x62",
|
|---|
| 1255 | "\xF0\x9D\x91\xAA" => "\x63",
|
|---|
| 1256 | "\xF0\x9D\x91\xAB" => "\x64",
|
|---|
| 1257 | "\xF0\x9D\x91\xAC" => "\x65",
|
|---|
| 1258 | "\xF0\x9D\x91\xAD" => "\x66",
|
|---|
| 1259 | "\xF0\x9D\x91\xAE" => "\x67",
|
|---|
| 1260 | "\xF0\x9D\x91\xAF" => "\x68",
|
|---|
| 1261 | "\xF0\x9D\x91\xB0" => "\x69",
|
|---|
| 1262 | "\xF0\x9D\x91\xB1" => "\x6A",
|
|---|
| 1263 | "\xF0\x9D\x91\xB2" => "\x6B",
|
|---|
| 1264 | "\xF0\x9D\x91\xB3" => "\x6C",
|
|---|
| 1265 | "\xF0\x9D\x91\xB4" => "\x6D",
|
|---|
| 1266 | "\xF0\x9D\x91\xB5" => "\x6E",
|
|---|
| 1267 | "\xF0\x9D\x91\xB6" => "\x6F",
|
|---|
| 1268 | "\xF0\x9D\x91\xB7" => "\x70",
|
|---|
| 1269 | "\xF0\x9D\x91\xB8" => "\x71",
|
|---|
| 1270 | "\xF0\x9D\x91\xB9" => "\x72",
|
|---|
| 1271 | "\xF0\x9D\x91\xBA" => "\x73",
|
|---|
| 1272 | "\xF0\x9D\x91\xBB" => "\x74",
|
|---|
| 1273 | "\xF0\x9D\x91\xBC" => "\x75",
|
|---|
| 1274 | "\xF0\x9D\x91\xBD" => "\x76",
|
|---|
| 1275 | "\xF0\x9D\x91\xBE" => "\x77",
|
|---|
| 1276 | "\xF0\x9D\x91\xBF" => "\x78",
|
|---|
| 1277 | "\xF0\x9D\x92\x80" => "\x79",
|
|---|
| 1278 | "\xF0\x9D\x92\x81" => "\x7A",
|
|---|
| 1279 | "\xF0\x9D\x92\x9C" => "\x61",
|
|---|
| 1280 | "\xF0\x9D\x92\x9E" => "\x63",
|
|---|
| 1281 | "\xF0\x9D\x92\x9F" => "\x64",
|
|---|
| 1282 | "\xF0\x9D\x92\xA2" => "\x67",
|
|---|
| 1283 | "\xF0\x9D\x92\xA5" => "\x6A",
|
|---|
| 1284 | "\xF0\x9D\x92\xA6" => "\x6B",
|
|---|
| 1285 | "\xF0\x9D\x92\xA9" => "\x6E",
|
|---|
| 1286 | "\xF0\x9D\x92\xAA" => "\x6F",
|
|---|
| 1287 | "\xF0\x9D\x92\xAB" => "\x70",
|
|---|
| 1288 | "\xF0\x9D\x92\xAC" => "\x71",
|
|---|
| 1289 | "\xF0\x9D\x92\xAE" => "\x73",
|
|---|
| 1290 | "\xF0\x9D\x92\xAF" => "\x74",
|
|---|
| 1291 | "\xF0\x9D\x92\xB0" => "\x75",
|
|---|
| 1292 | "\xF0\x9D\x92\xB1" => "\x76",
|
|---|
| 1293 | "\xF0\x9D\x92\xB2" => "\x77",
|
|---|
| 1294 | "\xF0\x9D\x92\xB3" => "\x78",
|
|---|
| 1295 | "\xF0\x9D\x92\xB4" => "\x79",
|
|---|
| 1296 | "\xF0\x9D\x92\xB5" => "\x7A",
|
|---|
| 1297 | "\xF0\x9D\x93\x90" => "\x61",
|
|---|
| 1298 | "\xF0\x9D\x93\x91" => "\x62",
|
|---|
| 1299 | "\xF0\x9D\x93\x92" => "\x63",
|
|---|
| 1300 | "\xF0\x9D\x93\x93" => "\x64",
|
|---|
| 1301 | "\xF0\x9D\x93\x94" => "\x65",
|
|---|
| 1302 | "\xF0\x9D\x93\x95" => "\x66",
|
|---|
| 1303 | "\xF0\x9D\x93\x96" => "\x67",
|
|---|
| 1304 | "\xF0\x9D\x93\x97" => "\x68",
|
|---|
| 1305 | "\xF0\x9D\x93\x98" => "\x69",
|
|---|
| 1306 | "\xF0\x9D\x93\x99" => "\x6A",
|
|---|
| 1307 | "\xF0\x9D\x93\x9A" => "\x6B",
|
|---|
| 1308 | "\xF0\x9D\x93\x9B" => "\x6C",
|
|---|
| 1309 | "\xF0\x9D\x93\x9C" => "\x6D",
|
|---|
| 1310 | "\xF0\x9D\x93\x9D" => "\x6E",
|
|---|
| 1311 | "\xF0\x9D\x93\x9E" => "\x6F",
|
|---|
| 1312 | "\xF0\x9D\x93\x9F" => "\x70",
|
|---|
| 1313 | "\xF0\x9D\x93\xA0" => "\x71",
|
|---|
| 1314 | "\xF0\x9D\x93\xA1" => "\x72",
|
|---|
| 1315 | "\xF0\x9D\x93\xA2" => "\x73",
|
|---|
| 1316 | "\xF0\x9D\x93\xA3" => "\x74",
|
|---|
| 1317 | "\xF0\x9D\x93\xA4" => "\x75",
|
|---|
| 1318 | "\xF0\x9D\x93\xA5" => "\x76",
|
|---|
| 1319 | "\xF0\x9D\x93\xA6" => "\x77",
|
|---|
| 1320 | "\xF0\x9D\x93\xA7" => "\x78",
|
|---|
| 1321 | "\xF0\x9D\x93\xA8" => "\x79",
|
|---|
| 1322 | "\xF0\x9D\x93\xA9" => "\x7A",
|
|---|
| 1323 | "\xF0\x9D\x94\x84" => "\x61",
|
|---|
| 1324 | "\xF0\x9D\x94\x85" => "\x62",
|
|---|
| 1325 | "\xF0\x9D\x94\x87" => "\x64",
|
|---|
| 1326 | "\xF0\x9D\x94\x88" => "\x65",
|
|---|
| 1327 | "\xF0\x9D\x94\x89" => "\x66",
|
|---|
| 1328 | "\xF0\x9D\x94\x8A" => "\x67",
|
|---|
| 1329 | "\xF0\x9D\x94\x8D" => "\x6A",
|
|---|
| 1330 | "\xF0\x9D\x94\x8E" => "\x6B",
|
|---|
| 1331 | "\xF0\x9D\x94\x8F" => "\x6C",
|
|---|
| 1332 | "\xF0\x9D\x94\x90" => "\x6D",
|
|---|
| 1333 | "\xF0\x9D\x94\x91" => "\x6E",
|
|---|
| 1334 | "\xF0\x9D\x94\x92" => "\x6F",
|
|---|
| 1335 | "\xF0\x9D\x94\x93" => "\x70",
|
|---|
| 1336 | "\xF0\x9D\x94\x94" => "\x71",
|
|---|
| 1337 | "\xF0\x9D\x94\x96" => "\x73",
|
|---|
| 1338 | "\xF0\x9D\x94\x97" => "\x74",
|
|---|
| 1339 | "\xF0\x9D\x94\x98" => "\x75",
|
|---|
| 1340 | "\xF0\x9D\x94\x99" => "\x76",
|
|---|
| 1341 | "\xF0\x9D\x94\x9A" => "\x77",
|
|---|
| 1342 | "\xF0\x9D\x94\x9B" => "\x78",
|
|---|
| 1343 | "\xF0\x9D\x94\x9C" => "\x79",
|
|---|
| 1344 | "\xF0\x9D\x94\xB8" => "\x61",
|
|---|
| 1345 | "\xF0\x9D\x94\xB9" => "\x62",
|
|---|
| 1346 | "\xF0\x9D\x94\xBB" => "\x64",
|
|---|
| 1347 | "\xF0\x9D\x94\xBC" => "\x65",
|
|---|
| 1348 | "\xF0\x9D\x94\xBD" => "\x66",
|
|---|
| 1349 | "\xF0\x9D\x94\xBE" => "\x67",
|
|---|
| 1350 | "\xF0\x9D\x95\x80" => "\x69",
|
|---|
| 1351 | "\xF0\x9D\x95\x81" => "\x6A",
|
|---|
| 1352 | "\xF0\x9D\x95\x82" => "\x6B",
|
|---|
| 1353 | "\xF0\x9D\x95\x83" => "\x6C",
|
|---|
| 1354 | "\xF0\x9D\x95\x84" => "\x6D",
|
|---|
| 1355 | "\xF0\x9D\x95\x86" => "\x6F",
|
|---|
| 1356 | "\xF0\x9D\x95\x8A" => "\x73",
|
|---|
| 1357 | "\xF0\x9D\x95\x8B" => "\x74",
|
|---|
| 1358 | "\xF0\x9D\x95\x8C" => "\x75",
|
|---|
| 1359 | "\xF0\x9D\x95\x8D" => "\x76",
|
|---|
| 1360 | "\xF0\x9D\x95\x8E" => "\x77",
|
|---|
| 1361 | "\xF0\x9D\x95\x8F" => "\x78",
|
|---|
| 1362 | "\xF0\x9D\x95\x90" => "\x79",
|
|---|
| 1363 | "\xF0\x9D\x95\xAC" => "\x61",
|
|---|
| 1364 | "\xF0\x9D\x95\xAD" => "\x62",
|
|---|
| 1365 | "\xF0\x9D\x95\xAE" => "\x63",
|
|---|
| 1366 | "\xF0\x9D\x95\xAF" => "\x64",
|
|---|
| 1367 | "\xF0\x9D\x95\xB0" => "\x65",
|
|---|
| 1368 | "\xF0\x9D\x95\xB1" => "\x66",
|
|---|
| 1369 | "\xF0\x9D\x95\xB2" => "\x67",
|
|---|
| 1370 | "\xF0\x9D\x95\xB3" => "\x68",
|
|---|
| 1371 | "\xF0\x9D\x95\xB4" => "\x69",
|
|---|
| 1372 | "\xF0\x9D\x95\xB5" => "\x6A",
|
|---|
| 1373 | "\xF0\x9D\x95\xB6" => "\x6B",
|
|---|
| 1374 | "\xF0\x9D\x95\xB7" => "\x6C",
|
|---|
| 1375 | "\xF0\x9D\x95\xB8" => "\x6D",
|
|---|
| 1376 | "\xF0\x9D\x95\xB9" => "\x6E",
|
|---|
| 1377 | "\xF0\x9D\x95\xBA" => "\x6F",
|
|---|
| 1378 | "\xF0\x9D\x95\xBB" => "\x70",
|
|---|
| 1379 | "\xF0\x9D\x95\xBC" => "\x71",
|
|---|
| 1380 | "\xF0\x9D\x95\xBD" => "\x72",
|
|---|
| 1381 | "\xF0\x9D\x95\xBE" => "\x73",
|
|---|
| 1382 | "\xF0\x9D\x95\xBF" => "\x74",
|
|---|
| 1383 | "\xF0\x9D\x96\x80" => "\x75",
|
|---|
| 1384 | "\xF0\x9D\x96\x81" => "\x76",
|
|---|
| 1385 | "\xF0\x9D\x96\x82" => "\x77",
|
|---|
| 1386 | "\xF0\x9D\x96\x83" => "\x78",
|
|---|
| 1387 | "\xF0\x9D\x96\x84" => "\x79",
|
|---|
| 1388 | "\xF0\x9D\x96\x85" => "\x7A",
|
|---|
| 1389 | "\xF0\x9D\x96\xA0" => "\x61",
|
|---|
| 1390 | "\xF0\x9D\x96\xA1" => "\x62",
|
|---|
| 1391 | "\xF0\x9D\x96\xA2" => "\x63",
|
|---|
| 1392 | "\xF0\x9D\x96\xA3" => "\x64",
|
|---|
| 1393 | "\xF0\x9D\x96\xA4" => "\x65",
|
|---|
| 1394 | "\xF0\x9D\x96\xA5" => "\x66",
|
|---|
| 1395 | "\xF0\x9D\x96\xA6" => "\x67",
|
|---|
| 1396 | "\xF0\x9D\x96\xA7" => "\x68",
|
|---|
| 1397 | "\xF0\x9D\x96\xA8" => "\x69",
|
|---|
| 1398 | "\xF0\x9D\x96\xA9" => "\x6A",
|
|---|
| 1399 | "\xF0\x9D\x96\xAA" => "\x6B",
|
|---|
| 1400 | "\xF0\x9D\x96\xAB" => "\x6C",
|
|---|
| 1401 | "\xF0\x9D\x96\xAC" => "\x6D",
|
|---|
| 1402 | "\xF0\x9D\x96\xAD" => "\x6E",
|
|---|
| 1403 | "\xF0\x9D\x96\xAE" => "\x6F",
|
|---|
| 1404 | "\xF0\x9D\x96\xAF" => "\x70",
|
|---|
| 1405 | "\xF0\x9D\x96\xB0" => "\x71",
|
|---|
| 1406 | "\xF0\x9D\x96\xB1" => "\x72",
|
|---|
| 1407 | "\xF0\x9D\x96\xB2" => "\x73",
|
|---|
| 1408 | "\xF0\x9D\x96\xB3" => "\x74",
|
|---|
| 1409 | "\xF0\x9D\x96\xB4" => "\x75",
|
|---|
| 1410 | "\xF0\x9D\x96\xB5" => "\x76",
|
|---|
| 1411 | "\xF0\x9D\x96\xB6" => "\x77",
|
|---|
| 1412 | "\xF0\x9D\x96\xB7" => "\x78",
|
|---|
| 1413 | "\xF0\x9D\x96\xB8" => "\x79",
|
|---|
| 1414 | "\xF0\x9D\x96\xB9" => "\x7A",
|
|---|
| 1415 | "\xF0\x9D\x97\x94" => "\x61",
|
|---|
| 1416 | "\xF0\x9D\x97\x95" => "\x62",
|
|---|
| 1417 | "\xF0\x9D\x97\x96" => "\x63",
|
|---|
| 1418 | "\xF0\x9D\x97\x97" => "\x64",
|
|---|
| 1419 | "\xF0\x9D\x97\x98" => "\x65",
|
|---|
| 1420 | "\xF0\x9D\x97\x99" => "\x66",
|
|---|
| 1421 | "\xF0\x9D\x97\x9A" => "\x67",
|
|---|
| 1422 | "\xF0\x9D\x97\x9B" => "\x68",
|
|---|
| 1423 | "\xF0\x9D\x97\x9C" => "\x69",
|
|---|
| 1424 | "\xF0\x9D\x97\x9D" => "\x6A",
|
|---|
| 1425 | "\xF0\x9D\x97\x9E" => "\x6B",
|
|---|
| 1426 | "\xF0\x9D\x97\x9F" => "\x6C",
|
|---|
| 1427 | "\xF0\x9D\x97\xA0" => "\x6D",
|
|---|
| 1428 | "\xF0\x9D\x97\xA1" => "\x6E",
|
|---|
| 1429 | "\xF0\x9D\x97\xA2" => "\x6F",
|
|---|
| 1430 | "\xF0\x9D\x97\xA3" => "\x70",
|
|---|
| 1431 | "\xF0\x9D\x97\xA4" => "\x71",
|
|---|
| 1432 | "\xF0\x9D\x97\xA5" => "\x72",
|
|---|
| 1433 | "\xF0\x9D\x97\xA6" => "\x73",
|
|---|
| 1434 | "\xF0\x9D\x97\xA7" => "\x74",
|
|---|
| 1435 | "\xF0\x9D\x97\xA8" => "\x75",
|
|---|
| 1436 | "\xF0\x9D\x97\xA9" => "\x76",
|
|---|
| 1437 | "\xF0\x9D\x97\xAA" => "\x77",
|
|---|
| 1438 | "\xF0\x9D\x97\xAB" => "\x78",
|
|---|
| 1439 | "\xF0\x9D\x97\xAC" => "\x79",
|
|---|
| 1440 | "\xF0\x9D\x97\xAD" => "\x7A",
|
|---|
| 1441 | "\xF0\x9D\x98\x88" => "\x61",
|
|---|
| 1442 | "\xF0\x9D\x98\x89" => "\x62",
|
|---|
| 1443 | "\xF0\x9D\x98\x8A" => "\x63",
|
|---|
| 1444 | "\xF0\x9D\x98\x8B" => "\x64",
|
|---|
| 1445 | "\xF0\x9D\x98\x8C" => "\x65",
|
|---|
| 1446 | "\xF0\x9D\x98\x8D" => "\x66",
|
|---|
| 1447 | "\xF0\x9D\x98\x8E" => "\x67",
|
|---|
| 1448 | "\xF0\x9D\x98\x8F" => "\x68",
|
|---|
| 1449 | "\xF0\x9D\x98\x90" => "\x69",
|
|---|
| 1450 | "\xF0\x9D\x98\x91" => "\x6A",
|
|---|
| 1451 | "\xF0\x9D\x98\x92" => "\x6B",
|
|---|
| 1452 | "\xF0\x9D\x98\x93" => "\x6C",
|
|---|
| 1453 | "\xF0\x9D\x98\x94" => "\x6D",
|
|---|
| 1454 | "\xF0\x9D\x98\x95" => "\x6E",
|
|---|
| 1455 | "\xF0\x9D\x98\x96" => "\x6F",
|
|---|
| 1456 | "\xF0\x9D\x98\x97" => "\x70",
|
|---|
| 1457 | "\xF0\x9D\x98\x98" => "\x71",
|
|---|
| 1458 | "\xF0\x9D\x98\x99" => "\x72",
|
|---|
| 1459 | "\xF0\x9D\x98\x9A" => "\x73",
|
|---|
| 1460 | "\xF0\x9D\x98\x9B" => "\x74",
|
|---|
| 1461 | "\xF0\x9D\x98\x9C" => "\x75",
|
|---|
| 1462 | "\xF0\x9D\x98\x9D" => "\x76",
|
|---|
| 1463 | "\xF0\x9D\x98\x9E" => "\x77",
|
|---|
| 1464 | "\xF0\x9D\x98\x9F" => "\x78",
|
|---|
| 1465 | "\xF0\x9D\x98\xA0" => "\x79",
|
|---|
| 1466 | "\xF0\x9D\x98\xA1" => "\x7A",
|
|---|
| 1467 | "\xF0\x9D\x98\xBC" => "\x61",
|
|---|
| 1468 | "\xF0\x9D\x98\xBD" => "\x62",
|
|---|
| 1469 | "\xF0\x9D\x98\xBE" => "\x63",
|
|---|
| 1470 | "\xF0\x9D\x98\xBF" => "\x64",
|
|---|
| 1471 | "\xF0\x9D\x99\x80" => "\x65",
|
|---|
| 1472 | "\xF0\x9D\x99\x81" => "\x66",
|
|---|
| 1473 | "\xF0\x9D\x99\x82" => "\x67",
|
|---|
| 1474 | "\xF0\x9D\x99\x83" => "\x68",
|
|---|
| 1475 | "\xF0\x9D\x99\x84" => "\x69",
|
|---|
| 1476 | "\xF0\x9D\x99\x85" => "\x6A",
|
|---|
| 1477 | "\xF0\x9D\x99\x86" => "\x6B",
|
|---|
| 1478 | "\xF0\x9D\x99\x87" => "\x6C",
|
|---|
| 1479 | "\xF0\x9D\x99\x88" => "\x6D",
|
|---|
| 1480 | "\xF0\x9D\x99\x89" => "\x6E",
|
|---|
| 1481 | "\xF0\x9D\x99\x8A" => "\x6F",
|
|---|
| 1482 | "\xF0\x9D\x99\x8B" => "\x70",
|
|---|
| 1483 | "\xF0\x9D\x99\x8C" => "\x71",
|
|---|
| 1484 | "\xF0\x9D\x99\x8D" => "\x72",
|
|---|
| 1485 | "\xF0\x9D\x99\x8E" => "\x73",
|
|---|
| 1486 | "\xF0\x9D\x99\x8F" => "\x74",
|
|---|
| 1487 | "\xF0\x9D\x99\x90" => "\x75",
|
|---|
| 1488 | "\xF0\x9D\x99\x91" => "\x76",
|
|---|
| 1489 | "\xF0\x9D\x99\x92" => "\x77",
|
|---|
| 1490 | "\xF0\x9D\x99\x93" => "\x78",
|
|---|
| 1491 | "\xF0\x9D\x99\x94" => "\x79",
|
|---|
| 1492 | "\xF0\x9D\x99\x95" => "\x7A",
|
|---|
| 1493 | "\xF0\x9D\x99\xB0" => "\x61",
|
|---|
| 1494 | "\xF0\x9D\x99\xB1" => "\x62",
|
|---|
| 1495 | "\xF0\x9D\x99\xB2" => "\x63",
|
|---|
| 1496 | "\xF0\x9D\x99\xB3" => "\x64",
|
|---|
| 1497 | "\xF0\x9D\x99\xB4" => "\x65",
|
|---|
| 1498 | "\xF0\x9D\x99\xB5" => "\x66",
|
|---|
| 1499 | "\xF0\x9D\x99\xB6" => "\x67",
|
|---|
| 1500 | "\xF0\x9D\x99\xB7" => "\x68",
|
|---|
| 1501 | "\xF0\x9D\x99\xB8" => "\x69",
|
|---|
| 1502 | "\xF0\x9D\x99\xB9" => "\x6A",
|
|---|
| 1503 | "\xF0\x9D\x99\xBA" => "\x6B",
|
|---|
| 1504 | "\xF0\x9D\x99\xBB" => "\x6C",
|
|---|
| 1505 | "\xF0\x9D\x99\xBC" => "\x6D",
|
|---|
| 1506 | "\xF0\x9D\x99\xBD" => "\x6E",
|
|---|
| 1507 | "\xF0\x9D\x99\xBE" => "\x6F",
|
|---|
| 1508 | "\xF0\x9D\x99\xBF" => "\x70",
|
|---|
| 1509 | "\xF0\x9D\x9A\x80" => "\x71",
|
|---|
| 1510 | "\xF0\x9D\x9A\x81" => "\x72",
|
|---|
| 1511 | "\xF0\x9D\x9A\x82" => "\x73",
|
|---|
| 1512 | "\xF0\x9D\x9A\x83" => "\x74",
|
|---|
| 1513 | "\xF0\x9D\x9A\x84" => "\x75",
|
|---|
| 1514 | "\xF0\x9D\x9A\x85" => "\x76",
|
|---|
| 1515 | "\xF0\x9D\x9A\x86" => "\x77",
|
|---|
| 1516 | "\xF0\x9D\x9A\x87" => "\x78",
|
|---|
| 1517 | "\xF0\x9D\x9A\x88" => "\x79",
|
|---|
| 1518 | "\xF0\x9D\x9A\x89" => "\x7A",
|
|---|
| 1519 | "\xF0\x9D\x9A\xA8" => "\xCE\xB1",
|
|---|
| 1520 | "\xF0\x9D\x9A\xA9" => "\xCE\xB2",
|
|---|
| 1521 | "\xF0\x9D\x9A\xAA" => "\xCE\xB3",
|
|---|
| 1522 | "\xF0\x9D\x9A\xAB" => "\xCE\xB4",
|
|---|
| 1523 | "\xF0\x9D\x9A\xAC" => "\xCE\xB5",
|
|---|
| 1524 | "\xF0\x9D\x9A\xAD" => "\xCE\xB6",
|
|---|
| 1525 | "\xF0\x9D\x9A\xAE" => "\xCE\xB7",
|
|---|
| 1526 | "\xF0\x9D\x9A\xAF" => "\xCE\xB8",
|
|---|
| 1527 | "\xF0\x9D\x9A\xB0" => "\xCE\xB9",
|
|---|
| 1528 | "\xF0\x9D\x9A\xB1" => "\xCE\xBA",
|
|---|
| 1529 | "\xF0\x9D\x9A\xB2" => "\xCE\xBB",
|
|---|
| 1530 | "\xF0\x9D\x9A\xB3" => "\xCE\xBC",
|
|---|
| 1531 | "\xF0\x9D\x9A\xB4" => "\xCE\xBD",
|
|---|
| 1532 | "\xF0\x9D\x9A\xB5" => "\xCE\xBE",
|
|---|
| 1533 | "\xF0\x9D\x9A\xB6" => "\xCE\xBF",
|
|---|
| 1534 | "\xF0\x9D\x9A\xB7" => "\xCF\x80",
|
|---|
| 1535 | "\xF0\x9D\x9A\xB8" => "\xCF\x81",
|
|---|
| 1536 | "\xF0\x9D\x9A\xB9" => "\xCE\xB8",
|
|---|
| 1537 | "\xF0\x9D\x9A\xBA" => "\xCF\x83",
|
|---|
| 1538 | "\xF0\x9D\x9A\xBB" => "\xCF\x84",
|
|---|
| 1539 | "\xF0\x9D\x9A\xBC" => "\xCF\x85",
|
|---|
| 1540 | "\xF0\x9D\x9A\xBD" => "\xCF\x86",
|
|---|
| 1541 | "\xF0\x9D\x9A\xBE" => "\xCF\x87",
|
|---|
| 1542 | "\xF0\x9D\x9A\xBF" => "\xCF\x88",
|
|---|
| 1543 | "\xF0\x9D\x9B\x80" => "\xCF\x89",
|
|---|
| 1544 | "\xF0\x9D\x9B\x93" => "\xCF\x83",
|
|---|
| 1545 | "\xF0\x9D\x9B\xA2" => "\xCE\xB1",
|
|---|
| 1546 | "\xF0\x9D\x9B\xA3" => "\xCE\xB2",
|
|---|
| 1547 | "\xF0\x9D\x9B\xA4" => "\xCE\xB3",
|
|---|
| 1548 | "\xF0\x9D\x9B\xA5" => "\xCE\xB4",
|
|---|
| 1549 | "\xF0\x9D\x9B\xA6" => "\xCE\xB5",
|
|---|
| 1550 | "\xF0\x9D\x9B\xA7" => "\xCE\xB6",
|
|---|
| 1551 | "\xF0\x9D\x9B\xA8" => "\xCE\xB7",
|
|---|
| 1552 | "\xF0\x9D\x9B\xA9" => "\xCE\xB8",
|
|---|
| 1553 | "\xF0\x9D\x9B\xAA" => "\xCE\xB9",
|
|---|
| 1554 | "\xF0\x9D\x9B\xAB" => "\xCE\xBA",
|
|---|
| 1555 | "\xF0\x9D\x9B\xAC" => "\xCE\xBB",
|
|---|
| 1556 | "\xF0\x9D\x9B\xAD" => "\xCE\xBC",
|
|---|
| 1557 | "\xF0\x9D\x9B\xAE" => "\xCE\xBD",
|
|---|
| 1558 | "\xF0\x9D\x9B\xAF" => "\xCE\xBE",
|
|---|
| 1559 | "\xF0\x9D\x9B\xB0" => "\xCE\xBF",
|
|---|
| 1560 | "\xF0\x9D\x9B\xB1" => "\xCF\x80",
|
|---|
| 1561 | "\xF0\x9D\x9B\xB2" => "\xCF\x81",
|
|---|
| 1562 | "\xF0\x9D\x9B\xB3" => "\xCE\xB8",
|
|---|
| 1563 | "\xF0\x9D\x9B\xB4" => "\xCF\x83",
|
|---|
| 1564 | "\xF0\x9D\x9B\xB5" => "\xCF\x84",
|
|---|
| 1565 | "\xF0\x9D\x9B\xB6" => "\xCF\x85",
|
|---|
| 1566 | "\xF0\x9D\x9B\xB7" => "\xCF\x86",
|
|---|
| 1567 | "\xF0\x9D\x9B\xB8" => "\xCF\x87",
|
|---|
| 1568 | "\xF0\x9D\x9B\xB9" => "\xCF\x88",
|
|---|
| 1569 | "\xF0\x9D\x9B\xBA" => "\xCF\x89",
|
|---|
| 1570 | "\xF0\x9D\x9C\x8D" => "\xCF\x83",
|
|---|
| 1571 | "\xF0\x9D\x9C\x9C" => "\xCE\xB1",
|
|---|
| 1572 | "\xF0\x9D\x9C\x9D" => "\xCE\xB2",
|
|---|
| 1573 | "\xF0\x9D\x9C\x9E" => "\xCE\xB3",
|
|---|
| 1574 | "\xF0\x9D\x9C\x9F" => "\xCE\xB4",
|
|---|
| 1575 | "\xF0\x9D\x9C\xA0" => "\xCE\xB5",
|
|---|
| 1576 | "\xF0\x9D\x9C\xA1" => "\xCE\xB6",
|
|---|
| 1577 | "\xF0\x9D\x9C\xA2" => "\xCE\xB7",
|
|---|
| 1578 | "\xF0\x9D\x9C\xA3" => "\xCE\xB8",
|
|---|
| 1579 | "\xF0\x9D\x9C\xA4" => "\xCE\xB9",
|
|---|
| 1580 | "\xF0\x9D\x9C\xA5" => "\xCE\xBA",
|
|---|
| 1581 | "\xF0\x9D\x9C\xA6" => "\xCE\xBB",
|
|---|
| 1582 | "\xF0\x9D\x9C\xA7" => "\xCE\xBC",
|
|---|
| 1583 | "\xF0\x9D\x9C\xA8" => "\xCE\xBD",
|
|---|
| 1584 | "\xF0\x9D\x9C\xA9" => "\xCE\xBE",
|
|---|
| 1585 | "\xF0\x9D\x9C\xAA" => "\xCE\xBF",
|
|---|
| 1586 | "\xF0\x9D\x9C\xAB" => "\xCF\x80",
|
|---|
| 1587 | "\xF0\x9D\x9C\xAC" => "\xCF\x81",
|
|---|
| 1588 | "\xF0\x9D\x9C\xAD" => "\xCE\xB8",
|
|---|
| 1589 | "\xF0\x9D\x9C\xAE" => "\xCF\x83",
|
|---|
| 1590 | "\xF0\x9D\x9C\xAF" => "\xCF\x84",
|
|---|
| 1591 | "\xF0\x9D\x9C\xB0" => "\xCF\x85",
|
|---|
| 1592 | "\xF0\x9D\x9C\xB1" => "\xCF\x86",
|
|---|
| 1593 | "\xF0\x9D\x9C\xB2" => "\xCF\x87",
|
|---|
| 1594 | "\xF0\x9D\x9C\xB3" => "\xCF\x88",
|
|---|
| 1595 | "\xF0\x9D\x9C\xB4" => "\xCF\x89",
|
|---|
| 1596 | "\xF0\x9D\x9D\x87" => "\xCF\x83",
|
|---|
| 1597 | "\xF0\x9D\x9D\x96" => "\xCE\xB1",
|
|---|
| 1598 | "\xF0\x9D\x9D\x97" => "\xCE\xB2",
|
|---|
| 1599 | "\xF0\x9D\x9D\x98" => "\xCE\xB3",
|
|---|
| 1600 | "\xF0\x9D\x9D\x99" => "\xCE\xB4",
|
|---|
| 1601 | "\xF0\x9D\x9D\x9A" => "\xCE\xB5",
|
|---|
| 1602 | "\xF0\x9D\x9D\x9B" => "\xCE\xB6",
|
|---|
| 1603 | "\xF0\x9D\x9D\x9C" => "\xCE\xB7",
|
|---|
| 1604 | "\xF0\x9D\x9D\x9D" => "\xCE\xB8",
|
|---|
| 1605 | "\xF0\x9D\x9D\x9E" => "\xCE\xB9",
|
|---|
| 1606 | "\xF0\x9D\x9D\x9F" => "\xCE\xBA",
|
|---|
| 1607 | "\xF0\x9D\x9D\xA0" => "\xCE\xBB",
|
|---|
| 1608 | "\xF0\x9D\x9D\xA1" => "\xCE\xBC",
|
|---|
| 1609 | "\xF0\x9D\x9D\xA2" => "\xCE\xBD",
|
|---|
| 1610 | "\xF0\x9D\x9D\xA3" => "\xCE\xBE",
|
|---|
| 1611 | "\xF0\x9D\x9D\xA4" => "\xCE\xBF",
|
|---|
| 1612 | "\xF0\x9D\x9D\xA5" => "\xCF\x80",
|
|---|
| 1613 | "\xF0\x9D\x9D\xA6" => "\xCF\x81",
|
|---|
| 1614 | "\xF0\x9D\x9D\xA7" => "\xCE\xB8",
|
|---|
| 1615 | "\xF0\x9D\x9D\xA8" => "\xCF\x83",
|
|---|
| 1616 | "\xF0\x9D\x9D\xA9" => "\xCF\x84",
|
|---|
| 1617 | "\xF0\x9D\x9D\xAA" => "\xCF\x85",
|
|---|
| 1618 | "\xF0\x9D\x9D\xAB" => "\xCF\x86",
|
|---|
| 1619 | "\xF0\x9D\x9D\xAC" => "\xCF\x87",
|
|---|
| 1620 | "\xF0\x9D\x9D\xAD" => "\xCF\x88",
|
|---|
| 1621 | "\xF0\x9D\x9D\xAE" => "\xCF\x89",
|
|---|
| 1622 | "\xF0\x9D\x9E\x81" => "\xCF\x83",
|
|---|
| 1623 | "\xF0\x9D\x9E\x90" => "\xCE\xB1",
|
|---|
| 1624 | "\xF0\x9D\x9E\x91" => "\xCE\xB2",
|
|---|
| 1625 | "\xF0\x9D\x9E\x92" => "\xCE\xB3",
|
|---|
| 1626 | "\xF0\x9D\x9E\x93" => "\xCE\xB4",
|
|---|
| 1627 | "\xF0\x9D\x9E\x94" => "\xCE\xB5",
|
|---|
| 1628 | "\xF0\x9D\x9E\x95" => "\xCE\xB6",
|
|---|
| 1629 | "\xF0\x9D\x9E\x96" => "\xCE\xB7",
|
|---|
| 1630 | "\xF0\x9D\x9E\x97" => "\xCE\xB8",
|
|---|
| 1631 | "\xF0\x9D\x9E\x98" => "\xCE\xB9",
|
|---|
| 1632 | "\xF0\x9D\x9E\x99" => "\xCE\xBA",
|
|---|
| 1633 | "\xF0\x9D\x9E\x9A" => "\xCE\xBB",
|
|---|
| 1634 | "\xF0\x9D\x9E\x9B" => "\xCE\xBC",
|
|---|
| 1635 | "\xF0\x9D\x9E\x9C" => "\xCE\xBD",
|
|---|
| 1636 | "\xF0\x9D\x9E\x9D" => "\xCE\xBE",
|
|---|
| 1637 | "\xF0\x9D\x9E\x9E" => "\xCE\xBF",
|
|---|
| 1638 | "\xF0\x9D\x9E\x9F" => "\xCF\x80",
|
|---|
| 1639 | "\xF0\x9D\x9E\xA0" => "\xCF\x81",
|
|---|
| 1640 | "\xF0\x9D\x9E\xA1" => "\xCE\xB8",
|
|---|
| 1641 | "\xF0\x9D\x9E\xA2" => "\xCF\x83",
|
|---|
| 1642 | "\xF0\x9D\x9E\xA3" => "\xCF\x84",
|
|---|
| 1643 | "\xF0\x9D\x9E\xA4" => "\xCF\x85",
|
|---|
| 1644 | "\xF0\x9D\x9E\xA5" => "\xCF\x86",
|
|---|
| 1645 | "\xF0\x9D\x9E\xA6" => "\xCF\x87",
|
|---|
| 1646 | "\xF0\x9D\x9E\xA7" => "\xCF\x88",
|
|---|
| 1647 | "\xF0\x9D\x9E\xA8" => "\xCF\x89",
|
|---|
| 1648 | "\xF0\x9D\x9E\xBB" => "\xCF\x83",
|
|---|
| 1649 | "\xF0\x9D\x9F\x8A" => "\xCF\x9D",
|
|---|
| 1650 | );
|
|---|
| 1651 | global $phpbb_root_path, $phpEx;
|
|---|
| 1652 |
|
|---|
| 1653 | // do the case fold
|
|---|
| 1654 | $text = utf8_case_fold($text, $option);
|
|---|
| 1655 |
|
|---|
| 1656 | if (!class_exists('utf_normalizer'))
|
|---|
| 1657 | {
|
|---|
| 1658 | global $phpbb_root_path, $phpEx;
|
|---|
| 1659 | include($phpbb_root_path . 'includes/utf/utf_normalizer.' . $phpEx);
|
|---|
| 1660 | }
|
|---|
| 1661 |
|
|---|
| 1662 | // convert to NFKC
|
|---|
| 1663 | utf_normalizer::nfkc($text);
|
|---|
| 1664 |
|
|---|
| 1665 | // FC_NFKC_Closure, http://www.unicode.org/Public/5.0.0/ucd/DerivedNormalizationProps.txt
|
|---|
| 1666 | $text = strtr($text, $fc_nfkc_closure);
|
|---|
| 1667 |
|
|---|
| 1668 | return $text;
|
|---|
| 1669 | }
|
|---|
| 1670 |
|
|---|
/**
* Case folds text that is assumed to already be in NFC.
*
* Before folding, each precomposed character listed in the table below is
* rewritten as the sequence given for it, which always ends in
* U+0345 (COMBINING GREEK YPOGEGRAMMENI). This is the "small trick" that
* avoids a further normalization pass on composed code points that contain
* U+0345 in their decomposition.
*
* @param	string	$text	text to be case folded (assumed to be NFC)
* @param	string	$option	determines how the cases are folded; handed unchanged to utf8_case_fold()
* @return	string			case folded text
*/
function utf8_case_fold_nfc($text, $option = 'full')
{
	// precomposed character => replacement sequence ending in U+0345
	static $ypogegrammeni = array(
		"\xCD\xBA"		=> "\x20\xCD\x85",
		"\xE1\xBE\x80"	=> "\xE1\xBC\x80\xCD\x85",
		"\xE1\xBE\x81"	=> "\xE1\xBC\x81\xCD\x85",
		"\xE1\xBE\x82"	=> "\xE1\xBC\x82\xCD\x85",
		"\xE1\xBE\x83"	=> "\xE1\xBC\x83\xCD\x85",
		"\xE1\xBE\x84"	=> "\xE1\xBC\x84\xCD\x85",
		"\xE1\xBE\x85"	=> "\xE1\xBC\x85\xCD\x85",
		"\xE1\xBE\x86"	=> "\xE1\xBC\x86\xCD\x85",
		"\xE1\xBE\x87"	=> "\xE1\xBC\x87\xCD\x85",
		"\xE1\xBE\x88"	=> "\xE1\xBC\x88\xCD\x85",
		"\xE1\xBE\x89"	=> "\xE1\xBC\x89\xCD\x85",
		"\xE1\xBE\x8A"	=> "\xE1\xBC\x8A\xCD\x85",
		"\xE1\xBE\x8B"	=> "\xE1\xBC\x8B\xCD\x85",
		"\xE1\xBE\x8C"	=> "\xE1\xBC\x8C\xCD\x85",
		"\xE1\xBE\x8D"	=> "\xE1\xBC\x8D\xCD\x85",
		"\xE1\xBE\x8E"	=> "\xE1\xBC\x8E\xCD\x85",
		"\xE1\xBE\x8F"	=> "\xE1\xBC\x8F\xCD\x85",
		"\xE1\xBE\x90"	=> "\xE1\xBC\xA0\xCD\x85",
		"\xE1\xBE\x91"	=> "\xE1\xBC\xA1\xCD\x85",
		"\xE1\xBE\x92"	=> "\xE1\xBC\xA2\xCD\x85",
		"\xE1\xBE\x93"	=> "\xE1\xBC\xA3\xCD\x85",
		"\xE1\xBE\x94"	=> "\xE1\xBC\xA4\xCD\x85",
		"\xE1\xBE\x95"	=> "\xE1\xBC\xA5\xCD\x85",
		"\xE1\xBE\x96"	=> "\xE1\xBC\xA6\xCD\x85",
		"\xE1\xBE\x97"	=> "\xE1\xBC\xA7\xCD\x85",
		"\xE1\xBE\x98"	=> "\xE1\xBC\xA8\xCD\x85",
		"\xE1\xBE\x99"	=> "\xE1\xBC\xA9\xCD\x85",
		"\xE1\xBE\x9A"	=> "\xE1\xBC\xAA\xCD\x85",
		"\xE1\xBE\x9B"	=> "\xE1\xBC\xAB\xCD\x85",
		"\xE1\xBE\x9C"	=> "\xE1\xBC\xAC\xCD\x85",
		"\xE1\xBE\x9D"	=> "\xE1\xBC\xAD\xCD\x85",
		"\xE1\xBE\x9E"	=> "\xE1\xBC\xAE\xCD\x85",
		"\xE1\xBE\x9F"	=> "\xE1\xBC\xAF\xCD\x85",
		"\xE1\xBE\xA0"	=> "\xE1\xBD\xA0\xCD\x85",
		"\xE1\xBE\xA1"	=> "\xE1\xBD\xA1\xCD\x85",
		"\xE1\xBE\xA2"	=> "\xE1\xBD\xA2\xCD\x85",
		"\xE1\xBE\xA3"	=> "\xE1\xBD\xA3\xCD\x85",
		"\xE1\xBE\xA4"	=> "\xE1\xBD\xA4\xCD\x85",
		"\xE1\xBE\xA5"	=> "\xE1\xBD\xA5\xCD\x85",
		"\xE1\xBE\xA6"	=> "\xE1\xBD\xA6\xCD\x85",
		"\xE1\xBE\xA7"	=> "\xE1\xBD\xA7\xCD\x85",
		"\xE1\xBE\xA8"	=> "\xE1\xBD\xA8\xCD\x85",
		"\xE1\xBE\xA9"	=> "\xE1\xBD\xA9\xCD\x85",
		"\xE1\xBE\xAA"	=> "\xE1\xBD\xAA\xCD\x85",
		"\xE1\xBE\xAB"	=> "\xE1\xBD\xAB\xCD\x85",
		"\xE1\xBE\xAC"	=> "\xE1\xBD\xAC\xCD\x85",
		"\xE1\xBE\xAD"	=> "\xE1\xBD\xAD\xCD\x85",
		"\xE1\xBE\xAE"	=> "\xE1\xBD\xAE\xCD\x85",
		"\xE1\xBE\xAF"	=> "\xE1\xBD\xAF\xCD\x85",
		"\xE1\xBE\xB2"	=> "\xE1\xBD\xB0\xCD\x85",
		"\xE1\xBE\xB3"	=> "\xCE\xB1\xCD\x85",
		"\xE1\xBE\xB4"	=> "\xCE\xAC\xCD\x85",
		"\xE1\xBE\xB7"	=> "\xE1\xBE\xB6\xCD\x85",
		"\xE1\xBE\xBC"	=> "\xCE\x91\xCD\x85",
		"\xE1\xBF\x82"	=> "\xE1\xBD\xB4\xCD\x85",
		"\xE1\xBF\x83"	=> "\xCE\xB7\xCD\x85",
		"\xE1\xBF\x84"	=> "\xCE\xAE\xCD\x85",
		"\xE1\xBF\x87"	=> "\xE1\xBF\x86\xCD\x85",
		"\xE1\xBF\x8C"	=> "\xCE\x97\xCD\x85",
		"\xE1\xBF\xB2"	=> "\xE1\xBD\xBC\xCD\x85",
		"\xE1\xBF\xB3"	=> "\xCF\x89\xCD\x85",
		"\xE1\xBF\xB4"	=> "\xCF\x8E\xCD\x85",
		"\xE1\xBF\xB7"	=> "\xE1\xBF\xB6\xCD\x85",
		"\xE1\xBF\xBC"	=> "\xCE\xA9\xCD\x85",
	);

	// Rewrite the listed composed characters first, then do the case fold
	// on the result.
	return utf8_case_fold(strtr($text, $ypogegrammeni), $option);
}
| 1758 |
|
|---|
/**
* A wrapper function for the normalizer which takes care of including the
* class if required and returns the passed strings in NFC
* (Normalization Form Composition).
*
* Arrays are walked recursively, so nested arrays of any depth are
* normalized and every array key is preserved. Empty input (as judged by
* empty(): '', '0', 0, null, array(), ...) is returned untouched without
* loading the normalizer.
*
* @param	mixed	$strings	a string or a (possibly nested) array of strings to normalize
* @return	mixed				the normalized content, preserving array keys if an array was given
*/
function utf8_normalize_nfc($strings)
{
	if (empty($strings))
	{
		return $strings;
	}

	// Load the normalizer lazily, only when there is something to normalize
	if (!class_exists('utf_normalizer'))
	{
		global $phpbb_root_path, $phpEx;
		include($phpbb_root_path . 'includes/utf/utf_normalizer.' . $phpEx);
	}

	if (!is_array($strings))
	{
		// utf_normalizer::nfc() works on its argument in place (by reference)
		utf_normalizer::nfc($strings);

		return $strings;
	}

	foreach ($strings as $key => $string)
	{
		if (is_array($string))
		{
			// Recurse so that the normalizer never receives an array,
			// however deeply the input is nested
			$strings[$key] = utf8_normalize_nfc($string);
		}
		else
		{
			utf_normalizer::nfc($strings[$key]);
		}
	}

	return $strings;
}
| 1803 |
|
|---|
/**
* Generates the "clean" version of a string.
*
* Clean means that the string is case folded and NFKC-normalized (through
* utf8_case_fold_nfkc()), which makes it a case insensitive form.
* Additionally the homographs of one character are transformed into one
* specific character (preferably ASCII if it is an ASCII character), control
* characters are removed, runs of spaces are collapsed into a single space
* and the result is trimmed.
*
* Please be aware that if you change something within this function or within
* functions used here you need to rebuild/update the username_clean column in
* the users table, and all other columns that store a clean string, otherwise
* you will break this functionality.
*
* @param	string	$text	An unclean string, maybe user input (has to be valid UTF-8!)
* @return	string			Cleaned up version of the input string
*/
function utf8_clean_string($text)
{
	global $phpbb_root_path, $phpEx;

	// The confusables table is loaded once and kept for later calls
	static $confusables = array();

	if (!$confusables)
	{
		$confusables = include($phpbb_root_path . 'includes/utf/data/confusables.' . $phpEx);
	}

	// Case fold + NFKC first, then map homographs onto their representative
	$folded = strtr(utf8_case_fold_nfkc($text), $confusables);

	// Other control characters
	$without_controls = preg_replace('#(?:[\x00-\x1F\x7F]+|(?:\xC2[\x80-\x9F])+)#', '', $folded);

	// we need to reduce multiple spaces to a single one
	$single_spaced = preg_replace('# {2,}#', ' ', $without_controls);

	// we can use trim here as all the other space characters should have been turned
	// into normal ASCII spaces by now
	return trim($single_spaced);
}
| 1839 |
|
|---|
/**
* A wrapper for htmlspecialchars($value, ENT_COMPAT, 'UTF-8')
*
* @param	string	$value	the UTF-8 string to escape
* @return	string			the string as returned by htmlspecialchars()
*/
function utf8_htmlspecialchars($value)
{
	$escaped = htmlspecialchars($value, ENT_COMPAT, 'UTF-8');

	return $escaped;
}
| 1847 |
|
|---|
/**
* Tries to convert a returned system message to UTF-8 and escapes it for HTML
*
* PHP assumes such messages are ISO-8859-1 so we'll do that too
* and if it breaks messages we'll blame it on them ;-)
*
* @param	string	$message	the system message
* @return	string				the message after utf8_htmlspecialchars(), recoded from ISO-8859-1 first when it contains bytes above 0x7F
*/
function utf8_convert_message($message)
{
	// Conversion is only needed when the message holds at least one byte in
	// the range 0x80-0xFF, as there is no point in converting pure ASCII
	// messages from ISO-8859-1 to UTF-8
	if (preg_match('/[\x80-\xFF]/', $message))
	{
		$message = utf8_recode($message, 'ISO-8859-1');
	}

	return utf8_htmlspecialchars($message);
}
| 1866 |
|
|---|
/**
* UTF8-compatible wordwrap replacement
*
* The string is first split on $break so that existing (intended) breaks are
* kept, then every resulting line is split on single spaces and re-assembled
* word by word. Lengths are measured with utf8_strlen(), i.e. in characters
* rather than bytes.
*
* A word that is longer than $width and must not be cut is put on a line of
* its own; it never produces an empty line in front of it.
*
* @param	string	$string	The input string
* @param	int		$width	The column width. Defaults to 75.
* @param	string	$break	The line is broken using the optional break parameter. Defaults to "\n".
* @param	bool	$cut	If the cut is set to TRUE, the string is always wrapped at the specified width. So if you have a word that is larger than the given width, it is broken apart. Ignored when $width is smaller than 1.
*
* @return	string	the given string wrapped at the specified column.
*/
function utf8_wordwrap($string, $width = 75, $break = "\n", $cut = false)
{
	// Cutting a word into pieces of less than one character can never make
	// progress (the remainder would be the whole word again), so cutting is
	// only honoured for a positive width.
	if ($cut && $width < 1)
	{
		$cut = false;
	}

	// We first need to explode on $break, not destroying existing (intended) breaks
	$lines = explode($break, $string);
	$new_lines = array(0 => '');
	$index = 0;

	foreach ($lines as $line)
	{
		$words = explode(' ', $line);

		for ($i = 0, $size = count($words); $i < $size; $i++)
		{
			$word = $words[$i];

			// If cut is true we need to cut the word if it is > width chars.
			// The remainder is stored back into the same slot and $i is
			// stepped back so that it is processed on the next iteration.
			if ($cut && utf8_strlen($word) > $width)
			{
				$words[$i] = utf8_substr($word, $width);
				$word = utf8_substr($word, 0, $width);
				$i--;
			}

			// Start a new output line when the word does not fit anymore, but
			// only if the current line already holds something: breaking an
			// empty line would merely emit a spurious blank line.
			if ($new_lines[$index] !== '' && utf8_strlen($new_lines[$index] . $word) > $width)
			{
				// strip the trailing space appended after the previous word
				$new_lines[$index] = substr($new_lines[$index], 0, -1);
				$index++;
				$new_lines[$index] = '';
			}

			$new_lines[$index] .= $word . ' ';
		}

		// End of an input line: strip the trailing space and open the next line
		$new_lines[$index] = substr($new_lines[$index], 0, -1);
		$index++;
		$new_lines[$index] = '';
	}

	// The last opened line is always an unused placeholder
	unset($new_lines[$index]);

	return implode($break, $new_lines);
}
| 1919 |
|
|---|
/**
* UTF8-safe basename() function
*
* basename() has some limitations and is dependent on the locale setting
* according to the PHP manual. Therefore we provide our own locale independent
* basename function.
*
* The result is everything after the last forward slash or backslash,
* whichever comes later; a name ending in a separator yields an empty string.
* Both separators are single-byte ASCII characters, and in UTF-8 such bytes
* never occur inside a multi-byte sequence, so plain byte-wise string
* functions are sufficient here and never split a character.
*
* @param	string	$filename	The filename basename() should be applied to
* @return	string				The basenamed filename
*/
function utf8_basename($filename)
{
	// We always check for forward slash AND backward slash
	// because they could be mixed or "sneaked" in. ;)
	// You know, never trust user input...
	foreach (array('/', '\\') as $separator)
	{
		$position = strrpos($filename, $separator);

		if ($position !== false)
		{
			// The cast keeps the result a string on PHP versions where
			// substr() returns false for a start offset at the very end
			$filename = (string) substr($filename, $position + 1);
		}
	}

	return $filename;
}
| 1947 |
|
|---|
/**
* UTF8-safe str_replace() function
*
* This delegates to PHP's native str_replace(). A byte-wise replacement is
* safe for UTF-8 as long as $search, $replace and $subject are valid UTF-8:
* no byte of one encoded character can be mistaken for the start of another,
* so a match always begins and ends on a character boundary.
*
* As with str_replace(): the entries of an array $search are applied one after
* the other, an array $replace that is shorter than $search is completed with
* empty strings, and an empty search string leaves the subject unchanged.
*
* @param	mixed	$search		The value, or an array of values, to search for
* @param	mixed	$replace	The replacement string, or an array of replacements
* @param	string	$subject	The target string (has to be valid UTF-8)
* @return	string				The resultant string
*/
function utf8_str_replace($search, $replace, $subject)
{
	// An array of replacements makes no sense for a single search string;
	// report it and fall back to the string conversion of the array
	if (!is_array($search) && is_array($replace))
	{
		$replace = (string) $replace;
		trigger_error('Array to string conversion', E_USER_NOTICE);
	}

	return str_replace($search, $replace, $subject);
}
| 1994 |
|
|---|
| 1995 | ?>
|
|---|