1 | unit BGRAUTF8;
|
---|
2 |
|
---|
3 | {$mode objfpc}{$H+}
|
---|
4 | {$i bgrabitmap.inc}
|
---|
5 |
|
---|
6 | interface
|
---|
7 |
|
---|
8 | uses
|
---|
9 | Classes, SysUtils, BGRAUnicode{$IFDEF BGRABITMAP_USE_LCL}, lazutf8classes{$ENDIF};
|
---|
10 |
|
---|
11 | {$IFDEF BGRABITMAP_USE_LCL}
|
---|
12 | type
|
---|
13 | TFileStreamUTF8 = lazutf8classes.TFileStreamUTF8;
|
---|
14 | TStringListUTF8 = lazutf8classes.TStringListUTF8;
|
---|
15 | {$ELSE}
|
---|
16 | type
|
---|
17 | TFileStreamUTF8 = class(THandleStream)
|
---|
18 | private
|
---|
19 | FFileName: utf8string;
|
---|
20 | public
|
---|
21 | constructor Create(const AFileName: utf8string; Mode: Word); overload;
|
---|
22 | constructor Create(const AFileName: utf8string; Mode: Word; Rights: Cardinal); overload;
|
---|
23 | destructor Destroy; override;
|
---|
24 | property FileName: utf8string Read FFilename;
|
---|
25 | end;
|
---|
26 |
|
---|
27 | TStringListUTF8 = class(TStringList)
|
---|
28 | protected
|
---|
29 | function DoCompareText(const s1,s2 : string) : PtrInt; override;
|
---|
30 | public
|
---|
31 | procedure LoadFromFile(const FileName: string); override;
|
---|
32 | procedure SaveToFile(const FileName: string); override;
|
---|
33 | end;
|
---|
34 | {$ENDIF}
|
---|
35 |
|
---|
36 | procedure LoadStringsFromFileUTF8(List: TStrings; const FileName: string);
|
---|
37 | procedure SaveStringsToFileUTF8(List: TStrings; const FileName: string);
|
---|
38 |
|
---|
39 | function UTF8ToSys(const s: string): string;
|
---|
40 | function SysToUTF8(const s: string): string;
|
---|
41 |
|
---|
42 | function UTF8LowerCase(const s: string): string;
|
---|
43 | function UTF8UpperCase(const s: string): string;
|
---|
44 |
|
---|
45 | function UTF8CompareStr(const S1, S2: string): Integer;
|
---|
46 | function UTF8CompareText(const S1, S2: string): Integer;
|
---|
47 |
|
---|
48 | function UTF8CharStart(UTF8Str: PChar; Len, CharIndex: PtrInt): PChar;
|
---|
49 |
|
---|
50 | function FileOpenUTF8(Const FileName : string; Mode : Integer) : THandle;
|
---|
51 | function FileCreateUTF8(Const FileName : string) : THandle; overload;
|
---|
52 | function FileCreateUTF8(Const FileName : string; Rights: Cardinal) : THandle; overload;
|
---|
53 | function FileExistsUTF8(Const FileName : string): boolean;
|
---|
54 | function FindFirstUTF8(const Path: string; Attr: Longint; out Rslt: TSearchRec): Longint;
|
---|
55 | function FindNextUTF8(var Rslt: TSearchRec): Longint;
|
---|
56 | procedure FindCloseUTF8(var F: TSearchrec);
|
---|
57 |
|
---|
58 | type
|
---|
59 | string4 = string[4];
|
---|
60 |
|
---|
61 | function UTF8CharacterLength(p: PChar): integer;
|
---|
62 | function UTF8Length(const s: string): PtrInt; overload;
|
---|
63 | function UTF8Length(p: PChar; ByteCount: PtrInt): PtrInt; overload;
|
---|
64 | function UnicodeCharToUTF8(u: cardinal): string4;
|
---|
65 | function UTF8ReverseString(const s: string): string;
|
---|
66 | function UTF8CodepointToUnicode(p: PChar; ACodePointLen: integer): cardinal;
|
---|
67 |
|
---|
68 | type
|
---|
69 | TBidiUTF8Info = packed record
|
---|
70 | Offset: Integer;
|
---|
71 | BidiInfo: TUnicodeBidiInfo;
|
---|
72 | end;
|
---|
73 | TBidiUTF8Array = packed array of TBidiUTF8Info;
|
---|
74 | TUnicodeDisplayOrder = BGRAUnicode.TUnicodeDisplayOrder;
|
---|
75 |
|
---|
76 | function GetBidiClassUTF8(P: PChar): TUnicodeBidiClass;
|
---|
77 | function GetFirstStrongBidiClassUTF8(const sUTF8: string): TUnicodeBidiClass;
|
---|
78 | function GetLastStrongBidiClassUTF8(const sUTF8: string): TUnicodeBidiClass;
|
---|
79 | function IsRightToLeftUTF8(const sUTF8: string): boolean;
|
---|
80 | function IsZeroWidthUTF8(const sUTF8: string): boolean;
|
---|
81 | function AddParagraphBidiUTF8(s: string; ARightToLeft: boolean): string;
|
---|
82 | function AnalyzeBidiUTF8(const sUTF8: string; ARightToLeft: boolean): TBidiUTF8Array; overload;
|
---|
83 | function AnalyzeBidiUTF8(const sUTF8: string): TBidiUTF8Array; overload;
|
---|
84 | function GetUTF8DisplayOrder(const ABidi: TBidiUTF8Array): TUnicodeDisplayOrder;
|
---|
85 | function ContainsBidiIsolateOrFormattingUTF8(const sUTF8: string): boolean;
|
---|
86 |
|
---|
87 | function UTF8OverrideDirection(const sUTF8: string; ARightToLeft: boolean): string;
|
---|
88 | function UTF8EmbedDirection(const sUTF8: string; ARightToLeft: boolean): string;
|
---|
89 |
|
---|
90 | //little endian stream functions
|
---|
91 | function LEReadInt64(Stream: TStream): int64;
|
---|
92 | procedure LEWriteInt64(Stream: TStream; AValue: int64);
|
---|
93 | function LEReadLongint(Stream: TStream): longint;
|
---|
94 | procedure LEWriteLongint(Stream: TStream; AValue: LongInt);
|
---|
95 | function LEReadByte(Stream: TStream): byte;
|
---|
96 | procedure LEWriteByte(Stream: TStream; AValue: Byte);
|
---|
97 | function LEReadSingle(Stream: TStream): single;
|
---|
98 | procedure LEWriteSingle(Stream: TStream; AValue: single);
|
---|
99 |
|
---|
100 | implementation
|
---|
101 |
|
---|
102 | {$IFDEF BGRABITMAP_USE_LCL}
|
---|
103 | uses LazFileUtils, LazUtf8;
|
---|
104 |
|
---|
105 | procedure LoadStringsFromFileUTF8(List: TStrings; const FileName: string);
|
---|
106 | begin
|
---|
107 | lazutf8classes.LoadStringsFromFileUTF8(List,FileName);
|
---|
108 | end;
|
---|
109 |
|
---|
110 | procedure SaveStringsToFileUTF8(List: TStrings; const FileName: string);
|
---|
111 | begin
|
---|
112 | lazutf8classes.SaveStringsToFileUTF8(List,FileName);
|
---|
113 | end;
|
---|
114 |
|
---|
115 | function UTF8ToSys(const s: string): string;
|
---|
116 | begin
|
---|
117 | result := LazUtf8.UTF8ToSys(s);
|
---|
118 | end;
|
---|
119 |
|
---|
120 | function SysToUTF8(const s: string): string;
|
---|
121 | begin
|
---|
122 | result := LazUtf8.SysToUTF8(s);
|
---|
123 | end;
|
---|
124 |
|
---|
125 | function UTF8LowerCase(const s: string): string;
|
---|
126 | begin
|
---|
127 | result := LazUtf8.UTF8LowerCase(s);
|
---|
128 | end;
|
---|
129 |
|
---|
130 | function UTF8UpperCase(const s: string): string;
|
---|
131 | begin
|
---|
132 | result := LazUtf8.UTF8UpperCase(s);
|
---|
133 | end;
|
---|
134 |
|
---|
135 | function UTF8CompareStr(const S1, S2: string): Integer;
|
---|
136 | begin
|
---|
137 | result := LazUtf8.UTF8CompareStr(S1,S2);
|
---|
138 | end;
|
---|
139 |
|
---|
140 | function UTF8CompareText(const S1, S2: string): Integer;
|
---|
141 | begin
|
---|
142 | result := LazUtf8.UTF8CompareText(S1,S2);
|
---|
143 | end;
|
---|
144 |
|
---|
145 | function FileOpenUTF8(Const FileName : string; Mode : Integer) : THandle;
|
---|
146 | begin
|
---|
147 | result := LazFileUtils.FileOpenUTF8(FileName, Mode);
|
---|
148 | end;
|
---|
149 |
|
---|
150 | function FileCreateUTF8(Const FileName : string) : THandle; overload;
|
---|
151 | begin
|
---|
152 | result := LazFileUtils.FileCreateUTF8(FileName);
|
---|
153 | end;
|
---|
154 |
|
---|
155 | function FileCreateUTF8(Const FileName : string; Rights: Cardinal) : THandle; overload;
|
---|
156 | begin
|
---|
157 | result := LazFileUtils.FileCreateUTF8(FileName, Rights);
|
---|
158 | end;
|
---|
159 |
|
---|
160 | function FileExistsUTF8(Const FileName : string): boolean;
|
---|
161 | begin
|
---|
162 | result := LazFileUtils.FileExistsUTF8(FileName);
|
---|
163 | end;
|
---|
164 |
|
---|
165 | function FindFirstUTF8(const Path: string; Attr: Longint; out Rslt: TSearchRec
|
---|
166 | ): Longint;
|
---|
167 | begin
|
---|
168 | result := LazFileUtils.FindFirstUTF8(Path,Attr,Rslt);
|
---|
169 | end;
|
---|
170 |
|
---|
171 | function FindNextUTF8(var Rslt: TSearchRec): Longint;
|
---|
172 | begin
|
---|
173 | result := LazFileUtils.FindNextUTF8(Rslt);
|
---|
174 | end;
|
---|
175 |
|
---|
176 | procedure FindCloseUTF8(var F: TSearchrec);
|
---|
177 | begin
|
---|
178 | LazFileUtils.FindCloseUTF8(F);
|
---|
179 | end;
|
---|
180 |
|
---|
181 | function UTF8CharacterLength(p: PChar): integer;
|
---|
182 | begin
|
---|
183 | result := LazUtf8.UTF8CharacterLength(p);
|
---|
184 | end;
|
---|
185 |
|
---|
186 | function UTF8Length(const s: string): PtrInt;
|
---|
187 | begin
|
---|
188 | result := LazUtf8.UTF8Length(s);
|
---|
189 | end;
|
---|
190 |
|
---|
191 | function UTF8Length(p: PChar; ByteCount: PtrInt): PtrInt;
|
---|
192 | begin
|
---|
193 | result := LazUtf8.UTF8Length(p, ByteCount);
|
---|
194 | end;
|
---|
195 |
|
---|
196 | function UnicodeCharToUTF8(u: cardinal): string4;
|
---|
197 | begin
|
---|
198 | result := LazUtf8.UnicodeToUTF8(u);
|
---|
199 | end;
|
---|
200 |
|
---|
201 | {$ELSE}
|
---|
202 |
|
---|
203 | procedure LoadStringsFromFileUTF8(List: TStrings; const FileName: string);
|
---|
204 | var
|
---|
205 | uList: TStringListUTF8;
|
---|
206 | begin
|
---|
207 | if List is TStringListUTF8 then
|
---|
208 | begin
|
---|
209 | List.LoadFromFile(FileName);
|
---|
210 | exit;
|
---|
211 | end;
|
---|
212 | uList:=TStringListUTF8.Create;
|
---|
213 | try
|
---|
214 | uList.LoadFromFile(FileName);
|
---|
215 | List.Assign(uList);
|
---|
216 | finally
|
---|
217 | uList.Free;
|
---|
218 | end;
|
---|
219 | end;
|
---|
220 |
|
---|
221 | procedure SaveStringsToFileUTF8(List: TStrings; const FileName: string);
|
---|
222 | var
|
---|
223 | uList: TStringListUTF8;
|
---|
224 | begin
|
---|
225 | if List is TStringListUTF8 then
|
---|
226 | begin
|
---|
227 | List.SaveToFile(FileName);
|
---|
228 | exit;
|
---|
229 | end;
|
---|
230 | uList:=TStringListUTF8.Create;
|
---|
231 | try
|
---|
232 | uList.Assign(List);
|
---|
233 | uList.SaveToFile(FileName);
|
---|
234 | finally
|
---|
235 | uList.Free;
|
---|
236 | end;
|
---|
237 | end;
|
---|
238 |
|
---|
239 | function UTF8LowerCase(const s: string): string;
|
---|
240 | begin
|
---|
241 | result := UTF8Encode(UnicodeLowerCase(UTF8Decode(s)));
|
---|
242 | end;
|
---|
243 |
|
---|
244 | function UTF8UpperCase(const s: string): string;
|
---|
245 | begin
|
---|
246 | result := UTF8Encode(UnicodeUpperCase(UTF8Decode(s)));
|
---|
247 | end;
|
---|
248 |
|
---|
249 | function UTF8CompareStr(const S1, S2: string): Integer;
|
---|
250 | begin
|
---|
251 | Result := SysUtils.CompareStr(S1, S2);
|
---|
252 | end;
|
---|
253 |
|
---|
254 | function UTF8CompareText(const S1, S2: string): Integer;
|
---|
255 | begin
|
---|
256 | Result := UnicodeCompareText(UTF8Decode(S1), UTF8Decode(S2));
|
---|
257 | end;
|
---|
258 |
|
---|
259 | function FileOpenUTF8(const FileName: string; Mode: Integer): THandle;
|
---|
260 | begin
|
---|
261 | result := FileOpen(UTF8ToSys(FileName),Mode);
|
---|
262 | end;
|
---|
263 |
|
---|
264 | function FileCreateUTF8(const FileName: string): THandle;
|
---|
265 | begin
|
---|
266 | result := FileCreate(UTF8ToSys(FileName));
|
---|
267 | end;
|
---|
268 |
|
---|
269 | function FileCreateUTF8(const FileName: string; Rights: Cardinal): THandle;
|
---|
270 | begin
|
---|
271 | result := FileCreate(UTF8ToSys(FileName),Rights);
|
---|
272 | end;
|
---|
273 |
|
---|
274 | function FileExistsUTF8(const FileName: string): boolean;
|
---|
275 | begin
|
---|
276 | result := FileExists(UTF8ToSys(FileName));
|
---|
277 | end;
|
---|
278 |
|
---|
279 | function FindFirstUTF8(const Path: string; Attr: Longint; out Rslt: TSearchRec
|
---|
280 | ): Longint;
|
---|
281 | begin
|
---|
282 | result := FindFirst(UTF8ToSys(Path),Attr,Rslt);
|
---|
283 | Rslt.Name := SysToUTF8(Rslt.Name);
|
---|
284 | end;
|
---|
285 |
|
---|
286 | function FindNextUTF8(var Rslt: TSearchRec): Longint;
|
---|
287 | begin
|
---|
288 | result := FindNext(Rslt);
|
---|
289 | if result = 0 then
|
---|
290 | Rslt.Name := SysToUTF8(Rslt.Name);
|
---|
291 | end;
|
---|
292 |
|
---|
293 | procedure FindCloseUTF8(var F: TSearchrec);
|
---|
294 | begin
|
---|
295 | FindClose(F);
|
---|
296 | end;
|
---|
297 |
|
---|
298 | function UTF8ToSys(const s: string): string;
|
---|
299 | begin
|
---|
300 | result := Utf8ToAnsi(s);
|
---|
301 | end;
|
---|
302 |
|
---|
303 | function SysToUTF8(const s: string): string;
|
---|
304 | begin
|
---|
305 | result := AnsiToUtf8(s);
|
---|
306 | end;
|
---|
307 |
|
---|
308 | function UTF8CharacterLength(p: PChar): integer;
|
---|
309 | begin
|
---|
310 | if p<>nil then begin
|
---|
311 | if ord(p^)<%11000000 then begin
|
---|
312 | // regular single byte character (#0 is a character, this is pascal ;)
|
---|
313 | Result:=1;
|
---|
314 | end
|
---|
315 | else begin
|
---|
316 | // multi byte
|
---|
317 | if ((ord(p^) and %11100000) = %11000000) then begin
|
---|
318 | // could be 2 byte character
|
---|
319 | if (ord(p[1]) and %11000000) = %10000000 then
|
---|
320 | Result:=2
|
---|
321 | else
|
---|
322 | Result:=1;
|
---|
323 | end
|
---|
324 | else if ((ord(p^) and %11110000) = %11100000) then begin
|
---|
325 | // could be 3 byte character
|
---|
326 | if ((ord(p[1]) and %11000000) = %10000000)
|
---|
327 | and ((ord(p[2]) and %11000000) = %10000000) then
|
---|
328 | Result:=3
|
---|
329 | else
|
---|
330 | Result:=1;
|
---|
331 | end
|
---|
332 | else if ((ord(p^) and %11111000) = %11110000) then begin
|
---|
333 | // could be 4 byte character
|
---|
334 | if ((ord(p[1]) and %11000000) = %10000000)
|
---|
335 | and ((ord(p[2]) and %11000000) = %10000000)
|
---|
336 | and ((ord(p[3]) and %11000000) = %10000000) then
|
---|
337 | Result:=4
|
---|
338 | else
|
---|
339 | Result:=1;
|
---|
340 | end
|
---|
341 | else
|
---|
342 | Result:=1;
|
---|
343 | end;
|
---|
344 | end else
|
---|
345 | Result:=0;
|
---|
346 | end;
|
---|
347 |
|
---|
348 | function UTF8Length(const s: string): PtrInt;
|
---|
349 | begin
|
---|
350 | Result:=UTF8Length(PChar(s),length(s));
|
---|
351 | end;
|
---|
352 |
|
---|
353 | function UTF8Length(p: PChar; ByteCount: PtrInt): PtrInt;
|
---|
354 | var
|
---|
355 | CharLen: LongInt;
|
---|
356 | begin
|
---|
357 | Result:=0;
|
---|
358 | while (ByteCount>0) do begin
|
---|
359 | inc(Result);
|
---|
360 | CharLen:=UTF8CharacterLength(p);
|
---|
361 | inc(p,CharLen);
|
---|
362 | dec(ByteCount,CharLen);
|
---|
363 | end;
|
---|
364 | end;
|
---|
365 |
|
---|
366 | function UnicodeToUTF8Inline(CodePoint: cardinal; Buf: PChar): integer;
|
---|
367 | begin
|
---|
368 | case CodePoint of
|
---|
369 | 0..$7f:
|
---|
370 | begin
|
---|
371 | Result:=1;
|
---|
372 | Buf[0]:=char(byte(CodePoint));
|
---|
373 | end;
|
---|
374 | $80..$7ff:
|
---|
375 | begin
|
---|
376 | Result:=2;
|
---|
377 | Buf[0]:=char(byte($c0 or (CodePoint shr 6)));
|
---|
378 | Buf[1]:=char(byte($80 or (CodePoint and $3f)));
|
---|
379 | end;
|
---|
380 | $800..$ffff:
|
---|
381 | begin
|
---|
382 | Result:=3;
|
---|
383 | Buf[0]:=char(byte($e0 or (CodePoint shr 12)));
|
---|
384 | Buf[1]:=char(byte((CodePoint shr 6) and $3f) or $80);
|
---|
385 | Buf[2]:=char(byte(CodePoint and $3f) or $80);
|
---|
386 | end;
|
---|
387 | $10000..$10ffff:
|
---|
388 | begin
|
---|
389 | Result:=4;
|
---|
390 | Buf[0]:=char(byte($f0 or (CodePoint shr 18)));
|
---|
391 | Buf[1]:=char(byte((CodePoint shr 12) and $3f) or $80);
|
---|
392 | Buf[2]:=char(byte((CodePoint shr 6) and $3f) or $80);
|
---|
393 | Buf[3]:=char(byte(CodePoint and $3f) or $80);
|
---|
394 | end;
|
---|
395 | else
|
---|
396 | Result:=0;
|
---|
397 | end;
|
---|
398 | end;
|
---|
399 |
|
---|
400 | function UnicodeCharToUTF8(u: cardinal): string4;
|
---|
401 | begin
|
---|
402 | result[0] := chr(UnicodeToUTF8Inline(u,@result[1]));
|
---|
403 | end;
|
---|
404 |
|
---|
405 | constructor TFileStreamUTF8.Create(const AFileName: utf8string; Mode: Word);
|
---|
406 | var
|
---|
407 | lHandle: THandle;
|
---|
408 | begin
|
---|
409 | FFileName:= AFileName;
|
---|
410 | if Mode = fmcreate then
|
---|
411 | lHandle:= FileCreateUTF8(AFileName)
|
---|
412 | else
|
---|
413 | lHandle:= FileOpenUTF8(AFileName, Mode);
|
---|
414 |
|
---|
415 | If (THandle(lHandle)=feInvalidHandle) then
|
---|
416 | begin
|
---|
417 | if Mode = fmCreate then
|
---|
418 | raise EFCreateError.createfmt({SFCreateError}'Unable to create file "%s"', [AFileName])
|
---|
419 | else
|
---|
420 | raise EFOpenError.Createfmt({SFOpenError}'Unable to open file "%s"', [AFilename]);
|
---|
421 | end
|
---|
422 | else
|
---|
423 | inherited Create(lHandle);
|
---|
424 | end;
|
---|
425 |
|
---|
426 | constructor TFileStreamUTF8.Create(const AFileName: utf8string; Mode: Word; Rights: Cardinal);
|
---|
427 | var
|
---|
428 | lHandle: THandle;
|
---|
429 | begin
|
---|
430 | FFileName:=AFileName;
|
---|
431 | if Mode=fmcreate then
|
---|
432 | lHandle:=FileCreateUTF8(AFileName,Rights)
|
---|
433 | else
|
---|
434 | lHandle:=FileOpenUTF8(AFileName,Mode);
|
---|
435 |
|
---|
436 | if (THandle(lHandle)=feInvalidHandle) then
|
---|
437 | begin
|
---|
438 | if Mode=fmcreate then
|
---|
439 | raise EFCreateError.createfmt({SFCreateError}'Unable to create file "%s"',[AFileName])
|
---|
440 | else
|
---|
441 | raise EFOpenError.Createfmt({SFOpenError}'Unable to open file "%s"',[AFilename]);
|
---|
442 | end
|
---|
443 | else
|
---|
444 | inherited Create(lHandle);
|
---|
445 | end;
|
---|
446 |
|
---|
447 | destructor TFileStreamUTF8.Destroy;
|
---|
448 | begin
|
---|
449 | FileClose(Handle);
|
---|
450 | end;
|
---|
451 |
|
---|
452 | function TStringListUTF8.DoCompareText(const s1, s2: string): PtrInt;
|
---|
453 | begin
|
---|
454 | if CaseSensitive then
|
---|
455 | Result:= UTF8CompareStr(s1,s2)
|
---|
456 | else
|
---|
457 | Result:= UTF8CompareText(s1,s2);
|
---|
458 | end;
|
---|
459 |
|
---|
460 | procedure TStringListUTF8.LoadFromFile(const FileName: string);
|
---|
461 | var
|
---|
462 | TheStream: TFileStreamUTF8;
|
---|
463 | begin
|
---|
464 | TheStream:= TFileStreamUTF8.Create(FileName, fmOpenRead or fmShareDenyWrite);
|
---|
465 | try
|
---|
466 | LoadFromStream(TheStream);
|
---|
467 | finally
|
---|
468 | TheStream.Free;
|
---|
469 | end;
|
---|
470 | end;
|
---|
471 |
|
---|
472 | procedure TStringListUTF8.SaveToFile(const FileName: string);
|
---|
473 | var
|
---|
474 | TheStream: TFileStreamUTF8;
|
---|
475 | begin
|
---|
476 | TheStream:=TFileStreamUTF8.Create(FileName,fmCreate);
|
---|
477 | try
|
---|
478 | SaveToStream(TheStream);
|
---|
479 | finally
|
---|
480 | TheStream.Free;
|
---|
481 | end;
|
---|
482 | end;
|
---|
483 |
|
---|
484 | {$ENDIF}
|
---|
485 |
|
---|
486 | function UTF8ReverseString(const s: string): string;
|
---|
487 | var
|
---|
488 | pSrc,pDest,pEnd: PChar;
|
---|
489 | charLen: Integer;
|
---|
490 | begin
|
---|
491 | if s = '' then
|
---|
492 | begin
|
---|
493 | result := '';
|
---|
494 | exit;
|
---|
495 | end;
|
---|
496 | setlength(result, length(s));
|
---|
497 | pDest := @result[1] + length(result);
|
---|
498 | pSrc := @s[1];
|
---|
499 | pEnd := pSrc+length(s);
|
---|
500 | while pSrc < pEnd do
|
---|
501 | begin
|
---|
502 | charLen := UTF8CharacterLength(pSrc);
|
---|
503 | if (charLen = 0) or (pSrc+charLen > pEnd) then break;
|
---|
504 | dec(pDest, charLen);
|
---|
505 | move(pSrc^, pDest^, charLen);
|
---|
506 | inc(pSrc, charLen);
|
---|
507 | end;
|
---|
508 | end;
|
---|
509 |
|
---|
510 | function UTF8CodepointToUnicode(p: PChar; ACodePointLen: integer): cardinal;
|
---|
511 | begin
|
---|
512 | case ACodePointLen of
|
---|
513 | 0: result := 0;
|
---|
514 | 1: result := ord(p^);
|
---|
515 | 2: result := ((ord(p^) and %00011111) shl 6) or (ord(p[1]) and %00111111);
|
---|
516 | 3: result := ((ord(p^) and %00011111) shl 12) or ((ord(p[1]) and %00111111) shl 6)
|
---|
517 | or (ord(p[2]) and %00111111);
|
---|
518 | 4: result := ((ord(p^) and %00001111) shl 18) or ((ord(p[1]) and %00111111) shl 12)
|
---|
519 | or ((ord(p[2]) and %00111111) shl 6) or (ord(p[3]) and %00111111);
|
---|
520 | else
|
---|
521 | raise exception.Create('Invalid code point length');
|
---|
522 | end;
|
---|
523 | end;
|
---|
524 |
|
---|
525 | function UTF8CharStart(UTF8Str: PChar; Len, CharIndex: PtrInt): PChar;
|
---|
526 | var
|
---|
527 | CharLen: LongInt;
|
---|
528 | begin
|
---|
529 | Result:=UTF8Str;
|
---|
530 | if Result<>nil then begin
|
---|
531 | while (CharIndex>0) and (Len>0) do begin
|
---|
532 | CharLen:=UTF8CharacterLength(Result);
|
---|
533 | dec(Len,CharLen);
|
---|
534 | dec(CharIndex);
|
---|
535 | inc(Result,CharLen);
|
---|
536 | end;
|
---|
537 | if (CharIndex<>0) or (Len<0) then
|
---|
538 | Result:=nil;
|
---|
539 | end;
|
---|
540 | end;
|
---|
541 |
|
---|
542 | function GetBidiClassUTF8(P: PChar): TUnicodeBidiClass;
|
---|
543 | begin
|
---|
544 | result := GetUnicodeBidiClass(UTF8CodepointToUnicode(P, UTF8CharacterLength(p)));
|
---|
545 | end;
|
---|
546 |
|
---|
547 | function GetFirstStrongBidiClassUTF8(const sUTF8: string): TUnicodeBidiClass;
|
---|
548 | var
|
---|
549 | p,pEnd: PChar;
|
---|
550 | charLen: Integer;
|
---|
551 | u: Cardinal;
|
---|
552 | curBidi: TUnicodeBidiClass;
|
---|
553 | isolateNesting: integer;
|
---|
554 | begin
|
---|
555 | if sUTF8 = '' then exit(ubcUnknown);
|
---|
556 | p := @sUTF8[1];
|
---|
557 | pEnd := p + length(sUTF8);
|
---|
558 | isolateNesting:= 0;
|
---|
559 | while p < pEnd do
|
---|
560 | begin
|
---|
561 | charLen := UTF8CharacterLength(p);
|
---|
562 | if (charLen = 0) or (p+charLen > pEnd) then break;
|
---|
563 | u := UTF8CodepointToUnicode(p, charLen);
|
---|
564 | case u of
|
---|
565 | UNICODE_POP_DIRECTIONAL_ISOLATE: if isolateNesting > 0 then dec(isolateNesting);
|
---|
566 | end;
|
---|
567 | curBidi := GetUnicodeBidiClass(u);
|
---|
568 | if isolateNesting = 0 then
|
---|
569 | begin
|
---|
570 | if curBidi in[ubcLeftToRight,ubcRightToLeft,ubcArabicLetter] then
|
---|
571 | exit(curBidi);
|
---|
572 | end;
|
---|
573 | case u of
|
---|
574 | UNICODE_FIRST_STRONG_ISOLATE, UNICODE_LEFT_TO_RIGHT_ISOLATE, UNICODE_RIGHT_TO_LEFT_ISOLATE: inc(isolateNesting);
|
---|
575 | end;
|
---|
576 | if curBidi = ubcParagraphSeparator then isolateNesting:= 0;
|
---|
577 | inc(p,charLen);
|
---|
578 | end;
|
---|
579 | exit(ubcUnknown);
|
---|
580 | end;
|
---|
581 |
|
---|
582 | function GetLastStrongBidiClassUTF8(const sUTF8: string): TUnicodeBidiClass;
|
---|
583 | var
|
---|
584 | p,pEnd: PChar;
|
---|
585 | charLen: Integer;
|
---|
586 | u: Cardinal;
|
---|
587 | curBidi: TUnicodeBidiClass;
|
---|
588 | isolateNesting: integer;
|
---|
589 | begin
|
---|
590 | if sUTF8 = '' then exit(ubcUnknown);
|
---|
591 | p := @sUTF8[1];
|
---|
592 | pEnd := p + length(sUTF8);
|
---|
593 | isolateNesting:= 0;
|
---|
594 | result := ubcUnknown;
|
---|
595 | while p < pEnd do
|
---|
596 | begin
|
---|
597 | charLen := UTF8CharacterLength(p);
|
---|
598 | if (charLen = 0) or (p+charLen > pEnd) then break;
|
---|
599 | u := UTF8CodepointToUnicode(p, charLen);
|
---|
600 | case u of
|
---|
601 | UNICODE_POP_DIRECTIONAL_ISOLATE: if isolateNesting > 0 then dec(isolateNesting);
|
---|
602 | end;
|
---|
603 | curBidi := GetUnicodeBidiClass(u);
|
---|
604 | if isolateNesting = 0 then
|
---|
605 | begin
|
---|
606 | if curBidi in[ubcLeftToRight,ubcRightToLeft,ubcArabicLetter] then
|
---|
607 | result := curBidi;
|
---|
608 | end;
|
---|
609 | case u of
|
---|
610 | UNICODE_FIRST_STRONG_ISOLATE, UNICODE_LEFT_TO_RIGHT_ISOLATE, UNICODE_RIGHT_TO_LEFT_ISOLATE: inc(isolateNesting);
|
---|
611 | end;
|
---|
612 | if curBidi = ubcParagraphSeparator then isolateNesting:= 0;
|
---|
613 | inc(p,charLen);
|
---|
614 | end;
|
---|
615 | end;
|
---|
616 |
|
---|
617 | function IsRightToLeftUTF8(const sUTF8: string): boolean;
|
---|
618 | begin
|
---|
619 | result := GetFirstStrongBidiClassUTF8(sUTF8) in[ubcRightToLeft,ubcArabicLetter];
|
---|
620 | end;
|
---|
621 |
|
---|
622 | function IsZeroWidthUTF8(const sUTF8: string): boolean;
|
---|
623 | var
|
---|
624 | p,pEnd: PChar;
|
---|
625 | charLen: Integer;
|
---|
626 | u: Cardinal;
|
---|
627 | begin
|
---|
628 | if sUTF8 = '' then exit(true);
|
---|
629 | p := @sUTF8[1];
|
---|
630 | pEnd := p + length(sUTF8);
|
---|
631 | while p < pEnd do
|
---|
632 | begin
|
---|
633 | charLen := UTF8CharacterLength(p);
|
---|
634 | if (charLen = 0) or (p+charLen > pEnd) then break;
|
---|
635 | u := UTF8CodepointToUnicode(p, charLen);
|
---|
636 | if not IsZeroWidthUnicode(u) then exit(false);
|
---|
637 | inc(p,charLen);
|
---|
638 | end;
|
---|
639 | exit(true);
|
---|
640 | end;
|
---|
641 |
|
---|
642 | function AddParagraphBidiUTF8(s: string; ARightToLeft: boolean): string;
|
---|
643 | var
|
---|
644 | i,curParaStart: Integer;
|
---|
645 |
|
---|
646 | procedure CheckParagraph;
|
---|
647 | var
|
---|
648 | para,newPara: string;
|
---|
649 | paraRTL: boolean;
|
---|
650 | begin
|
---|
651 | if i > curParaStart then
|
---|
652 | begin
|
---|
653 | para := copy(s,curParaStart,i-curParaStart);
|
---|
654 | paraRTL := GetFirstStrongBidiClassUTF8(para) in[ubcRightToLeft,ubcArabicLetter];
|
---|
655 | //detected paragraph does not match overall RTL option
|
---|
656 | if paraRTL <> ARightToLeft then
|
---|
657 | begin
|
---|
658 | if not paraRTL then
|
---|
659 | newPara := UnicodeCharToUTF8(UNICODE_LEFT_TO_RIGHT_MARK)+para+UnicodeCharToUTF8(UNICODE_LEFT_TO_RIGHT_MARK)
|
---|
660 | else
|
---|
661 | newPara := UnicodeCharToUTF8(UNICODE_RIGHT_TO_LEFT_MARK)+para+UnicodeCharToUTF8(UNICODE_RIGHT_TO_LEFT_MARK);
|
---|
662 | inc(i, length(newPara)-length(para));
|
---|
663 | delete(s, curParaStart, length(para));
|
---|
664 | insert(newPara, s, curParaStart);
|
---|
665 | end;
|
---|
666 | end;
|
---|
667 | end;
|
---|
668 |
|
---|
669 | var
|
---|
670 | charLen: integer;
|
---|
671 | u: Cardinal;
|
---|
672 |
|
---|
673 | begin
|
---|
674 | i := 1;
|
---|
675 | curParaStart := 1;
|
---|
676 | while i <= length(s) do
|
---|
677 | begin
|
---|
678 | charLen := UTF8CharacterLength(@s[i]);
|
---|
679 | u := UTF8CodepointToUnicode(@s[i], charLen);
|
---|
680 | if IsUnicodeParagraphSeparator(u) then
|
---|
681 | begin
|
---|
682 | CheckParagraph;
|
---|
683 | //skip end of line
|
---|
684 | inc(i);
|
---|
685 | //skip second CRLF
|
---|
686 | if ((u = 10) or (u = 13)) and (i <= length(s)) and (s[i] in[#13,#10]) and (s[i]<>s[i-1]) then inc(i);
|
---|
687 | curParaStart := i;
|
---|
688 | end else
|
---|
689 | inc(i);
|
---|
690 | end;
|
---|
691 | CheckParagraph;
|
---|
692 | result := s;
|
---|
693 | end;
|
---|
694 |
|
---|
695 | type
|
---|
696 | TUnicodeArray = packed array of cardinal;
|
---|
697 | TIntegerArray = array of integer;
|
---|
698 |
|
---|
699 | procedure UTF8ToUnicode(const sUTF8: string; out u: TUnicodeArray; out ofs: TIntegerArray);
|
---|
700 | var
|
---|
701 | index,len,charLen: integer;
|
---|
702 | p,pStart,pEnd: PChar;
|
---|
703 | begin
|
---|
704 | if sUTF8 = '' then
|
---|
705 | begin
|
---|
706 | u := nil;
|
---|
707 | ofs := nil;
|
---|
708 | end
|
---|
709 | else
|
---|
710 | begin
|
---|
711 | pStart := @sUTF8[1];
|
---|
712 | pEnd := pStart + length(sUTF8);
|
---|
713 | p := pStart;
|
---|
714 | len := 0;
|
---|
715 | while p < pEnd do
|
---|
716 | begin
|
---|
717 | charLen := UTF8CharacterLength(p);
|
---|
718 | inc(len);
|
---|
719 | inc(p,charLen);
|
---|
720 | end;
|
---|
721 |
|
---|
722 | setlength(u, len);
|
---|
723 | setlength(ofs, len);
|
---|
724 | p := pStart;
|
---|
725 | index := 0;
|
---|
726 | while p < pEnd do
|
---|
727 | begin
|
---|
728 | charLen := UTF8CharacterLength(p);
|
---|
729 | u[index] := UTF8CodepointToUnicode(p, charLen);
|
---|
730 | ofs[index] := p - pStart;
|
---|
731 | inc(index);
|
---|
732 | inc(p,charLen);
|
---|
733 | end;
|
---|
734 | end;
|
---|
735 | end;
|
---|
736 |
|
---|
737 | function AnalyzeBidiUTF8(const sUTF8: string; ABaseDirection: cardinal): TBidiUTF8Array;
|
---|
738 | var
|
---|
739 | u: TUnicodeArray;
|
---|
740 | ofs: TIntegerArray;
|
---|
741 | a: TUnicodeBidiArray;
|
---|
742 | i: Integer;
|
---|
743 | begin
|
---|
744 | if sUTF8 = '' then
|
---|
745 | result := nil
|
---|
746 | else
|
---|
747 | begin
|
---|
748 | UTF8ToUnicode(sUTF8, u, ofs);
|
---|
749 | a := AnalyzeBidiUnicode(@u[0], length(u), ABaseDirection);
|
---|
750 | setlength(result, length(u));
|
---|
751 | for i := 0 to high(result) do
|
---|
752 | begin
|
---|
753 | result[i].Offset:= ofs[i];
|
---|
754 | result[i].BidiInfo := a[i];
|
---|
755 | end;
|
---|
756 | end;
|
---|
757 | end;
|
---|
758 |
|
---|
759 | function AnalyzeBidiUTF8(const sUTF8: string; ARightToLeft: boolean): TBidiUTF8Array;
|
---|
760 | begin
|
---|
761 | if ARightToLeft then
|
---|
762 | result := AnalyzeBidiUTF8(sUTF8, UNICODE_RIGHT_TO_LEFT_ISOLATE)
|
---|
763 | else
|
---|
764 | result := AnalyzeBidiUTF8(sUTF8, UNICODE_LEFT_TO_RIGHT_ISOLATE);
|
---|
765 | end;
|
---|
766 |
|
---|
767 | function AnalyzeBidiUTF8(const sUTF8: string): TBidiUTF8Array;
|
---|
768 | begin
|
---|
769 | result := AnalyzeBidiUTF8(sUTF8, UNICODE_FIRST_STRONG_ISOLATE)
|
---|
770 | end;
|
---|
771 |
|
---|
772 | function GetUTF8DisplayOrder(const ABidi: TBidiUTF8Array): TUnicodeDisplayOrder;
|
---|
773 | begin
|
---|
774 | if length(ABidi) = 0 then
|
---|
775 | result := nil
|
---|
776 | else
|
---|
777 | result := GetUnicodeDisplayOrder(@ABidi[0].BidiInfo, sizeof(TBidiUTF8Info), length(ABidi));
|
---|
778 | end;
|
---|
779 |
|
---|
780 | function ContainsBidiIsolateOrFormattingUTF8(const sUTF8: string): boolean;
|
---|
781 | var
|
---|
782 | p,pEnd: PChar;
|
---|
783 | charLen: Integer;
|
---|
784 | u: Cardinal;
|
---|
785 | begin
|
---|
786 | if sUTF8 = '' then exit(false);
|
---|
787 | p := @sUTF8[1];
|
---|
788 | pEnd := p + length(sUTF8);
|
---|
789 | while p < pEnd do
|
---|
790 | begin
|
---|
791 | charLen := UTF8CharacterLength(p);
|
---|
792 | if (charLen = 0) or (p+charLen > pEnd) then break;
|
---|
793 | u := UTF8CodepointToUnicode(p, charLen);
|
---|
794 | case u of
|
---|
795 | UNICODE_LEFT_TO_RIGHT_ISOLATE, UNICODE_RIGHT_TO_LEFT_ISOLATE, UNICODE_FIRST_STRONG_ISOLATE,
|
---|
796 | UNICODE_LEFT_TO_RIGHT_EMBEDDING, UNICODE_RIGHT_TO_LEFT_EMBEDDING,
|
---|
797 | UNICODE_LEFT_TO_RIGHT_OVERRIDE, UNICODE_RIGHT_TO_LEFT_OVERRIDE: exit(true);
|
---|
798 | end;
|
---|
799 | inc(p,charLen);
|
---|
800 | end;
|
---|
801 | exit(false);
|
---|
802 | end;
|
---|
803 |
|
---|
804 | function UTF8OverrideDirection(const sUTF8: string; ARightToLeft: boolean): string;
|
---|
805 | begin
|
---|
806 | if ARightToLeft then
|
---|
807 | result := UnicodeCharToUTF8(UNICODE_RIGHT_TO_LEFT_OVERRIDE) + sUTF8 + UnicodeCharToUTF8(UNICODE_POP_DIRECTIONAL_FORMATTING)
|
---|
808 | else
|
---|
809 | result := UnicodeCharToUTF8(UNICODE_LEFT_TO_RIGHT_OVERRIDE) + sUTF8 + UnicodeCharToUTF8(UNICODE_POP_DIRECTIONAL_FORMATTING);
|
---|
810 | end;
|
---|
811 |
|
---|
812 | function UTF8EmbedDirection(const sUTF8: string; ARightToLeft: boolean): string;
|
---|
813 | begin
|
---|
814 | if ARightToLeft then
|
---|
815 | result := UnicodeCharToUTF8(UNICODE_RIGHT_TO_LEFT_EMBEDDING) + sUTF8 + UnicodeCharToUTF8(UNICODE_POP_DIRECTIONAL_FORMATTING)
|
---|
816 | else
|
---|
817 | result := UnicodeCharToUTF8(UNICODE_LEFT_TO_RIGHT_EMBEDDING) + sUTF8 + UnicodeCharToUTF8(UNICODE_POP_DIRECTIONAL_FORMATTING);
|
---|
818 | end;
|
---|
819 |
|
---|
820 | //little endian stream functions
|
---|
821 | function LEReadInt64(Stream: TStream): int64;
|
---|
822 | begin
|
---|
823 | Result := 0;
|
---|
824 | stream.Read(Result, sizeof(Result));
|
---|
825 | Result := LEtoN(Result);
|
---|
826 | end;
|
---|
827 |
|
---|
828 | procedure LEWriteInt64(Stream: TStream; AValue: int64);
|
---|
829 | begin
|
---|
830 | AValue := NtoLE(AValue);
|
---|
831 | stream.Write(AValue, sizeof(AValue));
|
---|
832 | end;
|
---|
833 |
|
---|
834 | function LEReadLongint(Stream: TStream): longint;
|
---|
835 | begin
|
---|
836 | Result := 0;
|
---|
837 | stream.Read(Result, sizeof(Result));
|
---|
838 | Result := LEtoN(Result);
|
---|
839 | end;
|
---|
840 |
|
---|
841 | procedure LEWriteLongint(Stream: TStream; AValue: LongInt);
|
---|
842 | begin
|
---|
843 | AValue := NtoLE(AValue);
|
---|
844 | stream.Write(AValue, sizeof(AValue));
|
---|
845 | end;
|
---|
846 |
|
---|
847 | function LEReadByte(Stream: TStream): byte;
|
---|
848 | begin
|
---|
849 | Result := 0;
|
---|
850 | stream.Read(Result, sizeof(Result));
|
---|
851 | end;
|
---|
852 |
|
---|
853 | procedure LEWriteByte(Stream: TStream; AValue: Byte);
|
---|
854 | begin
|
---|
855 | stream.Write(AValue, sizeof(AValue));
|
---|
856 | end;
|
---|
857 |
|
---|
858 | function LEReadSingle(Stream: TStream): single;
|
---|
859 | var
|
---|
860 | ResultAsDWord : longword absolute result;
|
---|
861 | begin
|
---|
862 | ResultAsDWord := 0;
|
---|
863 | stream.Read(ResultAsDWord, sizeof(Result));
|
---|
864 | ResultAsDWord := LEtoN(ResultAsDWord);
|
---|
865 | end;
|
---|
866 |
|
---|
867 | procedure LEWriteSingle(Stream: TStream; AValue: single);
|
---|
868 | var
|
---|
869 | ValueAsDWord : longword absolute AValue;
|
---|
870 | begin
|
---|
871 | ValueAsDWord := NtoLE(ValueAsDWord);
|
---|
872 | stream.Write(ValueAsDWord, sizeof(AValue));
|
---|
873 | end;
|
---|
874 |
|
---|
875 | end.
|
---|
876 |
|
---|