source: trunk/Packages/bgrabitmap/bgrautf8.pas

Last change on this file was 2, checked in by chronos, 5 years ago
File size: 22.8 KB
Line 
1unit BGRAUTF8;
2
3{$mode objfpc}{$H+}
4{$i bgrabitmap.inc}
5
6interface
7
8uses
9 Classes, SysUtils, BGRAUnicode{$IFDEF BGRABITMAP_USE_LCL}, lazutf8classes{$ENDIF};
10
11{$IFDEF BGRABITMAP_USE_LCL}
12type
13 TFileStreamUTF8 = lazutf8classes.TFileStreamUTF8;
14 TStringListUTF8 = lazutf8classes.TStringListUTF8;
15{$ELSE}
16type
17 TFileStreamUTF8 = class(THandleStream)
18 private
19 FFileName: utf8string;
20 public
21 constructor Create(const AFileName: utf8string; Mode: Word); overload;
22 constructor Create(const AFileName: utf8string; Mode: Word; Rights: Cardinal); overload;
23 destructor Destroy; override;
24 property FileName: utf8string Read FFilename;
25 end;
26
27 TStringListUTF8 = class(TStringList)
28 protected
29 function DoCompareText(const s1,s2 : string) : PtrInt; override;
30 public
31 procedure LoadFromFile(const FileName: string); override;
32 procedure SaveToFile(const FileName: string); override;
33 end;
34{$ENDIF}
35
36procedure LoadStringsFromFileUTF8(List: TStrings; const FileName: string);
37procedure SaveStringsToFileUTF8(List: TStrings; const FileName: string);
38
39function UTF8ToSys(const s: string): string;
40function SysToUTF8(const s: string): string;
41
42function UTF8LowerCase(const s: string): string;
43function UTF8UpperCase(const s: string): string;
44
45function UTF8CompareStr(const S1, S2: string): Integer;
46function UTF8CompareText(const S1, S2: string): Integer;
47
48function UTF8CharStart(UTF8Str: PChar; Len, CharIndex: PtrInt): PChar;
49
50function FileOpenUTF8(Const FileName : string; Mode : Integer) : THandle;
51function FileCreateUTF8(Const FileName : string) : THandle; overload;
52function FileCreateUTF8(Const FileName : string; Rights: Cardinal) : THandle; overload;
53function FileExistsUTF8(Const FileName : string): boolean;
54function FindFirstUTF8(const Path: string; Attr: Longint; out Rslt: TSearchRec): Longint;
55function FindNextUTF8(var Rslt: TSearchRec): Longint;
56procedure FindCloseUTF8(var F: TSearchrec);
57
58type
59 string4 = string[4];
60
61function UTF8CharacterLength(p: PChar): integer;
62function UTF8Length(const s: string): PtrInt; overload;
63function UTF8Length(p: PChar; ByteCount: PtrInt): PtrInt; overload;
64function UnicodeCharToUTF8(u: cardinal): string4;
65function UTF8ReverseString(const s: string): string;
66function UTF8CodepointToUnicode(p: PChar; ACodePointLen: integer): cardinal;
67
68type
69 TBidiUTF8Info = packed record
70 Offset: Integer;
71 BidiInfo: TUnicodeBidiInfo;
72 end;
73 TBidiUTF8Array = packed array of TBidiUTF8Info;
74 TUnicodeDisplayOrder = BGRAUnicode.TUnicodeDisplayOrder;
75
76function GetBidiClassUTF8(P: PChar): TUnicodeBidiClass;
77function GetFirstStrongBidiClassUTF8(const sUTF8: string): TUnicodeBidiClass;
78function GetLastStrongBidiClassUTF8(const sUTF8: string): TUnicodeBidiClass;
79function IsRightToLeftUTF8(const sUTF8: string): boolean;
80function IsZeroWidthUTF8(const sUTF8: string): boolean;
81function AddParagraphBidiUTF8(s: string; ARightToLeft: boolean): string;
82function AnalyzeBidiUTF8(const sUTF8: string; ARightToLeft: boolean): TBidiUTF8Array; overload;
83function AnalyzeBidiUTF8(const sUTF8: string): TBidiUTF8Array; overload;
84function GetUTF8DisplayOrder(const ABidi: TBidiUTF8Array): TUnicodeDisplayOrder;
85function ContainsBidiIsolateOrFormattingUTF8(const sUTF8: string): boolean;
86
87function UTF8OverrideDirection(const sUTF8: string; ARightToLeft: boolean): string;
88function UTF8EmbedDirection(const sUTF8: string; ARightToLeft: boolean): string;
89
90//little endian stream functions
91function LEReadInt64(Stream: TStream): int64;
92procedure LEWriteInt64(Stream: TStream; AValue: int64);
93function LEReadLongint(Stream: TStream): longint;
94procedure LEWriteLongint(Stream: TStream; AValue: LongInt);
95function LEReadByte(Stream: TStream): byte;
96procedure LEWriteByte(Stream: TStream; AValue: Byte);
97function LEReadSingle(Stream: TStream): single;
98procedure LEWriteSingle(Stream: TStream; AValue: single);
99
100implementation
101
102{$IFDEF BGRABITMAP_USE_LCL}
103uses LazFileUtils, LazUtf8;
104
105procedure LoadStringsFromFileUTF8(List: TStrings; const FileName: string);
106begin
107 lazutf8classes.LoadStringsFromFileUTF8(List,FileName);
108end;
109
110procedure SaveStringsToFileUTF8(List: TStrings; const FileName: string);
111begin
112 lazutf8classes.SaveStringsToFileUTF8(List,FileName);
113end;
114
115function UTF8ToSys(const s: string): string;
116begin
117 result := LazUtf8.UTF8ToSys(s);
118end;
119
120function SysToUTF8(const s: string): string;
121begin
122 result := LazUtf8.SysToUTF8(s);
123end;
124
125function UTF8LowerCase(const s: string): string;
126begin
127 result := LazUtf8.UTF8LowerCase(s);
128end;
129
130function UTF8UpperCase(const s: string): string;
131begin
132 result := LazUtf8.UTF8UpperCase(s);
133end;
134
135function UTF8CompareStr(const S1, S2: string): Integer;
136begin
137 result := LazUtf8.UTF8CompareStr(S1,S2);
138end;
139
140function UTF8CompareText(const S1, S2: string): Integer;
141begin
142 result := LazUtf8.UTF8CompareText(S1,S2);
143end;
144
145function FileOpenUTF8(Const FileName : string; Mode : Integer) : THandle;
146begin
147 result := LazFileUtils.FileOpenUTF8(FileName, Mode);
148end;
149
150function FileCreateUTF8(Const FileName : string) : THandle; overload;
151begin
152 result := LazFileUtils.FileCreateUTF8(FileName);
153end;
154
155function FileCreateUTF8(Const FileName : string; Rights: Cardinal) : THandle; overload;
156begin
157 result := LazFileUtils.FileCreateUTF8(FileName, Rights);
158end;
159
160function FileExistsUTF8(Const FileName : string): boolean;
161begin
162 result := LazFileUtils.FileExistsUTF8(FileName);
163end;
164
165function FindFirstUTF8(const Path: string; Attr: Longint; out Rslt: TSearchRec
166 ): Longint;
167begin
168 result := LazFileUtils.FindFirstUTF8(Path,Attr,Rslt);
169end;
170
171function FindNextUTF8(var Rslt: TSearchRec): Longint;
172begin
173 result := LazFileUtils.FindNextUTF8(Rslt);
174end;
175
176procedure FindCloseUTF8(var F: TSearchrec);
177begin
178 LazFileUtils.FindCloseUTF8(F);
179end;
180
181function UTF8CharacterLength(p: PChar): integer;
182begin
183 result := LazUtf8.UTF8CharacterLength(p);
184end;
185
186function UTF8Length(const s: string): PtrInt;
187begin
188 result := LazUtf8.UTF8Length(s);
189end;
190
191function UTF8Length(p: PChar; ByteCount: PtrInt): PtrInt;
192begin
193 result := LazUtf8.UTF8Length(p, ByteCount);
194end;
195
196function UnicodeCharToUTF8(u: cardinal): string4;
197begin
198 result := LazUtf8.UnicodeToUTF8(u);
199end;
200
201{$ELSE}
202
203procedure LoadStringsFromFileUTF8(List: TStrings; const FileName: string);
204var
205 uList: TStringListUTF8;
206begin
207 if List is TStringListUTF8 then
208 begin
209 List.LoadFromFile(FileName);
210 exit;
211 end;
212 uList:=TStringListUTF8.Create;
213 try
214 uList.LoadFromFile(FileName);
215 List.Assign(uList);
216 finally
217 uList.Free;
218 end;
219end;
220
221procedure SaveStringsToFileUTF8(List: TStrings; const FileName: string);
222var
223 uList: TStringListUTF8;
224begin
225 if List is TStringListUTF8 then
226 begin
227 List.SaveToFile(FileName);
228 exit;
229 end;
230 uList:=TStringListUTF8.Create;
231 try
232 uList.Assign(List);
233 uList.SaveToFile(FileName);
234 finally
235 uList.Free;
236 end;
237end;
238
239function UTF8LowerCase(const s: string): string;
240begin
241 result := UTF8Encode(UnicodeLowerCase(UTF8Decode(s)));
242end;
243
244function UTF8UpperCase(const s: string): string;
245begin
246 result := UTF8Encode(UnicodeUpperCase(UTF8Decode(s)));
247end;
248
249function UTF8CompareStr(const S1, S2: string): Integer;
250begin
251 Result := SysUtils.CompareStr(S1, S2);
252end;
253
254function UTF8CompareText(const S1, S2: string): Integer;
255begin
256 Result := UnicodeCompareText(UTF8Decode(S1), UTF8Decode(S2));
257end;
258
259function FileOpenUTF8(const FileName: string; Mode: Integer): THandle;
260begin
261 result := FileOpen(UTF8ToSys(FileName),Mode);
262end;
263
264function FileCreateUTF8(const FileName: string): THandle;
265begin
266 result := FileCreate(UTF8ToSys(FileName));
267end;
268
269function FileCreateUTF8(const FileName: string; Rights: Cardinal): THandle;
270begin
271 result := FileCreate(UTF8ToSys(FileName),Rights);
272end;
273
274function FileExistsUTF8(const FileName: string): boolean;
275begin
276 result := FileExists(UTF8ToSys(FileName));
277end;
278
279function FindFirstUTF8(const Path: string; Attr: Longint; out Rslt: TSearchRec
280 ): Longint;
281begin
282 result := FindFirst(UTF8ToSys(Path),Attr,Rslt);
283 Rslt.Name := SysToUTF8(Rslt.Name);
284end;
285
286function FindNextUTF8(var Rslt: TSearchRec): Longint;
287begin
288 result := FindNext(Rslt);
289 if result = 0 then
290 Rslt.Name := SysToUTF8(Rslt.Name);
291end;
292
293procedure FindCloseUTF8(var F: TSearchrec);
294begin
295 FindClose(F);
296end;
297
298function UTF8ToSys(const s: string): string;
299begin
300 result := Utf8ToAnsi(s);
301end;
302
303function SysToUTF8(const s: string): string;
304begin
305 result := AnsiToUtf8(s);
306end;
307
308function UTF8CharacterLength(p: PChar): integer;
309begin
310 if p<>nil then begin
311 if ord(p^)<%11000000 then begin
312 // regular single byte character (#0 is a character, this is pascal ;)
313 Result:=1;
314 end
315 else begin
316 // multi byte
317 if ((ord(p^) and %11100000) = %11000000) then begin
318 // could be 2 byte character
319 if (ord(p[1]) and %11000000) = %10000000 then
320 Result:=2
321 else
322 Result:=1;
323 end
324 else if ((ord(p^) and %11110000) = %11100000) then begin
325 // could be 3 byte character
326 if ((ord(p[1]) and %11000000) = %10000000)
327 and ((ord(p[2]) and %11000000) = %10000000) then
328 Result:=3
329 else
330 Result:=1;
331 end
332 else if ((ord(p^) and %11111000) = %11110000) then begin
333 // could be 4 byte character
334 if ((ord(p[1]) and %11000000) = %10000000)
335 and ((ord(p[2]) and %11000000) = %10000000)
336 and ((ord(p[3]) and %11000000) = %10000000) then
337 Result:=4
338 else
339 Result:=1;
340 end
341 else
342 Result:=1;
343 end;
344 end else
345 Result:=0;
346end;
347
348function UTF8Length(const s: string): PtrInt;
349begin
350 Result:=UTF8Length(PChar(s),length(s));
351end;
352
353function UTF8Length(p: PChar; ByteCount: PtrInt): PtrInt;
354var
355 CharLen: LongInt;
356begin
357 Result:=0;
358 while (ByteCount>0) do begin
359 inc(Result);
360 CharLen:=UTF8CharacterLength(p);
361 inc(p,CharLen);
362 dec(ByteCount,CharLen);
363 end;
364end;
365
366function UnicodeToUTF8Inline(CodePoint: cardinal; Buf: PChar): integer;
367begin
368 case CodePoint of
369 0..$7f:
370 begin
371 Result:=1;
372 Buf[0]:=char(byte(CodePoint));
373 end;
374 $80..$7ff:
375 begin
376 Result:=2;
377 Buf[0]:=char(byte($c0 or (CodePoint shr 6)));
378 Buf[1]:=char(byte($80 or (CodePoint and $3f)));
379 end;
380 $800..$ffff:
381 begin
382 Result:=3;
383 Buf[0]:=char(byte($e0 or (CodePoint shr 12)));
384 Buf[1]:=char(byte((CodePoint shr 6) and $3f) or $80);
385 Buf[2]:=char(byte(CodePoint and $3f) or $80);
386 end;
387 $10000..$10ffff:
388 begin
389 Result:=4;
390 Buf[0]:=char(byte($f0 or (CodePoint shr 18)));
391 Buf[1]:=char(byte((CodePoint shr 12) and $3f) or $80);
392 Buf[2]:=char(byte((CodePoint shr 6) and $3f) or $80);
393 Buf[3]:=char(byte(CodePoint and $3f) or $80);
394 end;
395 else
396 Result:=0;
397 end;
398end;
399
400function UnicodeCharToUTF8(u: cardinal): string4;
401begin
402 result[0] := chr(UnicodeToUTF8Inline(u,@result[1]));
403end;
404
405constructor TFileStreamUTF8.Create(const AFileName: utf8string; Mode: Word);
406var
407 lHandle: THandle;
408begin
409 FFileName:= AFileName;
410 if Mode = fmcreate then
411 lHandle:= FileCreateUTF8(AFileName)
412 else
413 lHandle:= FileOpenUTF8(AFileName, Mode);
414
415 If (THandle(lHandle)=feInvalidHandle) then
416 begin
417 if Mode = fmCreate then
418 raise EFCreateError.createfmt({SFCreateError}'Unable to create file "%s"', [AFileName])
419 else
420 raise EFOpenError.Createfmt({SFOpenError}'Unable to open file "%s"', [AFilename]);
421 end
422 else
423 inherited Create(lHandle);
424end;
425
426constructor TFileStreamUTF8.Create(const AFileName: utf8string; Mode: Word; Rights: Cardinal);
427var
428 lHandle: THandle;
429begin
430 FFileName:=AFileName;
431 if Mode=fmcreate then
432 lHandle:=FileCreateUTF8(AFileName,Rights)
433 else
434 lHandle:=FileOpenUTF8(AFileName,Mode);
435
436 if (THandle(lHandle)=feInvalidHandle) then
437 begin
438 if Mode=fmcreate then
439 raise EFCreateError.createfmt({SFCreateError}'Unable to create file "%s"',[AFileName])
440 else
441 raise EFOpenError.Createfmt({SFOpenError}'Unable to open file "%s"',[AFilename]);
442 end
443 else
444 inherited Create(lHandle);
445end;
446
447destructor TFileStreamUTF8.Destroy;
448begin
449 FileClose(Handle);
450end;
451
452function TStringListUTF8.DoCompareText(const s1, s2: string): PtrInt;
453begin
454 if CaseSensitive then
455 Result:= UTF8CompareStr(s1,s2)
456 else
457 Result:= UTF8CompareText(s1,s2);
458end;
459
460procedure TStringListUTF8.LoadFromFile(const FileName: string);
461var
462 TheStream: TFileStreamUTF8;
463begin
464 TheStream:= TFileStreamUTF8.Create(FileName, fmOpenRead or fmShareDenyWrite);
465 try
466 LoadFromStream(TheStream);
467 finally
468 TheStream.Free;
469 end;
470end;
471
472procedure TStringListUTF8.SaveToFile(const FileName: string);
473var
474 TheStream: TFileStreamUTF8;
475begin
476 TheStream:=TFileStreamUTF8.Create(FileName,fmCreate);
477 try
478 SaveToStream(TheStream);
479 finally
480 TheStream.Free;
481 end;
482end;
483
484{$ENDIF}
485
486function UTF8ReverseString(const s: string): string;
487var
488 pSrc,pDest,pEnd: PChar;
489 charLen: Integer;
490begin
491 if s = '' then
492 begin
493 result := '';
494 exit;
495 end;
496 setlength(result, length(s));
497 pDest := @result[1] + length(result);
498 pSrc := @s[1];
499 pEnd := pSrc+length(s);
500 while pSrc < pEnd do
501 begin
502 charLen := UTF8CharacterLength(pSrc);
503 if (charLen = 0) or (pSrc+charLen > pEnd) then break;
504 dec(pDest, charLen);
505 move(pSrc^, pDest^, charLen);
506 inc(pSrc, charLen);
507 end;
508end;
509
510function UTF8CodepointToUnicode(p: PChar; ACodePointLen: integer): cardinal;
511begin
512 case ACodePointLen of
513 0: result := 0;
514 1: result := ord(p^);
515 2: result := ((ord(p^) and %00011111) shl 6) or (ord(p[1]) and %00111111);
516 3: result := ((ord(p^) and %00011111) shl 12) or ((ord(p[1]) and %00111111) shl 6)
517 or (ord(p[2]) and %00111111);
518 4: result := ((ord(p^) and %00001111) shl 18) or ((ord(p[1]) and %00111111) shl 12)
519 or ((ord(p[2]) and %00111111) shl 6) or (ord(p[3]) and %00111111);
520 else
521 raise exception.Create('Invalid code point length');
522 end;
523end;
524
525function UTF8CharStart(UTF8Str: PChar; Len, CharIndex: PtrInt): PChar;
526var
527 CharLen: LongInt;
528begin
529 Result:=UTF8Str;
530 if Result<>nil then begin
531 while (CharIndex>0) and (Len>0) do begin
532 CharLen:=UTF8CharacterLength(Result);
533 dec(Len,CharLen);
534 dec(CharIndex);
535 inc(Result,CharLen);
536 end;
537 if (CharIndex<>0) or (Len<0) then
538 Result:=nil;
539 end;
540end;
541
542function GetBidiClassUTF8(P: PChar): TUnicodeBidiClass;
543begin
544 result := GetUnicodeBidiClass(UTF8CodepointToUnicode(P, UTF8CharacterLength(p)));
545end;
546
547function GetFirstStrongBidiClassUTF8(const sUTF8: string): TUnicodeBidiClass;
548var
549 p,pEnd: PChar;
550 charLen: Integer;
551 u: Cardinal;
552 curBidi: TUnicodeBidiClass;
553 isolateNesting: integer;
554begin
555 if sUTF8 = '' then exit(ubcUnknown);
556 p := @sUTF8[1];
557 pEnd := p + length(sUTF8);
558 isolateNesting:= 0;
559 while p < pEnd do
560 begin
561 charLen := UTF8CharacterLength(p);
562 if (charLen = 0) or (p+charLen > pEnd) then break;
563 u := UTF8CodepointToUnicode(p, charLen);
564 case u of
565 UNICODE_POP_DIRECTIONAL_ISOLATE: if isolateNesting > 0 then dec(isolateNesting);
566 end;
567 curBidi := GetUnicodeBidiClass(u);
568 if isolateNesting = 0 then
569 begin
570 if curBidi in[ubcLeftToRight,ubcRightToLeft,ubcArabicLetter] then
571 exit(curBidi);
572 end;
573 case u of
574 UNICODE_FIRST_STRONG_ISOLATE, UNICODE_LEFT_TO_RIGHT_ISOLATE, UNICODE_RIGHT_TO_LEFT_ISOLATE: inc(isolateNesting);
575 end;
576 if curBidi = ubcParagraphSeparator then isolateNesting:= 0;
577 inc(p,charLen);
578 end;
579 exit(ubcUnknown);
580end;
581
582function GetLastStrongBidiClassUTF8(const sUTF8: string): TUnicodeBidiClass;
583var
584 p,pEnd: PChar;
585 charLen: Integer;
586 u: Cardinal;
587 curBidi: TUnicodeBidiClass;
588 isolateNesting: integer;
589begin
590 if sUTF8 = '' then exit(ubcUnknown);
591 p := @sUTF8[1];
592 pEnd := p + length(sUTF8);
593 isolateNesting:= 0;
594 result := ubcUnknown;
595 while p < pEnd do
596 begin
597 charLen := UTF8CharacterLength(p);
598 if (charLen = 0) or (p+charLen > pEnd) then break;
599 u := UTF8CodepointToUnicode(p, charLen);
600 case u of
601 UNICODE_POP_DIRECTIONAL_ISOLATE: if isolateNesting > 0 then dec(isolateNesting);
602 end;
603 curBidi := GetUnicodeBidiClass(u);
604 if isolateNesting = 0 then
605 begin
606 if curBidi in[ubcLeftToRight,ubcRightToLeft,ubcArabicLetter] then
607 result := curBidi;
608 end;
609 case u of
610 UNICODE_FIRST_STRONG_ISOLATE, UNICODE_LEFT_TO_RIGHT_ISOLATE, UNICODE_RIGHT_TO_LEFT_ISOLATE: inc(isolateNesting);
611 end;
612 if curBidi = ubcParagraphSeparator then isolateNesting:= 0;
613 inc(p,charLen);
614 end;
615end;
616
617function IsRightToLeftUTF8(const sUTF8: string): boolean;
618begin
619 result := GetFirstStrongBidiClassUTF8(sUTF8) in[ubcRightToLeft,ubcArabicLetter];
620end;
621
622function IsZeroWidthUTF8(const sUTF8: string): boolean;
623var
624 p,pEnd: PChar;
625 charLen: Integer;
626 u: Cardinal;
627begin
628 if sUTF8 = '' then exit(true);
629 p := @sUTF8[1];
630 pEnd := p + length(sUTF8);
631 while p < pEnd do
632 begin
633 charLen := UTF8CharacterLength(p);
634 if (charLen = 0) or (p+charLen > pEnd) then break;
635 u := UTF8CodepointToUnicode(p, charLen);
636 if not IsZeroWidthUnicode(u) then exit(false);
637 inc(p,charLen);
638 end;
639 exit(true);
640end;
641
642function AddParagraphBidiUTF8(s: string; ARightToLeft: boolean): string;
643var
644 i,curParaStart: Integer;
645
646 procedure CheckParagraph;
647 var
648 para,newPara: string;
649 paraRTL: boolean;
650 begin
651 if i > curParaStart then
652 begin
653 para := copy(s,curParaStart,i-curParaStart);
654 paraRTL := GetFirstStrongBidiClassUTF8(para) in[ubcRightToLeft,ubcArabicLetter];
655 //detected paragraph does not match overall RTL option
656 if paraRTL <> ARightToLeft then
657 begin
658 if not paraRTL then
659 newPara := UnicodeCharToUTF8(UNICODE_LEFT_TO_RIGHT_MARK)+para+UnicodeCharToUTF8(UNICODE_LEFT_TO_RIGHT_MARK)
660 else
661 newPara := UnicodeCharToUTF8(UNICODE_RIGHT_TO_LEFT_MARK)+para+UnicodeCharToUTF8(UNICODE_RIGHT_TO_LEFT_MARK);
662 inc(i, length(newPara)-length(para));
663 delete(s, curParaStart, length(para));
664 insert(newPara, s, curParaStart);
665 end;
666 end;
667 end;
668
669var
670 charLen: integer;
671 u: Cardinal;
672
673begin
674 i := 1;
675 curParaStart := 1;
676 while i <= length(s) do
677 begin
678 charLen := UTF8CharacterLength(@s[i]);
679 u := UTF8CodepointToUnicode(@s[i], charLen);
680 if IsUnicodeParagraphSeparator(u) then
681 begin
682 CheckParagraph;
683 //skip end of line
684 inc(i);
685 //skip second CRLF
686 if ((u = 10) or (u = 13)) and (i <= length(s)) and (s[i] in[#13,#10]) and (s[i]<>s[i-1]) then inc(i);
687 curParaStart := i;
688 end else
689 inc(i);
690 end;
691 CheckParagraph;
692 result := s;
693end;
694
695type
696 TUnicodeArray = packed array of cardinal;
697 TIntegerArray = array of integer;
698
699procedure UTF8ToUnicode(const sUTF8: string; out u: TUnicodeArray; out ofs: TIntegerArray);
700var
701 index,len,charLen: integer;
702 p,pStart,pEnd: PChar;
703begin
704 if sUTF8 = '' then
705 begin
706 u := nil;
707 ofs := nil;
708 end
709 else
710 begin
711 pStart := @sUTF8[1];
712 pEnd := pStart + length(sUTF8);
713 p := pStart;
714 len := 0;
715 while p < pEnd do
716 begin
717 charLen := UTF8CharacterLength(p);
718 inc(len);
719 inc(p,charLen);
720 end;
721
722 setlength(u, len);
723 setlength(ofs, len);
724 p := pStart;
725 index := 0;
726 while p < pEnd do
727 begin
728 charLen := UTF8CharacterLength(p);
729 u[index] := UTF8CodepointToUnicode(p, charLen);
730 ofs[index] := p - pStart;
731 inc(index);
732 inc(p,charLen);
733 end;
734 end;
735end;
736
737function AnalyzeBidiUTF8(const sUTF8: string; ABaseDirection: cardinal): TBidiUTF8Array;
738var
739 u: TUnicodeArray;
740 ofs: TIntegerArray;
741 a: TUnicodeBidiArray;
742 i: Integer;
743begin
744 if sUTF8 = '' then
745 result := nil
746 else
747 begin
748 UTF8ToUnicode(sUTF8, u, ofs);
749 a := AnalyzeBidiUnicode(@u[0], length(u), ABaseDirection);
750 setlength(result, length(u));
751 for i := 0 to high(result) do
752 begin
753 result[i].Offset:= ofs[i];
754 result[i].BidiInfo := a[i];
755 end;
756 end;
757end;
758
759function AnalyzeBidiUTF8(const sUTF8: string; ARightToLeft: boolean): TBidiUTF8Array;
760begin
761 if ARightToLeft then
762 result := AnalyzeBidiUTF8(sUTF8, UNICODE_RIGHT_TO_LEFT_ISOLATE)
763 else
764 result := AnalyzeBidiUTF8(sUTF8, UNICODE_LEFT_TO_RIGHT_ISOLATE);
765end;
766
767function AnalyzeBidiUTF8(const sUTF8: string): TBidiUTF8Array;
768begin
769 result := AnalyzeBidiUTF8(sUTF8, UNICODE_FIRST_STRONG_ISOLATE)
770end;
771
772function GetUTF8DisplayOrder(const ABidi: TBidiUTF8Array): TUnicodeDisplayOrder;
773begin
774 if length(ABidi) = 0 then
775 result := nil
776 else
777 result := GetUnicodeDisplayOrder(@ABidi[0].BidiInfo, sizeof(TBidiUTF8Info), length(ABidi));
778end;
779
780function ContainsBidiIsolateOrFormattingUTF8(const sUTF8: string): boolean;
781var
782 p,pEnd: PChar;
783 charLen: Integer;
784 u: Cardinal;
785begin
786 if sUTF8 = '' then exit(false);
787 p := @sUTF8[1];
788 pEnd := p + length(sUTF8);
789 while p < pEnd do
790 begin
791 charLen := UTF8CharacterLength(p);
792 if (charLen = 0) or (p+charLen > pEnd) then break;
793 u := UTF8CodepointToUnicode(p, charLen);
794 case u of
795 UNICODE_LEFT_TO_RIGHT_ISOLATE, UNICODE_RIGHT_TO_LEFT_ISOLATE, UNICODE_FIRST_STRONG_ISOLATE,
796 UNICODE_LEFT_TO_RIGHT_EMBEDDING, UNICODE_RIGHT_TO_LEFT_EMBEDDING,
797 UNICODE_LEFT_TO_RIGHT_OVERRIDE, UNICODE_RIGHT_TO_LEFT_OVERRIDE: exit(true);
798 end;
799 inc(p,charLen);
800 end;
801 exit(false);
802end;
803
804function UTF8OverrideDirection(const sUTF8: string; ARightToLeft: boolean): string;
805begin
806 if ARightToLeft then
807 result := UnicodeCharToUTF8(UNICODE_RIGHT_TO_LEFT_OVERRIDE) + sUTF8 + UnicodeCharToUTF8(UNICODE_POP_DIRECTIONAL_FORMATTING)
808 else
809 result := UnicodeCharToUTF8(UNICODE_LEFT_TO_RIGHT_OVERRIDE) + sUTF8 + UnicodeCharToUTF8(UNICODE_POP_DIRECTIONAL_FORMATTING);
810end;
811
812function UTF8EmbedDirection(const sUTF8: string; ARightToLeft: boolean): string;
813begin
814 if ARightToLeft then
815 result := UnicodeCharToUTF8(UNICODE_RIGHT_TO_LEFT_EMBEDDING) + sUTF8 + UnicodeCharToUTF8(UNICODE_POP_DIRECTIONAL_FORMATTING)
816 else
817 result := UnicodeCharToUTF8(UNICODE_LEFT_TO_RIGHT_EMBEDDING) + sUTF8 + UnicodeCharToUTF8(UNICODE_POP_DIRECTIONAL_FORMATTING);
818end;
819
820//little endian stream functions
821function LEReadInt64(Stream: TStream): int64;
822begin
823 Result := 0;
824 stream.Read(Result, sizeof(Result));
825 Result := LEtoN(Result);
826end;
827
828procedure LEWriteInt64(Stream: TStream; AValue: int64);
829begin
830 AValue := NtoLE(AValue);
831 stream.Write(AValue, sizeof(AValue));
832end;
833
834function LEReadLongint(Stream: TStream): longint;
835begin
836 Result := 0;
837 stream.Read(Result, sizeof(Result));
838 Result := LEtoN(Result);
839end;
840
841procedure LEWriteLongint(Stream: TStream; AValue: LongInt);
842begin
843 AValue := NtoLE(AValue);
844 stream.Write(AValue, sizeof(AValue));
845end;
846
847function LEReadByte(Stream: TStream): byte;
848begin
849 Result := 0;
850 stream.Read(Result, sizeof(Result));
851end;
852
853procedure LEWriteByte(Stream: TStream; AValue: Byte);
854begin
855 stream.Write(AValue, sizeof(AValue));
856end;
857
858function LEReadSingle(Stream: TStream): single;
859var
860 ResultAsDWord : longword absolute result;
861begin
862 ResultAsDWord := 0;
863 stream.Read(ResultAsDWord, sizeof(Result));
864 ResultAsDWord := LEtoN(ResultAsDWord);
865end;
866
867procedure LEWriteSingle(Stream: TStream; AValue: single);
868var
869 ValueAsDWord : longword absolute AValue;
870begin
871 ValueAsDWord := NtoLE(ValueAsDWord);
872 stream.Write(ValueAsDWord, sizeof(AValue));
873end;
874
875end.
876
Note: See TracBrowser for help on using the repository browser.