Changeset 521 for GraphicTest/Packages/bgrabitmap/bgrautf8.pas
- Timestamp:
- Apr 17, 2019, 12:58:41 AM (6 years ago)
- Location:
- GraphicTest
- Files:
-
- 2 edited
Legend:
- Unmodified
- Added
- Removed
-
GraphicTest
- Property svn:ignore
-
old new 8 8 GraphicTest.lps 9 9 GraphicTest.dbg 10 heaptrclog.trc
-
- Property svn:ignore
-
GraphicTest/Packages/bgrabitmap/bgrautf8.pas
r494 r521 7 7 8 8 uses 9 Classes, SysUtils {$IFDEF BGRABITMAP_USE_LCL}, lazutf8classes{$ENDIF};9 Classes, SysUtils, BGRAUnicode{$IFDEF BGRABITMAP_USE_LCL}, lazutf8classes{$ENDIF}; 10 10 11 11 {$IFDEF BGRABITMAP_USE_LCL} … … 19 19 FFileName: utf8string; 20 20 public 21 constructor Create(const AFileName: utf8string; Mode: Word); 22 constructor Create(const AFileName: utf8string; Mode: Word; Rights: Cardinal); 21 constructor Create(const AFileName: utf8string; Mode: Word); overload; 22 constructor Create(const AFileName: utf8string; Mode: Word; Rights: Cardinal); overload; 23 23 destructor Destroy; override; 24 24 property FileName: utf8string Read FFilename; … … 60 60 61 61 function UTF8CharacterLength(p: PChar): integer; 62 function UTF8Length(const s: string): PtrInt; 63 function UTF8Length(p: PChar; ByteCount: PtrInt): PtrInt; 62 function UTF8Length(const s: string): PtrInt; overload; 63 function UTF8Length(p: PChar; ByteCount: PtrInt): PtrInt; overload; 64 64 function UnicodeCharToUTF8(u: cardinal): string4; 65 function UTF8ReverseString(const s: string): string; 66 function UTF8CodepointToUnicode(p: PChar; ACodePointLen: integer): cardinal; 67 68 type 69 TBidiUTF8Info = packed record 70 Offset: Integer; 71 BidiInfo: TUnicodeBidiInfo; 72 end; 73 TBidiUTF8Array = packed array of TBidiUTF8Info; 74 TUnicodeDisplayOrder = BGRAUnicode.TUnicodeDisplayOrder; 75 76 function GetBidiClassUTF8(P: PChar): TUnicodeBidiClass; 77 function GetFirstStrongBidiClassUTF8(const sUTF8: string): TUnicodeBidiClass; 78 function GetLastStrongBidiClassUTF8(const sUTF8: string): TUnicodeBidiClass; 79 function IsRightToLeftUTF8(const sUTF8: string): boolean; 80 function IsZeroWidthUTF8(const sUTF8: string): boolean; 81 function AddParagraphBidiUTF8(s: string; ARightToLeft: boolean): string; 82 function AnalyzeBidiUTF8(const sUTF8: string; ARightToLeft: boolean): TBidiUTF8Array; overload; 83 function AnalyzeBidiUTF8(const sUTF8: string): TBidiUTF8Array; overload; 84 function GetUTF8DisplayOrder(const ABidi: TBidiUTF8Array): TUnicodeDisplayOrder; 85 function ContainsBidiIsolateOrFormattingUTF8(const sUTF8: string): boolean; 86 87 function UTF8OverrideDirection(const sUTF8: string; ARightToLeft: boolean): string; 88 function UTF8EmbedDirection(const sUTF8: string; ARightToLeft: boolean): string; 65 89 66 90 //little endian stream functions 91 function LEReadInt64(Stream: TStream): int64; 92 procedure LEWriteInt64(Stream: TStream; AValue: int64); 67 93 function LEReadLongint(Stream: TStream): longint; 68 94 procedure LEWriteLongint(Stream: TStream; AValue: LongInt); … … 172 198 result := LazUtf8.UnicodeToUTF8(u); 173 199 end; 200 174 201 {$ELSE} 175 202 … … 457 484 {$ENDIF} 458 485 459 function LEReadLongint(Stream: TStream): longint; 460 begin 461 Result := 0; 462 stream.Read(Result, sizeof(Result)); 463 Result := LEtoN(Result); 464 end; 465 466 procedure LEWriteLongint(Stream: TStream; AValue: LongInt); 467 begin 468 AValue := NtoLE(AValue); 469 stream.Write(AValue, sizeof(AValue)); 470 end; 471 472 function LEReadByte(Stream: TStream): byte; 473 begin 474 Result := 0; 475 stream.Read(Result, sizeof(Result)); 476 end; 477 478 procedure LEWriteByte(Stream: TStream; AValue: Byte); 479 begin 480 stream.Write(AValue, sizeof(AValue)); 481 end; 482 483 function LEReadSingle(Stream: TStream): single; 484 var 485 ResultAsDWord : longword absolute result; 486 begin 487 ResultAsDWord := 0; 488 stream.Read(ResultAsDWord, sizeof(Result)); 489 ResultAsDWord := LEtoN(ResultAsDWord); 490 end; 491 492 procedure LEWriteSingle(Stream: TStream; AValue: single); 493 var 494 ValueAsDWord : longword absolute AValue; 495 begin 496 ValueAsDWord := NtoLE(ValueAsDWord); 497 stream.Write(ValueAsDWord, sizeof(AValue)); 486 function UTF8ReverseString(const s: string): string; 487 var 488 pSrc,pDest,pEnd: PChar; 489 charLen: Integer; 490 begin 491 if s = '' then 492 begin 493 result := ''; 494 exit; 495 end; 496 setlength(result, length(s)); 497 pDest := @result[1] + length(result); 498 pSrc := @s[1]; 499 pEnd := pSrc+length(s); 500 while pSrc < pEnd do 501 begin 502 charLen := UTF8CharacterLength(pSrc); 503 if (charLen = 0) or (pSrc+charLen > pEnd) then break; 504 dec(pDest, charLen); 505 move(pSrc^, pDest^, charLen); 506 inc(pSrc, charLen); 507 end; 508 end; 509 510 function UTF8CodepointToUnicode(p: PChar; ACodePointLen: integer): cardinal; 511 begin 512 case ACodePointLen of 513 0: result := 0; 514 1: result := ord(p^); 515 2: result := ((ord(p^) and %00011111) shl 6) or (ord(p[1]) and %00111111); 516 3: result := ((ord(p^) and %00011111) shl 12) or ((ord(p[1]) and %00111111) shl 6) 517 or (ord(p[2]) and %00111111); 518 4: result := ((ord(p^) and %00001111) shl 18) or ((ord(p[1]) and %00111111) shl 12) 519 or ((ord(p[2]) and %00111111) shl 6) or (ord(p[3]) and %00111111); 520 else 521 raise exception.Create('Invalid code point length'); 522 end; 498 523 end; 499 524 … … 515 540 end; 516 541 542 function GetBidiClassUTF8(P: PChar): TUnicodeBidiClass; 543 begin 544 result := GetUnicodeBidiClass(UTF8CodepointToUnicode(P, UTF8CharacterLength(p))); 545 end; 546 547 function GetFirstStrongBidiClassUTF8(const sUTF8: string): TUnicodeBidiClass; 548 var 549 p,pEnd: PChar; 550 charLen: Integer; 551 u: Cardinal; 552 curBidi: TUnicodeBidiClass; 553 isolateNesting: integer; 554 begin 555 if sUTF8 = '' then exit(ubcUnknown); 556 p := @sUTF8[1]; 557 pEnd := p + length(sUTF8); 558 isolateNesting:= 0; 559 while p < pEnd do 560 begin 561 charLen := UTF8CharacterLength(p); 562 if (charLen = 0) or (p+charLen > pEnd) then break; 563 u := UTF8CodepointToUnicode(p, charLen); 564 case u of 565 UNICODE_POP_DIRECTIONAL_ISOLATE: if isolateNesting > 0 then dec(isolateNesting); 566 end; 567 curBidi := GetUnicodeBidiClass(u); 568 if isolateNesting = 0 then 569 begin 570 if curBidi in[ubcLeftToRight,ubcRightToLeft,ubcArabicLetter] then 571 exit(curBidi); 572 end; 573 case u of 574 UNICODE_FIRST_STRONG_ISOLATE, UNICODE_LEFT_TO_RIGHT_ISOLATE, UNICODE_RIGHT_TO_LEFT_ISOLATE: inc(isolateNesting); 575 end; 576 if curBidi = ubcParagraphSeparator then isolateNesting:= 0; 577 inc(p,charLen); 578 end; 579 exit(ubcUnknown); 580 end; 581 582 function GetLastStrongBidiClassUTF8(const sUTF8: string): TUnicodeBidiClass; 583 var 584 p,pEnd: PChar; 585 charLen: Integer; 586 u: Cardinal; 587 curBidi: TUnicodeBidiClass; 588 isolateNesting: integer; 589 begin 590 if sUTF8 = '' then exit(ubcUnknown); 591 p := @sUTF8[1]; 592 pEnd := p + length(sUTF8); 593 isolateNesting:= 0; 594 result := ubcUnknown; 595 while p < pEnd do 596 begin 597 charLen := UTF8CharacterLength(p); 598 if (charLen = 0) or (p+charLen > pEnd) then break; 599 u := UTF8CodepointToUnicode(p, charLen); 600 case u of 601 UNICODE_POP_DIRECTIONAL_ISOLATE: if isolateNesting > 0 then dec(isolateNesting); 602 end; 603 curBidi := GetUnicodeBidiClass(u); 604 if isolateNesting = 0 then 605 begin 606 if curBidi in[ubcLeftToRight,ubcRightToLeft,ubcArabicLetter] then 607 result := curBidi; 608 end; 609 case u of 610 UNICODE_FIRST_STRONG_ISOLATE, UNICODE_LEFT_TO_RIGHT_ISOLATE, UNICODE_RIGHT_TO_LEFT_ISOLATE: inc(isolateNesting); 611 end; 612 if curBidi = ubcParagraphSeparator then isolateNesting:= 0; 613 inc(p,charLen); 614 end; 615 end; 616 617 function IsRightToLeftUTF8(const sUTF8: string): boolean; 618 begin 619 result := GetFirstStrongBidiClassUTF8(sUTF8) in[ubcRightToLeft,ubcArabicLetter]; 620 end; 621 622 function IsZeroWidthUTF8(const sUTF8: string): boolean; 623 var 624 p,pEnd: PChar; 625 charLen: Integer; 626 u: Cardinal; 627 begin 628 if sUTF8 = '' then exit(true); 629 p := @sUTF8[1]; 630 pEnd := p + length(sUTF8); 631 while p < pEnd do 632 begin 633 charLen := UTF8CharacterLength(p); 634 if (charLen = 0) or (p+charLen > pEnd) then break; 635 u := UTF8CodepointToUnicode(p, charLen); 636 if not IsZeroWidthUnicode(u) then exit(false); 637 inc(p,charLen); 638 end; 639 exit(true); 640 end; 641 642 function AddParagraphBidiUTF8(s: string; ARightToLeft: boolean): string; 643 var 644 i,curParaStart: Integer; 645 646 procedure CheckParagraph; 647 var 648 para,newPara: string; 649 paraRTL: boolean; 650 begin 651 if i > curParaStart then 652 begin 653 para := copy(s,curParaStart,i-curParaStart); 654 paraRTL := GetFirstStrongBidiClassUTF8(para) in[ubcRightToLeft,ubcArabicLetter]; 655 //detected paragraph does not match overall RTL option 656 if paraRTL <> ARightToLeft then 657 begin 658 if not paraRTL then 659 newPara := UnicodeCharToUTF8(UNICODE_LEFT_TO_RIGHT_MARK)+para+UnicodeCharToUTF8(UNICODE_LEFT_TO_RIGHT_MARK) 660 else 661 newPara := UnicodeCharToUTF8(UNICODE_RIGHT_TO_LEFT_MARK)+para+UnicodeCharToUTF8(UNICODE_RIGHT_TO_LEFT_MARK); 662 inc(i, length(newPara)-length(para)); 663 delete(s, curParaStart, length(para)); 664 insert(newPara, s, curParaStart); 665 end; 666 end; 667 end; 668 669 var 670 charLen: integer; 671 u: Cardinal; 672 673 begin 674 i := 1; 675 curParaStart := 1; 676 while i <= length(s) do 677 begin 678 charLen := UTF8CharacterLength(@s[i]); 679 u := UTF8CodepointToUnicode(@s[i], charLen); 680 if IsUnicodeParagraphSeparator(u) then 681 begin 682 CheckParagraph; 683 //skip end of line 684 inc(i); 685 //skip second CRLF 686 if ((u = 10) or (u = 13)) and (i <= length(s)) and (s[i] in[#13,#10]) and (s[i]<>s[i-1]) then inc(i); 687 curParaStart := i; 688 end else 689 inc(i); 690 end; 691 CheckParagraph; 692 result := s; 693 end; 694 695 type 696 TUnicodeArray = packed array of cardinal; 697 TIntegerArray = array of integer; 698 699 procedure UTF8ToUnicode(const sUTF8: string; out u: TUnicodeArray; out ofs: TIntegerArray); 700 var 701 index,len,charLen: integer; 702 p,pStart,pEnd: PChar; 703 begin 704 if sUTF8 = '' then 705 begin 706 u := nil; 707 ofs := nil; 708 end 709 else 710 begin 711 pStart := @sUTF8[1]; 712 pEnd := pStart + length(sUTF8); 713 p := pStart; 714 len := 0; 715 while p < pEnd do 716 begin 717 charLen := UTF8CharacterLength(p); 718 inc(len); 719 inc(p,charLen); 720 end; 721 722 setlength(u, len); 723 setlength(ofs, len); 724 p := pStart; 725 index := 0; 726 while p < pEnd do 727 begin 728 charLen := UTF8CharacterLength(p); 729 u[index] := UTF8CodepointToUnicode(p, charLen); 730 ofs[index] := p - pStart; 731 inc(index); 732 inc(p,charLen); 733 end; 734 end; 735 end; 736 737 function AnalyzeBidiUTF8(const sUTF8: string; ABaseDirection: cardinal): TBidiUTF8Array; 738 var 739 u: TUnicodeArray; 740 ofs: TIntegerArray; 741 a: TUnicodeBidiArray; 742 i: Integer; 743 begin 744 if sUTF8 = '' then 745 result := nil 746 else 747 begin 748 UTF8ToUnicode(sUTF8, u, ofs); 749 a := AnalyzeBidiUnicode(@u[0], length(u), ABaseDirection); 750 setlength(result, length(u)); 751 for i := 0 to high(result) do 752 begin 753 result[i].Offset:= ofs[i]; 754 result[i].BidiInfo := a[i]; 755 end; 756 end; 757 end; 758 759 function AnalyzeBidiUTF8(const sUTF8: string; ARightToLeft: boolean): TBidiUTF8Array; 760 begin 761 if ARightToLeft then 762 result := AnalyzeBidiUTF8(sUTF8, UNICODE_RIGHT_TO_LEFT_ISOLATE) 763 else 764 result := AnalyzeBidiUTF8(sUTF8, UNICODE_LEFT_TO_RIGHT_ISOLATE); 765 end; 766 767 function AnalyzeBidiUTF8(const sUTF8: string): TBidiUTF8Array; 768 begin 769 result := AnalyzeBidiUTF8(sUTF8, UNICODE_FIRST_STRONG_ISOLATE) 770 end; 771 772 function GetUTF8DisplayOrder(const ABidi: TBidiUTF8Array): TUnicodeDisplayOrder; 773 begin 774 if length(ABidi) = 0 then 775 result := nil 776 else 777 result := GetUnicodeDisplayOrder(@ABidi[0].BidiInfo, sizeof(TBidiUTF8Info), length(ABidi)); 778 end; 779 780 function ContainsBidiIsolateOrFormattingUTF8(const sUTF8: string): boolean; 781 var 782 p,pEnd: PChar; 783 charLen: Integer; 784 u: Cardinal; 785 begin 786 if sUTF8 = '' then exit(false); 787 p := @sUTF8[1]; 788 pEnd := p + length(sUTF8); 789 while p < pEnd do 790 begin 791 charLen := UTF8CharacterLength(p); 792 if (charLen = 0) or (p+charLen > pEnd) then break; 793 u := UTF8CodepointToUnicode(p, charLen); 794 case u of 795 UNICODE_LEFT_TO_RIGHT_ISOLATE, UNICODE_RIGHT_TO_LEFT_ISOLATE, UNICODE_FIRST_STRONG_ISOLATE, 796 UNICODE_LEFT_TO_RIGHT_EMBEDDING, UNICODE_RIGHT_TO_LEFT_EMBEDDING, 797 UNICODE_LEFT_TO_RIGHT_OVERRIDE, UNICODE_RIGHT_TO_LEFT_OVERRIDE: exit(true); 798 end; 799 inc(p,charLen); 800 end; 801 exit(false); 802 end; 803 804 function UTF8OverrideDirection(const sUTF8: string; ARightToLeft: boolean): string; 805 begin 806 if ARightToLeft then 807 result := UnicodeCharToUTF8(UNICODE_RIGHT_TO_LEFT_OVERRIDE) + sUTF8 + UnicodeCharToUTF8(UNICODE_POP_DIRECTIONAL_FORMATTING) 808 else 809 result := UnicodeCharToUTF8(UNICODE_LEFT_TO_RIGHT_OVERRIDE) + sUTF8 + UnicodeCharToUTF8(UNICODE_POP_DIRECTIONAL_FORMATTING); 810 end; 811 812 function UTF8EmbedDirection(const sUTF8: string; ARightToLeft: boolean): string; 813 begin 814 if ARightToLeft then 815 result := UnicodeCharToUTF8(UNICODE_RIGHT_TO_LEFT_EMBEDDING) + sUTF8 + UnicodeCharToUTF8(UNICODE_POP_DIRECTIONAL_FORMATTING) 816 else 817 result := UnicodeCharToUTF8(UNICODE_LEFT_TO_RIGHT_EMBEDDING) + sUTF8 + UnicodeCharToUTF8(UNICODE_POP_DIRECTIONAL_FORMATTING); 818 end; 819 820 //little endian stream functions 821 function LEReadInt64(Stream: TStream): int64; 822 begin 823 Result := 0; 824 stream.Read(Result, sizeof(Result)); 825 Result := LEtoN(Result); 826 end; 827 828 procedure LEWriteInt64(Stream: TStream; AValue: int64); 829 begin 830 AValue := NtoLE(AValue); 831 stream.Write(AValue, sizeof(AValue)); 832 end; 833 834 function LEReadLongint(Stream: TStream): longint; 835 begin 836 Result := 0; 837 stream.Read(Result, sizeof(Result)); 838 Result := LEtoN(Result); 839 end; 840 841 procedure LEWriteLongint(Stream: TStream; AValue: LongInt); 842 begin 843 AValue := NtoLE(AValue); 844 stream.Write(AValue, sizeof(AValue)); 845 end; 846 847 function LEReadByte(Stream: TStream): byte; 848 begin 849 Result := 0; 850 stream.Read(Result, sizeof(Result)); 851 end; 852 853 procedure LEWriteByte(Stream: TStream; AValue: Byte); 854 begin 855 stream.Write(AValue, sizeof(AValue)); 856 end; 857 858 function LEReadSingle(Stream: TStream): single; 859 var 860 ResultAsDWord : longword absolute result; 861 begin 862 ResultAsDWord := 0; 863 stream.Read(ResultAsDWord, sizeof(Result)); 864 ResultAsDWord := LEtoN(ResultAsDWord); 865 end; 866 867 procedure LEWriteSingle(Stream: TStream; AValue: single); 868 var 869 ValueAsDWord : longword absolute AValue; 870 begin 871 ValueAsDWord := NtoLE(ValueAsDWord); 872 stream.Write(ValueAsDWord, sizeof(AValue)); 873 end; 874 517 875 end. 518 876
Note:
See TracChangeset
for help on using the changeset viewer.