1 | unit BGRAUnicode;
|
---|
2 | { Implementation of Unicode bidi algorithm }
|
---|
3 | { Author: circular }
|
---|
4 | { License: modified LGPL }
|
---|
5 |
|
---|
6 | {$mode objfpc}{$H+}
|
---|
7 | {$modeswitch advancedrecords}
|
---|
8 |
|
---|
9 | interface
|
---|
10 |
|
---|
11 | uses
|
---|
12 | Classes, SysUtils;
|
---|
13 |
|
---|
14 | type
|
---|
15 | TUnicodeBidiClass = (ubcBoundaryNeutral, ubcSegmentSeparator, ubcParagraphSeparator, ubcWhiteSpace, ubcOtherNeutrals,
|
---|
16 | ubcCommonSeparator, ubcNonSpacingMark,
|
---|
17 | ubcLeftToRight, ubcEuropeanNumber, ubcEuropeanNumberSeparator, ubcEuropeanNumberTerminator,
|
---|
18 | ubcRightToLeft, ubcArabicLetter, ubcArabicNumber, ubcUnknown);
|
---|
19 |
|
---|
20 | const
|
---|
21 | ubcNeutral = [ubcSegmentSeparator, ubcParagraphSeparator, ubcWhiteSpace, ubcOtherNeutrals];
|
---|
22 |
|
---|
23 | BIDI_FLAG_REMOVED = 1; //RLE, LRE, RLO, LRO, PDF and BN are supposed to be removed
|
---|
24 | BIDI_FLAG_END_OF_PARAGRAPH = 2; //end of paragraph (paragraph spacing below)
|
---|
25 | BIDI_FLAG_END_OF_LINE = 4; //line break <br>
|
---|
26 |
|
---|
27 | type
|
---|
28 | PUnicodeBidiInfo = ^TUnicodeBidiInfo;
|
---|
29 |
|
---|
30 | { TUnicodeBidiInfo }
|
---|
31 |
|
---|
32 | TUnicodeBidiInfo = packed record
|
---|
33 | private
|
---|
34 | function GetEndOfLine: boolean;
|
---|
35 | function GetEndOfParagraph: boolean;
|
---|
36 | function GetRemoved: boolean;
|
---|
37 | function GetRightToLeft: boolean;
|
---|
38 | function GetParagraphRightToLeft: boolean;
|
---|
39 | public
|
---|
40 | ParagraphBidiLevel, BidiLevel: byte;
|
---|
41 | Flags, Dummy: Byte;
|
---|
42 | property IsRemoved: boolean read GetRemoved;
|
---|
43 | property IsRightToLeft: boolean read GetRightToLeft;
|
---|
44 | property IsParagraphRightToLeft: boolean read GetParagraphRightToLeft;
|
---|
45 | property IsEndOfLine: boolean read GetEndOfLine;
|
---|
46 | property IsEndOfParagraph: boolean read GetEndOfParagraph;
|
---|
47 | end;
|
---|
48 |
|
---|
49 | TUnicodeBidiArray = packed array of TUnicodeBidiInfo;
|
---|
50 | TUnicodeDisplayOrder = array of integer;
|
---|
51 |
|
---|
52 | const
|
---|
53 | //maximum nesting level of isolates and bidi-formatting blocks (char bidi level can actually be higher due to char properties)
|
---|
54 | UNICODE_MAX_BIDI_DEPTH = 125;
|
---|
55 |
|
---|
56 | UNICODE_LINE_SEPARATOR = $2028; //equivalent of <br>
|
---|
57 | UNICODE_PARAGRAPH_SEPARATOR = $2029; //equivalent of </p>
|
---|
58 | UNICODE_NEXT_LINE = $0085; //equivalent of CRLF
|
---|
59 |
|
---|
60 | //characters that split lines into top-level bidi blocks
|
---|
61 | UNICODE_LEFT_TO_RIGHT_ISOLATE = $2066;
|
---|
62 | UNICODE_RIGHT_TO_LEFT_ISOLATE = $2067;
|
---|
63 | UNICODE_FIRST_STRONG_ISOLATE = $2068;
|
---|
64 | UNICODE_POP_DIRECTIONAL_ISOLATE = $2069;
|
---|
65 |
|
---|
66 | //characters that split into bidi sub-blocks (called "formatting")
|
---|
67 | UNICODE_LEFT_TO_RIGHT_EMBEDDING = $202A;
|
---|
68 | UNICODE_RIGHT_TO_LEFT_EMBEDDING = $202B;
|
---|
69 | UNICODE_LEFT_TO_RIGHT_OVERRIDE = $202D;
|
---|
70 | UNICODE_RIGHT_TO_LEFT_OVERRIDE = $202E;
|
---|
71 | UNICODE_POP_DIRECTIONAL_FORMATTING = $202C;
|
---|
72 |
|
---|
73 | //characters that mark direction without splitting the bidi block
|
---|
74 | UNICODE_LEFT_TO_RIGHT_MARK = $200E;
|
---|
75 | UNICODE_RIGHT_TO_LEFT_MARK = $200F;
|
---|
76 | UNICODE_ARABIC_LETTER_MARK = $061C;
|
---|
77 |
|
---|
78 | //data separators
|
---|
79 | UNICODE_INFORMATION_SEPARATOR_FOUR = $001C; //end-of-file
|
---|
80 | UNICODE_INFORMATION_SEPARATOR_THREE = $001D; //section separator
|
---|
81 | UNICODE_INFORMATION_SEPARATOR_TWO = $001E; //record separator, kind of equivalent to paragraph separator
|
---|
82 | UNICODE_INFORMATION_SEPARATOR_ONE = $001F; //field separator, kind of equivalent to Tab
|
---|
83 |
|
---|
84 | //zero-width
|
---|
85 | UNICODE_ZERO_WIDTH_SPACE = $200B;
|
---|
86 | UNICODE_ZERO_WIDTH_NON_JOINER = $200C;
|
---|
87 | UNICODE_ZERO_WIDTH_NO_BREAK_SPACE = $FEFF; //byte order mark
|
---|
88 | UNICODE_ZERO_WIDTH_JOINER = $200D;
|
---|
89 |
|
---|
90 | //arabic letters
|
---|
91 | UNICODE_ARABIC_TATWEEL = $0640; //horizontal line that makes a ligature with most letters
|
---|
92 |
|
---|
93 | //ideographic punctuation
|
---|
94 | UNICODE_IDEOGRAPHIC_COMMA = $3001;
|
---|
95 | UNICODE_IDEOGRAPHIC_FULL_STOP = $3002;
|
---|
96 | UNICODE_FULLWIDTH_COMMA = $FF0C;
|
---|
97 | UNICODE_HORIZONTAL_ELLIPSIS = $2026;
|
---|
98 |
|
---|
99 | //bracket equivalence
|
---|
100 | UNICODE_RIGHT_POINTING_ANGLE_BRACKET = $232A;
|
---|
101 | UNICODE_RIGHT_ANGLE_BRACKET = $3009;
|
---|
102 |
|
---|
103 | type //bracket matching
|
---|
104 | TUnicodeBracketInfo = record
|
---|
105 | IsBracket: boolean;
|
---|
106 | OpeningBracket,ClosingBracket: cardinal;
|
---|
107 | end;
|
---|
108 |
|
---|
109 | function GetUnicodeBidiClass(u: cardinal): TUnicodeBidiClass;
|
---|
110 | function GetUnicodeBracketInfo(u: cardinal): TUnicodeBracketInfo;
|
---|
111 | function IsZeroWidthUnicode(u: cardinal): boolean;
|
---|
112 | function IsUnicodeParagraphSeparator(u: cardinal): boolean;
|
---|
113 | function IsUnicodeCrLf(u: cardinal): boolean;
|
---|
114 | function IsUnicodeIsolateOrFormatting(u: cardinal): boolean;
|
---|
115 |
|
---|
116 |
|
---|
117 | { Analyze unicode and return bidi levels for each character.
|
---|
118 | baseDirection can be either UNICODE_LEFT_TO_RIGHT_ISOLATE, UNICODE_RIGHT_TO_LEFT_ISOLATE or UNICODE_FIRST_STRONG_ISOLATE }
|
---|
119 | function AnalyzeBidiUnicode(u: PCardinal; ALength: integer; baseDirection: cardinal): TUnicodeBidiArray;
|
---|
120 |
|
---|
121 | { Determine diplay order, provided the display surface is horizontally infinite }
|
---|
122 | function GetUnicodeDisplayOrder(const AInfo: TUnicodeBidiArray): TUnicodeDisplayOrder; overload;
|
---|
123 | function GetUnicodeDisplayOrder(ALevels: PByte; ACount: integer): TUnicodeDisplayOrder; overload;
|
---|
124 | function GetUnicodeDisplayOrder(ABidiInfo: PUnicodeBidiInfo; AStride, ACount: integer): TUnicodeDisplayOrder; overload;
|
---|
125 | procedure GenerateUnicodeFunctions; //to regenerate the code of GetUnicodeBidiClass and GetUnicodeBracketInfo
|
---|
126 |
|
---|
127 | implementation
|
---|
128 |
|
---|
129 | procedure GenerateUnicodeFunctions;
|
---|
130 | const Indent = ' ';
|
---|
131 | var
|
---|
132 | tIn,tOut: TextFile;
|
---|
133 |
|
---|
134 | procedure IncludeClasses(AClasses: TStrings; AMinCode, AMaxCode: integer);
|
---|
135 | var
|
---|
136 | line,curBidi,newBidi: string;
|
---|
137 | codes: array of integer;
|
---|
138 | codeCount: integer;
|
---|
139 | cells: TStringList;
|
---|
140 | curCode: LongInt;
|
---|
141 |
|
---|
142 | procedure FlushCase;
|
---|
143 | var i: integer;
|
---|
144 | buf: string;
|
---|
145 | bufLines: TStringList;
|
---|
146 | begin
|
---|
147 | if codeCount = 0 then exit;
|
---|
148 |
|
---|
149 | bufLines := TStringList.Create;
|
---|
150 | i := 0;
|
---|
151 | buf := Indent+' ';
|
---|
152 | while i < codeCount do
|
---|
153 | begin
|
---|
154 | if i > 0 then buf += ', ';
|
---|
155 |
|
---|
156 | if length(buf) > 95 then
|
---|
157 | begin
|
---|
158 | bufLines.Add(buf);
|
---|
159 | buf := Indent+' ';
|
---|
160 | end;
|
---|
161 |
|
---|
162 | if (i+2 < codeCount) and (codes[i]+1 = codes[i+1]) and (codes[i+1]+1 = codes[i+2]) then
|
---|
163 | begin
|
---|
164 | buf += '$'+IntToHex(codes[i],2) + '..';
|
---|
165 | while (i+1 < codeCount) and (codes[i]+1 = codes[i+1]) do inc(i);
|
---|
166 | buf += '$'+IntToHex(codes[i],2);
|
---|
167 | end else
|
---|
168 | buf += '$'+IntToHex(codes[i],2);
|
---|
169 |
|
---|
170 | inc(i);
|
---|
171 | end;
|
---|
172 |
|
---|
173 | if trim(buf) <> '' then bufLines.Add(buf);
|
---|
174 |
|
---|
175 | buf := '';
|
---|
176 | for i := 0 to bufLines.Count-1 do
|
---|
177 | begin
|
---|
178 | if i > 0 then buf += LineEnding;
|
---|
179 | buf += bufLines[i];
|
---|
180 | end;
|
---|
181 |
|
---|
182 | bufLines.Free;
|
---|
183 |
|
---|
184 | case curBidi of
|
---|
185 | 'CS': WriteLn(tOut,buf+': result := ubcCommonSeparator;');
|
---|
186 | 'L': WriteLn(tOut,buf+': result := ubcLeftToRight;');
|
---|
187 | 'EN': WriteLn(tOut,buf+': result := ubcEuropeanNumber;');
|
---|
188 | 'ES': WriteLn(tOut,buf+': result := ubcEuropeanNumberSeparator;');
|
---|
189 | 'ET': WriteLn(tOut,buf+': result := ubcEuropeanNumberTerminator;');
|
---|
190 | 'R': WriteLn(tOut,buf+': result := ubcRightToLeft;');
|
---|
191 | 'AL': WriteLn(tOut,buf+': result := ubcArabicLetter;');
|
---|
192 | 'AN': WriteLn(tOut,buf+': result := ubcArabicNumber;');
|
---|
193 | 'NSM': WriteLn(tOut,buf+': result := ubcNonSpacingMark;');
|
---|
194 | 'BN': WriteLn(tOut,buf+': result := ubcBoundaryNeutral;');
|
---|
195 | 'B': WriteLn(tOut,buf+': result := ubcParagraphSeparator;');
|
---|
196 | 'S': WriteLn(tOut,buf+': result := ubcSegmentSeparator;');
|
---|
197 | 'WS': WriteLn(tOut,buf+': result := ubcWhiteSpace;');
|
---|
198 | 'ON': WriteLn(tOut,buf+': result := ubcOtherNeutrals;');
|
---|
199 | end;
|
---|
200 | codeCount:= 0;
|
---|
201 | end;
|
---|
202 |
|
---|
203 | begin
|
---|
204 | AssignFile(tIn, 'UnicodeData.txt');
|
---|
205 | Reset(tIn);
|
---|
206 |
|
---|
207 | cells := TStringList.Create;
|
---|
208 | codeCount := 0;
|
---|
209 | curBidi := '?';
|
---|
210 | codes := nil;
|
---|
211 | while not eof(tIn) do
|
---|
212 | begin
|
---|
213 | ReadLn(tIn,line);
|
---|
214 | cells.Delimiter := ';';
|
---|
215 | cells.QuoteChar := '"';
|
---|
216 | cells.StrictDelimiter := true;
|
---|
217 | cells.DelimitedText := line;
|
---|
218 | if cells.Count >= 5 then
|
---|
219 | begin
|
---|
220 | newBidi := cells[4];
|
---|
221 | if AClasses.IndexOf(newBidi)<>-1 then
|
---|
222 | begin
|
---|
223 | if newBidi <> curBidi then
|
---|
224 | begin
|
---|
225 | FlushCase;
|
---|
226 | curBidi := newBidi;
|
---|
227 | end;
|
---|
228 | curCode := StrToInt('$'+cells[0]);
|
---|
229 | if (curCode >= AMinCode) and (curCode <= AMaxCode) then
|
---|
230 | begin
|
---|
231 | if codeCount >= length(codes) then
|
---|
232 | setlength(codes, codeCount*2 + 8);
|
---|
233 | codes[codeCount] := curCode;
|
---|
234 | inc(codeCount);
|
---|
235 | end;
|
---|
236 | end;
|
---|
237 | end;
|
---|
238 | end;
|
---|
239 | FlushCase;
|
---|
240 | cells.Free;
|
---|
241 |
|
---|
242 | CloseFile(tIn);
|
---|
243 | end;
|
---|
244 |
|
---|
245 | procedure ParseUnicodeBidiClasses;
|
---|
246 | var c: TStringList;
|
---|
247 |
|
---|
248 | procedure Include(AMinCode,AMaxCode: integer);
|
---|
249 | begin
|
---|
250 | Writeln(tOut,Indent+'case u of');
|
---|
251 | c.CommaText := 'BN';
|
---|
252 | IncludeClasses(c, AMinCode,AMaxCode);
|
---|
253 | c.CommaText := 'S';
|
---|
254 | IncludeClasses(c, AMinCode,AMaxCode);
|
---|
255 | c.CommaText := 'B';
|
---|
256 | IncludeClasses(c, AMinCode,AMaxCode);
|
---|
257 | c.CommaText := 'WS';
|
---|
258 | IncludeClasses(c, AMinCode,AMaxCode);
|
---|
259 | c.CommaText := 'L,R,AL';
|
---|
260 | IncludeClasses(c, AMinCode,AMaxCode);
|
---|
261 | c.CommaText := 'EN';
|
---|
262 | IncludeClasses(c, AMinCode,AMaxCode);
|
---|
263 | c.CommaText := 'ES';
|
---|
264 | IncludeClasses(c, AMinCode,AMaxCode);
|
---|
265 | c.CommaText := 'ET';
|
---|
266 | IncludeClasses(c, AMinCode,AMaxCode);
|
---|
267 | c.CommaText := 'AN';
|
---|
268 | IncludeClasses(c, AMinCode,AMaxCode);
|
---|
269 | c.CommaText := 'CS,NSM';
|
---|
270 | IncludeClasses(c, AMinCode,AMaxCode);
|
---|
271 | c.CommaText := 'ON';
|
---|
272 | IncludeClasses(c, AMinCode,AMaxCode);
|
---|
273 | writeln(tout,Indent+'else result := ubcUnknown;');
|
---|
274 | writeln(tout,Indent+'end;');
|
---|
275 | end;
|
---|
276 |
|
---|
277 | begin
|
---|
278 | Writeln(tOut,'function GetUnicodeBidiClass(u: cardinal): TUnicodeBidiClass;');
|
---|
279 | FormatSettings.ShortDateFormat := 'yyyy/mm/dd';
|
---|
280 | Writeln(tOut,'begin //generated '+DateToStr(Date));
|
---|
281 | c := TStringList.Create;
|
---|
282 | writeln(tOut,' case u of');
|
---|
283 | writeln(tOut,' $00000..$07FFF:');
|
---|
284 | writeln(tOut,' case u of');
|
---|
285 | writeln(tOut,' $00000..$003FF:');
|
---|
286 | Include($00000, $003FF);
|
---|
287 | writeln(tOut,' $00400..$007FF:');
|
---|
288 | Include($00400, $007FF);
|
---|
289 | writeln(tOut,' $00800..$00FFF:');
|
---|
290 | Include($00800, $00FFF);
|
---|
291 | writeln(tOut,' $01000..$01FFF:');
|
---|
292 | Include($01000, $01FFF);
|
---|
293 | writeln(tOut,' else');
|
---|
294 | Include($02000, $07FFF);
|
---|
295 | writeln(tOut,' end;');
|
---|
296 | writeln(tOut,' $08000..$0FFFF:');
|
---|
297 | Include($08000, $0FFFF);
|
---|
298 | writeln(tOut,' else');
|
---|
299 | writeln(tOut,' case u of');
|
---|
300 | writeln(tOut,' $10000..$10FFF:');
|
---|
301 | Include($10000, $10FFF);
|
---|
302 | writeln(tOut,' $11000..$117FF:');
|
---|
303 | Include($11000, $117FF);
|
---|
304 | writeln(tOut,' $11800..$17FFF:');
|
---|
305 | Include($11800, $17FFF);
|
---|
306 | writeln(tOut,' $18000..$FFFFF:');
|
---|
307 | Include($18000, $FFFFF);
|
---|
308 | writeln(tOut,' else result := ubcUnknown;');
|
---|
309 | writeln(tOut,' end');
|
---|
310 | writeln(tOut,' end');
|
---|
311 |
|
---|
312 |
|
---|
313 | c.Free;
|
---|
314 |
|
---|
315 | writeln(tout,'end;');
|
---|
316 | writeln(tout);
|
---|
317 | end;
|
---|
318 |
|
---|
319 | procedure ParseBidiBrackets;
|
---|
320 | var elem: TStringList;
|
---|
321 | line: string;
|
---|
322 | begin
|
---|
323 | Writeln(tOut,'type');
|
---|
324 | writeln(tout,' TUnicodeBracketInfo = record');
|
---|
325 | writeln(tout,' IsBracket: boolean;');
|
---|
326 | writeln(tout,' OpeningBracket,ClosingBracket: cardinal;');
|
---|
327 | writeln(tout,' end;');
|
---|
328 | Writeln(tOut,'function GetUnicodeBracketInfo(u: cardinal): TUnicodeBracketInfo;');
|
---|
329 | Writeln(tOut,' procedure Bracket(AOpening,AClosing: cardinal);');
|
---|
330 | Writeln(tOut,' begin');
|
---|
331 | Writeln(tOut,' result.IsBracket := true;');
|
---|
332 | Writeln(tOut,' result.OpeningBracket := AOpening;');
|
---|
333 | Writeln(tOut,' result.ClosingBracket := AClosing;');
|
---|
334 | Writeln(tOut,' end;');
|
---|
335 | Writeln(tOut,'begin');
|
---|
336 | Writeln(tOut,' case u of');
|
---|
337 |
|
---|
338 | assignfile(tIn, 'BidiBrackets.txt');
|
---|
339 | reset(tin);
|
---|
340 | elem := TStringList.Create;
|
---|
341 | elem.Delimiter := ';';
|
---|
342 | elem.StrictDelimiter:= true;
|
---|
343 | while not eof(tin) do
|
---|
344 | begin
|
---|
345 | readln(tin, line);
|
---|
346 | elem.DelimitedText:= line;
|
---|
347 | if elem.Count >= 3 then
|
---|
348 | begin
|
---|
349 | if copy(trim(elem[2]),1,1) = 'o' then
|
---|
350 | writeln(tOut,' $'+trim(elem[0])+', $'+trim(elem[1])+': Bracket($'+trim(elem[0])+', $'+trim(elem[1])+');');
|
---|
351 | end;
|
---|
352 | end;
|
---|
353 | elem.Free;
|
---|
354 | closefile(tin);
|
---|
355 |
|
---|
356 | writeln(tout,' else');
|
---|
357 | writeln(tout,' begin');
|
---|
358 | writeln(tout,' result.IsBracket := false;');
|
---|
359 | writeln(tout,' result.OpeningBracket := 0;');
|
---|
360 | writeln(tout,' result.ClosingBracket := 0;');
|
---|
361 | writeln(tout,' end;');
|
---|
362 | Writeln(tOut,' end;');
|
---|
363 | Writeln(tOut,'end;');
|
---|
364 | Writeln(tOut);
|
---|
365 | end;
|
---|
366 |
|
---|
367 | begin
|
---|
368 | AssignFile(tOut, 'UnicodeFunctions.generated.pas');
|
---|
369 | Rewrite(tOut);
|
---|
370 |
|
---|
371 | ParseUnicodeBidiClasses;
|
---|
372 | ParseBidiBrackets;
|
---|
373 |
|
---|
374 | CloseFile(tOut);
|
---|
375 | end;
|
---|
376 |
|
---|
377 | function GetUnicodeBidiClass(u: cardinal): TUnicodeBidiClass;
|
---|
378 | begin //generated 2018-06-12
|
---|
379 | case u of
|
---|
380 | $00000..$07FFF:
|
---|
381 | case u of
|
---|
382 | $00000..$003FF:
|
---|
383 | case u of
|
---|
384 | $00..$08, $0E..$1B, $7F..$84, $86..$9F, $AD: result := ubcBoundaryNeutral;
|
---|
385 | $09, $0B, $1F: result := ubcSegmentSeparator;
|
---|
386 | $0A, $0D, $1C..$1E, $85: result := ubcParagraphSeparator;
|
---|
387 | $0C, $20: result := ubcWhiteSpace;
|
---|
388 | $41..$5A, $61..$7A, $AA, $B5, $BA, $C0..$D6, $D8..$F6, $F8..$2B8, $2BB..$2C1, $2D0, $2D1,
|
---|
389 | $2E0..$2E4, $2EE, $370..$373, $376, $377, $37A..$37D, $37F, $386, $388..$38A, $38C, $38E..$3A1,
|
---|
390 | $3A3..$3F5, $3F7..$3FF: result := ubcLeftToRight;
|
---|
391 | $30..$39, $B2, $B3, $B9: result := ubcEuropeanNumber;
|
---|
392 | $2B, $2D: result := ubcEuropeanNumberSeparator;
|
---|
393 | $23..$25, $A2..$A5, $B0, $B1: result := ubcEuropeanNumberTerminator;
|
---|
394 | $2C, $2E, $2F, $3A, $A0: result := ubcCommonSeparator;
|
---|
395 | $300..$36F: result := ubcNonSpacingMark;
|
---|
396 | $21, $22, $26..$2A, $3B..$40, $5B..$60, $7B..$7E, $A1, $A6..$A9, $AB, $AC, $AE, $AF, $B4,
|
---|
397 | $B6..$B8, $BB..$BF, $D7, $F7, $2B9, $2BA, $2C2..$2CF, $2D2..$2DF, $2E5..$2ED, $2EF..$2FF,
|
---|
398 | $374, $375, $37E, $384, $385, $387, $3F6: result := ubcOtherNeutrals;
|
---|
399 | else result := ubcUnknown;
|
---|
400 | end;
|
---|
401 | $00400..$007FF:
|
---|
402 | case u of
|
---|
403 | $400..$482, $48A..$52F, $531..$556, $559..$589: result := ubcLeftToRight;
|
---|
404 | $5BE, $5C0, $5C3, $5C6, $5D0..$5EA, $5EF..$5F4: result := ubcRightToLeft;
|
---|
405 | $608, $60B, $60D, $61B, $61C, $61E..$64A, $66D..$66F, $671..$6D5, $6E5, $6E6, $6EE, $6EF,
|
---|
406 | $6FA..$70D, $70F, $710, $712..$72F, $74D..$7A5, $7B1: result := ubcArabicLetter;
|
---|
407 | $7C0..$7EA, $7F4, $7F5, $7FA, $7FE, $7FF: result := ubcRightToLeft;
|
---|
408 | $6F0..$6F9: result := ubcEuropeanNumber;
|
---|
409 | $58F, $609, $60A, $66A: result := ubcEuropeanNumberTerminator;
|
---|
410 | $600..$605, $660..$669, $66B, $66C, $6DD: result := ubcArabicNumber;
|
---|
411 | $483..$489, $591..$5BD, $5BF, $5C1, $5C2, $5C4, $5C5, $5C7: result := ubcNonSpacingMark;
|
---|
412 | $60C: result := ubcCommonSeparator;
|
---|
413 | $610..$61A, $64B..$65F, $670, $6D6..$6DC, $6DF..$6E4, $6E7, $6E8, $6EA..$6ED, $711, $730..$74A,
|
---|
414 | $7A6..$7B0, $7EB..$7F3, $7FD: result := ubcNonSpacingMark;
|
---|
415 | $58A, $58D, $58E, $606, $607, $60E, $60F, $6DE, $6E9, $7F6..$7F9: result := ubcOtherNeutrals;
|
---|
416 | else result := ubcUnknown;
|
---|
417 | end;
|
---|
418 | $00800..$00FFF:
|
---|
419 | case u of
|
---|
420 | $800..$815, $81A, $824, $828, $830..$83E, $840..$858, $85E: result := ubcRightToLeft;
|
---|
421 | $860..$86A, $8A0..$8B4, $8B6..$8BD: result := ubcArabicLetter;
|
---|
422 | $903..$939, $93B, $93D..$940, $949..$94C, $94E..$950, $958..$961, $964..$980, $982, $983,
|
---|
423 | $985..$98C, $98F, $990, $993..$9A8, $9AA..$9B0, $9B2, $9B6..$9B9, $9BD..$9C0, $9C7, $9C8,
|
---|
424 | $9CB, $9CC, $9CE, $9D7, $9DC, $9DD, $9DF..$9E1, $9E6..$9F1, $9F4..$9FA, $9FC, $9FD, $A03,
|
---|
425 | $A05..$A0A, $A0F, $A10, $A13..$A28, $A2A..$A30, $A32, $A33, $A35, $A36, $A38, $A39, $A3E..$A40,
|
---|
426 | $A59..$A5C, $A5E, $A66..$A6F, $A72..$A74, $A76, $A83, $A85..$A8D, $A8F..$A91, $A93..$AA8,
|
---|
427 | $AAA..$AB0, $AB2, $AB3, $AB5..$AB9, $ABD..$AC0, $AC9, $ACB, $ACC, $AD0, $AE0, $AE1, $AE6..$AF0,
|
---|
428 | $AF9, $B02, $B03, $B05..$B0C, $B0F, $B10, $B13..$B28, $B2A..$B30, $B32, $B33, $B35..$B39,
|
---|
429 | $B3D, $B3E, $B40, $B47, $B48, $B4B, $B4C, $B57, $B5C, $B5D, $B5F..$B61, $B66..$B77, $B83,
|
---|
430 | $B85..$B8A, $B8E..$B90, $B92..$B95, $B99, $B9A, $B9C, $B9E, $B9F, $BA3, $BA4, $BA8..$BAA,
|
---|
431 | $BAE..$BB9, $BBE, $BBF, $BC1, $BC2, $BC6..$BC8, $BCA..$BCC, $BD0, $BD7, $BE6..$BF2, $C01..$C03,
|
---|
432 | $C05..$C0C, $C0E..$C10, $C12..$C28, $C2A..$C39, $C3D, $C41..$C44, $C58..$C5A, $C60, $C61,
|
---|
433 | $C66..$C6F, $C7F, $C80, $C82..$C8C, $C8E..$C90, $C92..$CA8, $CAA..$CB3, $CB5..$CB9, $CBD..$CC4,
|
---|
434 | $CC6..$CC8, $CCA, $CCB, $CD5, $CD6, $CDE, $CE0, $CE1, $CE6..$CEF, $CF1, $CF2, $D02, $D03,
|
---|
435 | $D05..$D0C, $D0E..$D10, $D12..$D3A, $D3D..$D40, $D46..$D48, $D4A..$D4C, $D4E, $D4F, $D54..$D61,
|
---|
436 | $D66..$D7F, $D82, $D83, $D85..$D96, $D9A..$DB1, $DB3..$DBB, $DBD, $DC0..$DC6, $DCF..$DD1,
|
---|
437 | $DD8..$DDF, $DE6..$DEF, $DF2..$DF4, $E01..$E30, $E32, $E33, $E40..$E46, $E4F..$E5B, $E81,
|
---|
438 | $E82, $E84, $E87, $E88, $E8A, $E8D, $E94..$E97, $E99..$E9F, $EA1..$EA3, $EA5, $EA7, $EAA,
|
---|
439 | $EAB, $EAD..$EB0, $EB2, $EB3, $EBD, $EC0..$EC4, $EC6, $ED0..$ED9, $EDC..$EDF, $F00..$F17,
|
---|
440 | $F1A..$F34, $F36, $F38, $F3E..$F47, $F49..$F6C, $F7F, $F85, $F88..$F8C, $FBE..$FC5, $FC7..$FCC,
|
---|
441 | $FCE..$FDA: result := ubcLeftToRight;
|
---|
442 | $9F2, $9F3, $9FB, $AF1, $BF9, $E3F: result := ubcEuropeanNumberTerminator;
|
---|
443 | $8E2: result := ubcArabicNumber;
|
---|
444 | $816..$819, $81B..$823, $825..$827, $829..$82D, $859..$85B, $8D3..$8E1, $8E3..$902, $93A,
|
---|
445 | $93C, $941..$948, $94D, $951..$957, $962, $963, $981, $9BC, $9C1..$9C4, $9CD, $9E2, $9E3,
|
---|
446 | $9FE, $A01, $A02, $A3C, $A41, $A42, $A47, $A48, $A4B..$A4D, $A51, $A70, $A71, $A75, $A81,
|
---|
447 | $A82, $ABC, $AC1..$AC5, $AC7, $AC8, $ACD, $AE2, $AE3, $AFA..$AFF, $B01, $B3C, $B3F, $B41..$B44,
|
---|
448 | $B4D, $B56, $B62, $B63, $B82, $BC0, $BCD, $C00, $C04, $C3E..$C40, $C46..$C48, $C4A..$C4D,
|
---|
449 | $C55, $C56, $C62, $C63, $C81, $CBC, $CCC, $CCD, $CE2, $CE3, $D00, $D01, $D3B, $D3C, $D41..$D44,
|
---|
450 | $D4D, $D62, $D63, $DCA, $DD2..$DD4, $DD6, $E31, $E34..$E3A, $E47..$E4E, $EB1, $EB4..$EB9,
|
---|
451 | $EBB, $EBC, $EC8..$ECD, $F18, $F19, $F35, $F37, $F39, $F71..$F7E, $F80..$F84, $F86, $F87,
|
---|
452 | $F8D..$F97, $F99..$FBC, $FC6: result := ubcNonSpacingMark;
|
---|
453 | $BF3..$BF8, $BFA, $C78..$C7E, $F3A..$F3D: result := ubcOtherNeutrals;
|
---|
454 | else result := ubcUnknown;
|
---|
455 | end;
|
---|
456 | $01000..$01FFF:
|
---|
457 | case u of
|
---|
458 | $180E: result := ubcBoundaryNeutral;
|
---|
459 | $1680: result := ubcWhiteSpace;
|
---|
460 | $1000..$102C, $1031, $1038, $103B, $103C, $103F..$1057, $105A..$105D, $1061..$1070, $1075..$1081,
|
---|
461 | $1083, $1084, $1087..$108C, $108E..$109C, $109E..$10C5, $10C7, $10CD, $10D0..$1248, $124A..$124D,
|
---|
462 | $1250..$1256, $1258, $125A..$125D, $1260..$1288, $128A..$128D, $1290..$12B0, $12B2..$12B5,
|
---|
463 | $12B8..$12BE, $12C0, $12C2..$12C5, $12C8..$12D6, $12D8..$1310, $1312..$1315, $1318..$135A,
|
---|
464 | $1360..$137C, $1380..$138F, $13A0..$13F5, $13F8..$13FD, $1401..$167F, $1681..$169A, $16A0..$16F8,
|
---|
465 | $1700..$170C, $170E..$1711, $1720..$1731, $1735, $1736, $1740..$1751, $1760..$176C, $176E..$1770,
|
---|
466 | $1780..$17B3, $17B6, $17BE..$17C5, $17C7, $17C8, $17D4..$17DA, $17DC, $17E0..$17E9, $1810..$1819,
|
---|
467 | $1820..$1878, $1880..$1884, $1887..$18A8, $18AA, $18B0..$18F5, $1900..$191E, $1923..$1926,
|
---|
468 | $1929..$192B, $1930, $1931, $1933..$1938, $1946..$196D, $1970..$1974, $1980..$19AB, $19B0..$19C9,
|
---|
469 | $19D0..$19DA, $1A00..$1A16, $1A19, $1A1A, $1A1E..$1A55, $1A57, $1A61, $1A63, $1A64, $1A6D..$1A72,
|
---|
470 | $1A80..$1A89, $1A90..$1A99, $1AA0..$1AAD, $1B04..$1B33, $1B35, $1B3B, $1B3D..$1B41, $1B43..$1B4B,
|
---|
471 | $1B50..$1B6A, $1B74..$1B7C, $1B82..$1BA1, $1BA6, $1BA7, $1BAA, $1BAE..$1BE5, $1BE7, $1BEA..$1BEC,
|
---|
472 | $1BEE, $1BF2, $1BF3, $1BFC..$1C2B, $1C34, $1C35, $1C3B..$1C49, $1C4D..$1C88, $1C90..$1CBA,
|
---|
473 | $1CBD..$1CC7, $1CD3, $1CE1, $1CE9..$1CEC, $1CEE..$1CF3, $1CF5..$1CF7, $1D00..$1DBF, $1E00..$1F15,
|
---|
474 | $1F18..$1F1D, $1F20..$1F45, $1F48..$1F4D, $1F50..$1F57, $1F59, $1F5B, $1F5D, $1F5F..$1F7D,
|
---|
475 | $1F80..$1FB4, $1FB6..$1FBC, $1FBE, $1FC2..$1FC4, $1FC6..$1FCC, $1FD0..$1FD3, $1FD6..$1FDB,
|
---|
476 | $1FE0..$1FEC, $1FF2..$1FF4, $1FF6..$1FFC: result := ubcLeftToRight;
|
---|
477 | $17DB: result := ubcEuropeanNumberTerminator;
|
---|
478 | $102D..$1030, $1032..$1037, $1039, $103A, $103D, $103E, $1058, $1059, $105E..$1060, $1071..$1074,
|
---|
479 | $1082, $1085, $1086, $108D, $109D, $135D..$135F, $1712..$1714, $1732..$1734, $1752, $1753,
|
---|
480 | $1772, $1773, $17B4, $17B5, $17B7..$17BD, $17C6, $17C9..$17D3, $17DD, $180B..$180D, $1885,
|
---|
481 | $1886, $18A9, $1920..$1922, $1927, $1928, $1932, $1939..$193B, $1A17, $1A18, $1A1B, $1A56,
|
---|
482 | $1A58..$1A5E, $1A60, $1A62, $1A65..$1A6C, $1A73..$1A7C, $1A7F, $1AB0..$1ABE, $1B00..$1B03,
|
---|
483 | $1B34, $1B36..$1B3A, $1B3C, $1B42, $1B6B..$1B73, $1B80, $1B81, $1BA2..$1BA5, $1BA8, $1BA9,
|
---|
484 | $1BAB..$1BAD, $1BE6, $1BE8, $1BE9, $1BED, $1BEF..$1BF1, $1C2C..$1C33, $1C36, $1C37, $1CD0..$1CD2,
|
---|
485 | $1CD4..$1CE0, $1CE2..$1CE8, $1CED, $1CF4, $1CF8, $1CF9, $1DC0..$1DF9, $1DFB..$1DFF: result := ubcNonSpacingMark;
|
---|
486 | $1390..$1399, $1400, $169B, $169C, $17F0..$17F9, $1800..$180A, $1940, $1944, $1945, $19DE..$19FF,
|
---|
487 | $1FBD, $1FBF..$1FC1, $1FCD..$1FCF, $1FDD..$1FDF, $1FED..$1FEF, $1FFD, $1FFE: result := ubcOtherNeutrals;
|
---|
488 | else result := ubcUnknown;
|
---|
489 | end;
|
---|
490 | else
|
---|
491 | case u of
|
---|
492 | $200B..$200D, $2060..$2064, $206A..$206F: result := ubcBoundaryNeutral;
|
---|
493 | $2029: result := ubcParagraphSeparator;
|
---|
494 | $2000..$200A, $2028, $205F, $3000: result := ubcWhiteSpace;
|
---|
495 | $200E: result := ubcLeftToRight;
|
---|
496 | $200F: result := ubcRightToLeft;
|
---|
497 | $2071, $207F, $2090..$209C, $2102, $2107, $210A..$2113, $2115, $2119..$211D, $2124, $2126,
|
---|
498 | $2128, $212A..$212D, $212F..$2139, $213C..$213F, $2145..$2149, $214E, $214F, $2160..$2188,
|
---|
499 | $2336..$237A, $2395, $249C..$24E9, $26AC, $2800..$28FF, $2C00..$2C2E, $2C30..$2C5E, $2C60..$2CE4,
|
---|
500 | $2CEB..$2CEE, $2CF2, $2CF3, $2D00..$2D25, $2D27, $2D2D, $2D30..$2D67, $2D6F, $2D70, $2D80..$2D96,
|
---|
501 | $2DA0..$2DA6, $2DA8..$2DAE, $2DB0..$2DB6, $2DB8..$2DBE, $2DC0..$2DC6, $2DC8..$2DCE, $2DD0..$2DD6,
|
---|
502 | $2DD8..$2DDE, $3005..$3007, $3021..$3029, $302E, $302F, $3031..$3035, $3038..$303C, $3041..$3096,
|
---|
503 | $309D..$309F, $30A1..$30FA, $30FC..$30FF, $3105..$312F, $3131..$318E, $3190..$31BA, $31F0..$321C,
|
---|
504 | $3220..$324F, $3260..$327B, $327F..$32B0, $32C0..$32CB, $32D0..$32FE, $3300..$3376, $337B..$33DD,
|
---|
505 | $33E0..$33FE, $3400, $4DB5, $4E00: result := ubcLeftToRight;
|
---|
506 | $2070, $2074..$2079, $2080..$2089, $2488..$249B: result := ubcEuropeanNumber;
|
---|
507 | $207A, $207B, $208A, $208B, $2212: result := ubcEuropeanNumberSeparator;
|
---|
508 | $2030..$2034, $20A0..$20BF, $212E, $2213: result := ubcEuropeanNumberTerminator;
|
---|
509 | $202F, $2044: result := ubcCommonSeparator;
|
---|
510 | $20D0..$20F0, $2CEF..$2CF1, $2D7F, $2DE0..$2DFF, $302A..$302D, $3099, $309A: result := ubcNonSpacingMark;
|
---|
511 | $2010..$2027, $2035..$2043, $2045..$205E, $207C..$207E, $208C..$208E, $2100, $2101, $2103..$2106,
|
---|
512 | $2108, $2109, $2114, $2116..$2118, $211E..$2123, $2125, $2127, $2129, $213A, $213B, $2140..$2144,
|
---|
513 | $214A..$214D, $2150..$215F, $2189..$218B, $2190..$2211, $2214..$2335, $237B..$2394, $2396..$2426,
|
---|
514 | $2440..$244A, $2460..$2487, $24EA..$26AB, $26AD..$27FF, $2900..$2B73, $2B76..$2B95, $2B98..$2BC8,
|
---|
515 | $2BCA..$2BFE, $2CE5..$2CEA, $2CF9..$2CFF, $2E00..$2E4E, $2E80..$2E99, $2E9B..$2EF3, $2F00..$2FD5,
|
---|
516 | $2FF0..$2FFB, $3001..$3004, $3008..$3020, $3030, $3036, $3037, $303D..$303F, $309B, $309C,
|
---|
517 | $30A0, $30FB, $31C0..$31E3, $321D, $321E, $3250..$325F, $327C..$327E, $32B1..$32BF, $32CC..$32CF,
|
---|
518 | $3377..$337A, $33DE, $33DF, $33FF, $4DC0..$4DFF: result := ubcOtherNeutrals;
|
---|
519 | else result := ubcUnknown;
|
---|
520 | end;
|
---|
521 | end;
|
---|
522 | $08000..$0FFFF:
|
---|
523 | case u of
|
---|
524 | $FEFF: result := ubcBoundaryNeutral;
|
---|
525 | $9FEF, $A000..$A48C, $A4D0..$A60C, $A610..$A62B, $A640..$A66E, $A680..$A69D, $A6A0..$A6EF,
|
---|
526 | $A6F2..$A6F7, $A722..$A787, $A789..$A7B9, $A7F7..$A801, $A803..$A805, $A807..$A80A, $A80C..$A824,
|
---|
527 | $A827, $A830..$A837, $A840..$A873, $A880..$A8C3, $A8CE..$A8D9, $A8F2..$A8FE, $A900..$A925,
|
---|
528 | $A92E..$A946, $A952, $A953, $A95F..$A97C, $A983..$A9B2, $A9B4, $A9B5, $A9BA, $A9BB, $A9BD..$A9CD,
|
---|
529 | $A9CF..$A9D9, $A9DE..$A9E4, $A9E6..$A9FE, $AA00..$AA28, $AA2F, $AA30, $AA33, $AA34, $AA40..$AA42,
|
---|
530 | $AA44..$AA4B, $AA4D, $AA50..$AA59, $AA5C..$AA7B, $AA7D..$AAAF, $AAB1, $AAB5, $AAB6, $AAB9..$AABD,
|
---|
531 | $AAC0, $AAC2, $AADB..$AAEB, $AAEE..$AAF5, $AB01..$AB06, $AB09..$AB0E, $AB11..$AB16, $AB20..$AB26,
|
---|
532 | $AB28..$AB2E, $AB30..$AB65, $AB70..$ABE4, $ABE6, $ABE7, $ABE9..$ABEC, $ABF0..$ABF9, $AC00,
|
---|
533 | $D7A3, $D7B0..$D7C6, $D7CB..$D7FB, $D800, $DB7F, $DB80, $DBFF, $DC00, $DFFF, $E000, $F8FF..$FA6D,
|
---|
534 | $FA70..$FAD9, $FB00..$FB06, $FB13..$FB17: result := ubcLeftToRight;
|
---|
535 | $FB1D, $FB1F..$FB28, $FB2A..$FB36, $FB38..$FB3C, $FB3E, $FB40, $FB41, $FB43, $FB44, $FB46..$FB4F: result := ubcRightToLeft;
|
---|
536 | $FB50..$FBC1, $FBD3..$FD3D, $FD50..$FD8F, $FD92..$FDC7, $FDF0..$FDFC, $FE70..$FE74, $FE76..$FEFC: result := ubcArabicLetter;
|
---|
537 | $FF21..$FF3A, $FF41..$FF5A, $FF66..$FFBE, $FFC2..$FFC7, $FFCA..$FFCF, $FFD2..$FFD7, $FFDA..$FFDC: result := ubcLeftToRight;
|
---|
538 | $FF10..$FF19: result := ubcEuropeanNumber;
|
---|
539 | $FB29, $FE62, $FE63, $FF0B, $FF0D: result := ubcEuropeanNumberSeparator;
|
---|
540 | $A838, $A839, $FE5F, $FE69, $FE6A, $FF03..$FF05, $FFE0, $FFE1, $FFE5, $FFE6: result := ubcEuropeanNumberTerminator;
|
---|
541 | $A66F..$A672, $A674..$A67D, $A69E, $A69F, $A6F0, $A6F1, $A802, $A806, $A80B, $A825, $A826,
|
---|
542 | $A8C4, $A8C5, $A8E0..$A8F1, $A8FF, $A926..$A92D, $A947..$A951, $A980..$A982, $A9B3, $A9B6..$A9B9,
|
---|
543 | $A9BC, $A9E5, $AA29..$AA2E, $AA31, $AA32, $AA35, $AA36, $AA43, $AA4C, $AA7C, $AAB0, $AAB2..$AAB4,
|
---|
544 | $AAB7, $AAB8, $AABE, $AABF, $AAC1, $AAEC, $AAED, $AAF6, $ABE5, $ABE8, $ABED, $FB1E, $FE00..$FE0F,
|
---|
545 | $FE20..$FE2F: result := ubcNonSpacingMark;
|
---|
546 | $FE50, $FE52, $FE55, $FF0C, $FF0E, $FF0F, $FF1A: result := ubcCommonSeparator;
|
---|
547 | $A490..$A4C6, $A60D..$A60F, $A673, $A67E, $A67F, $A700..$A721, $A788, $A828..$A82B, $A874..$A877,
|
---|
548 | $FD3E, $FD3F, $FDFD, $FE10..$FE19, $FE30..$FE4F, $FE51, $FE54, $FE56..$FE5E, $FE60, $FE61,
|
---|
549 | $FE64..$FE66, $FE68, $FE6B, $FF01, $FF02, $FF06..$FF0A, $FF1B..$FF20, $FF3B..$FF40, $FF5B..$FF65,
|
---|
550 | $FFE2..$FFE4, $FFE8..$FFEE, $FFF9..$FFFD: result := ubcOtherNeutrals;
|
---|
551 | else result := ubcUnknown;
|
---|
552 | end;
|
---|
553 | else
|
---|
554 | case u of
|
---|
555 | $10000..$10FFF:
|
---|
556 | case u of
|
---|
557 | $10000..$1000B, $1000D..$10026, $10028..$1003A, $1003C, $1003D, $1003F..$1004D, $10050..$1005D,
|
---|
558 | $10080..$100FA, $10100, $10102, $10107..$10133, $10137..$1013F, $1018D, $1018E, $101D0..$101FC,
|
---|
559 | $10280..$1029C, $102A0..$102D0, $10300..$10323, $1032D..$1034A, $10350..$10375, $10380..$1039D,
|
---|
560 | $1039F..$103C3, $103C8..$103D5, $10400..$1049D, $104A0..$104A9, $104B0..$104D3, $104D8..$104FB,
|
---|
561 | $10500..$10527, $10530..$10563, $1056F, $10600..$10736, $10740..$10755, $10760..$10767: result := ubcLeftToRight;
|
---|
562 | $10800..$10805, $10808, $1080A..$10835, $10837, $10838, $1083C, $1083F..$10855, $10857..$1089E,
|
---|
563 | $108A7..$108AF, $108E0..$108F2, $108F4, $108F5, $108FB..$1091B, $10920..$10939, $1093F,
|
---|
564 | $10980..$109B7, $109BC..$109CF, $109D2..$10A00, $10A10..$10A13, $10A15..$10A17, $10A19..$10A35,
|
---|
565 | $10A40..$10A48, $10A50..$10A58, $10A60..$10A9F, $10AC0..$10AE4, $10AEB..$10AF6, $10B00..$10B35,
|
---|
566 | $10B40..$10B55, $10B58..$10B72, $10B78..$10B91, $10B99..$10B9C, $10BA9..$10BAF, $10C00..$10C48,
|
---|
567 | $10C80..$10CB2, $10CC0..$10CF2, $10CFA..$10CFF: result := ubcRightToLeft;
|
---|
568 | $10D00..$10D23: result := ubcArabicLetter;
|
---|
569 | $10F00..$10F27: result := ubcRightToLeft;
|
---|
570 | $10F30..$10F45, $10F51..$10F59: result := ubcArabicLetter;
|
---|
571 | $102E1..$102FB: result := ubcEuropeanNumber;
|
---|
572 | $10D30..$10D39, $10E60..$10E7E: result := ubcArabicNumber;
|
---|
573 | $101FD, $102E0, $10376..$1037A, $10A01..$10A03, $10A05, $10A06, $10A0C..$10A0F, $10A38..$10A3A,
|
---|
574 | $10A3F, $10AE5, $10AE6, $10D24..$10D27, $10F46..$10F50: result := ubcNonSpacingMark;
|
---|
575 | $10101, $10140..$1018C, $10190..$1019B, $101A0, $1091F, $10B39..$10B3F: result := ubcOtherNeutrals;
|
---|
576 | else result := ubcUnknown;
|
---|
577 | end;
|
---|
578 | $11000..$117FF:
|
---|
579 | case u of
|
---|
580 | $11000, $11002..$11037, $11047..$1104D, $11066..$1106F, $11082..$110B2, $110B7, $110B8,
|
---|
581 | $110BB..$110C1, $110CD, $110D0..$110E8, $110F0..$110F9, $11103..$11126, $1112C, $11136..$11146,
|
---|
582 | $11150..$11172, $11174..$11176, $11182..$111B5, $111BF..$111C8, $111CD, $111D0..$111DF,
|
---|
583 | $111E1..$111F4, $11200..$11211, $11213..$1122E, $11232, $11233, $11235, $11238..$1123D,
|
---|
584 | $11280..$11286, $11288, $1128A..$1128D, $1128F..$1129D, $1129F..$112A9, $112B0..$112DE,
|
---|
585 | $112E0..$112E2, $112F0..$112F9, $11302, $11303, $11305..$1130C, $1130F, $11310, $11313..$11328,
|
---|
586 | $1132A..$11330, $11332, $11333, $11335..$11339, $1133D..$1133F, $11341..$11344, $11347,
|
---|
587 | $11348, $1134B..$1134D, $11350, $11357, $1135D..$11363, $11400..$11437, $11440, $11441,
|
---|
588 | $11445, $11447..$11459, $1145B, $1145D, $11480..$114B2, $114B9, $114BB..$114BE, $114C1,
|
---|
589 | $114C4..$114C7, $114D0..$114D9, $11580..$115B1, $115B8..$115BB, $115BE, $115C1..$115DB,
|
---|
590 | $11600..$11632, $1163B, $1163C, $1163E, $11641..$11644, $11650..$11659, $11680..$116AA,
|
---|
591 | $116AC, $116AE, $116AF, $116B6, $116C0..$116C9, $11700..$1171A, $11720, $11721, $11726,
|
---|
592 | $11730..$1173F: result := ubcLeftToRight;
|
---|
593 | $11001, $11038..$11046, $1107F..$11081, $110B3..$110B6, $110B9, $110BA, $11100..$11102,
|
---|
594 | $11127..$1112B, $1112D..$11134, $11173, $11180, $11181, $111B6..$111BE, $111C9..$111CC,
|
---|
595 | $1122F..$11231, $11234, $11236, $11237, $1123E, $112DF, $112E3..$112EA, $11300, $11301,
|
---|
596 | $1133B, $1133C, $11340, $11366..$1136C, $11370..$11374, $11438..$1143F, $11442..$11444,
|
---|
597 | $11446, $1145E, $114B3..$114B8, $114BA, $114BF, $114C0, $114C2, $114C3, $115B2..$115B5,
|
---|
598 | $115BC, $115BD, $115BF, $115C0, $115DC, $115DD, $11633..$1163A, $1163D, $1163F, $11640,
|
---|
599 | $116AB, $116AD, $116B0..$116B5, $116B7, $1171D..$1171F, $11722..$11725, $11727..$1172B: result := ubcNonSpacingMark;
|
---|
600 | $11052..$11065, $11660..$1166C: result := ubcOtherNeutrals;
|
---|
601 | else result := ubcUnknown;
|
---|
602 | end;
|
---|
603 | $11800..$17FFF:
|
---|
604 | case u of
|
---|
605 | $11800..$1182E, $11838, $1183B, $118A0..$118F2, $118FF, $11A00, $11A07, $11A08, $11A0B..$11A32,
|
---|
606 | $11A39, $11A3A, $11A3F..$11A46, $11A50, $11A57, $11A58, $11A5C..$11A83, $11A86..$11A89,
|
---|
607 | $11A97, $11A9A..$11AA2, $11AC0..$11AF8, $11C00..$11C08, $11C0A..$11C2F, $11C3E..$11C45,
|
---|
608 | $11C50..$11C6C, $11C70..$11C8F, $11CA9, $11CB1, $11CB4, $11D00..$11D06, $11D08, $11D09,
|
---|
609 | $11D0B..$11D30, $11D46, $11D50..$11D59, $11D60..$11D65, $11D67, $11D68, $11D6A..$11D8E,
|
---|
610 | $11D93, $11D94, $11D96, $11D98, $11DA0..$11DA9, $11EE0..$11EF2, $11EF5..$11EF8, $12000..$12399,
|
---|
611 | $12400..$1246E, $12470..$12474, $12480..$12543, $13000..$1342E, $14400..$14646, $16800..$16A38,
|
---|
612 | $16A40..$16A5E, $16A60..$16A69, $16A6E, $16A6F, $16AD0..$16AED, $16AF5, $16B00..$16B2F,
|
---|
613 | $16B37..$16B45, $16B50..$16B59, $16B5B..$16B61, $16B63..$16B77, $16B7D..$16B8F, $16E40..$16E9A,
|
---|
614 | $16F00..$16F44, $16F50..$16F7E, $16F93..$16F9F, $16FE0, $16FE1, $17000: result := ubcLeftToRight;
|
---|
615 | $1182F..$11837, $11839, $1183A, $11A01..$11A06, $11A09, $11A0A, $11A33..$11A38, $11A3B..$11A3E,
|
---|
616 | $11A47, $11A51..$11A56, $11A59..$11A5B, $11A8A..$11A96, $11A98, $11A99, $11C30..$11C36,
|
---|
617 | $11C38..$11C3D, $11C92..$11CA7, $11CAA..$11CB0, $11CB2, $11CB3, $11CB5, $11CB6, $11D31..$11D36,
|
---|
618 | $11D3A, $11D3C, $11D3D, $11D3F..$11D45, $11D47, $11D90, $11D91, $11D95, $11D97, $11EF3,
|
---|
619 | $11EF4, $16AF0..$16AF4, $16B30..$16B36, $16F8F..$16F92: result := ubcNonSpacingMark;
|
---|
620 | else result := ubcUnknown;
|
---|
621 | end;
|
---|
622 | $18000..$FFFFF:
|
---|
623 | case u of
|
---|
624 | $1BCA0..$1BCA3, $1D173..$1D17A, $E0001, $E0020..$E007F: result := ubcBoundaryNeutral;
|
---|
625 | $187F1, $18800..$18AF2, $1B000..$1B11E, $1B170..$1B2FB, $1BC00..$1BC6A, $1BC70..$1BC7C,
|
---|
626 | $1BC80..$1BC88, $1BC90..$1BC99, $1BC9C, $1BC9F, $1D000..$1D0F5, $1D100..$1D126, $1D129..$1D166,
|
---|
627 | $1D16A..$1D172, $1D183, $1D184, $1D18C..$1D1A9, $1D1AE..$1D1E8, $1D2E0..$1D2F3, $1D360..$1D378,
|
---|
628 | $1D400..$1D454, $1D456..$1D49C, $1D49E, $1D49F, $1D4A2, $1D4A5, $1D4A6, $1D4A9..$1D4AC,
|
---|
629 | $1D4AE..$1D4B9, $1D4BB, $1D4BD..$1D4C3, $1D4C5..$1D505, $1D507..$1D50A, $1D50D..$1D514,
|
---|
630 | $1D516..$1D51C, $1D51E..$1D539, $1D53B..$1D53E, $1D540..$1D544, $1D546, $1D54A..$1D550,
|
---|
631 | $1D552..$1D6A5, $1D6A8..$1D6DA, $1D6DC..$1D714, $1D716..$1D74E, $1D750..$1D788, $1D78A..$1D7C2,
|
---|
632 | $1D7C4..$1D7CB, $1D800..$1D9FF, $1DA37..$1DA3A, $1DA6D..$1DA74, $1DA76..$1DA83, $1DA85..$1DA8B: result := ubcLeftToRight;
|
---|
633 | $1E800..$1E8C4, $1E8C7..$1E8CF, $1E900..$1E943, $1E950..$1E959, $1E95E, $1E95F: result := ubcRightToLeft;
|
---|
634 | $1EC71..$1ECB4, $1EE00..$1EE03, $1EE05..$1EE1F, $1EE21, $1EE22, $1EE24, $1EE27, $1EE29..$1EE32,
|
---|
635 | $1EE34..$1EE37, $1EE39, $1EE3B, $1EE42, $1EE47, $1EE49, $1EE4B, $1EE4D..$1EE4F, $1EE51,
|
---|
636 | $1EE52, $1EE54, $1EE57, $1EE59, $1EE5B, $1EE5D, $1EE5F, $1EE61, $1EE62, $1EE64, $1EE67..$1EE6A,
|
---|
637 | $1EE6C..$1EE72, $1EE74..$1EE77, $1EE79..$1EE7C, $1EE7E, $1EE80..$1EE89, $1EE8B..$1EE9B,
|
---|
638 | $1EEA1..$1EEA3, $1EEA5..$1EEA9, $1EEAB..$1EEBB: result := ubcArabicLetter;
|
---|
639 | $1F110..$1F12E, $1F130..$1F169, $1F170..$1F1AC, $1F1E6..$1F202, $1F210..$1F23B, $1F240..$1F248,
|
---|
640 | $1F250, $1F251, $20000, $2A6D6, $2A700, $2B734, $2B740, $2B81D, $2B820, $2CEA1, $2CEB0,
|
---|
641 | $2EBE0, $2F800..$2FA1D, $F0000, $FFFFD: result := ubcLeftToRight;
|
---|
642 | $1D7CE..$1D7FF, $1F100..$1F10A: result := ubcEuropeanNumber;
|
---|
643 | $1BC9D, $1BC9E, $1D167..$1D169, $1D17B..$1D182, $1D185..$1D18B, $1D1AA..$1D1AD, $1D242..$1D244,
|
---|
644 | $1DA00..$1DA36, $1DA3B..$1DA6C, $1DA75, $1DA84, $1DA9B..$1DA9F, $1DAA1..$1DAAF, $1E000..$1E006,
|
---|
645 | $1E008..$1E018, $1E01B..$1E021, $1E023, $1E024, $1E026..$1E02A, $1E8D0..$1E8D6, $1E944..$1E94A,
|
---|
646 | $E0100..$E01EF: result := ubcNonSpacingMark;
|
---|
647 | $1D200..$1D241, $1D245, $1D300..$1D356, $1D6DB, $1D715, $1D74F, $1D789, $1D7C3, $1EEF0,
|
---|
648 | $1EEF1, $1F000..$1F02B, $1F030..$1F093, $1F0A0..$1F0AE, $1F0B1..$1F0BF, $1F0C1..$1F0CF,
|
---|
649 | $1F0D1..$1F0F5, $1F10B, $1F10C, $1F12F, $1F16A, $1F16B, $1F260..$1F265, $1F300..$1F6D4,
|
---|
650 | $1F6E0..$1F6EC, $1F6F0..$1F6F9, $1F700..$1F773, $1F780..$1F7D8, $1F800..$1F80B, $1F810..$1F847,
|
---|
651 | $1F850..$1F859, $1F860..$1F887, $1F890..$1F8AD, $1F900..$1F90B, $1F910..$1F93E, $1F940..$1F970,
|
---|
652 | $1F973..$1F976, $1F97A, $1F97C..$1F9A2, $1F9B0..$1F9B9, $1F9C0..$1F9C2, $1F9D0..$1F9FF,
|
---|
653 | $1FA60..$1FA6D: result := ubcOtherNeutrals;
|
---|
654 | else result := ubcUnknown;
|
---|
655 | end;
|
---|
656 | else result := ubcUnknown;
|
---|
657 | end
|
---|
658 | end
|
---|
659 | end;
|
---|
660 |
|
---|
661 | {$PUSH}{$WARNINGS OFF}
|
---|
662 | function GetUnicodeBracketInfo(u: cardinal): TUnicodeBracketInfo;
|
---|
663 | procedure Bracket(AOpening,AClosing: cardinal);
|
---|
664 | begin
|
---|
665 | result.IsBracket := true;
|
---|
666 | result.OpeningBracket := AOpening;
|
---|
667 | result.ClosingBracket := AClosing;
|
---|
668 | end;
|
---|
669 | begin
|
---|
670 | case u of
|
---|
671 | $0028, $0029: Bracket($0028, $0029);
|
---|
672 | $005B, $005D: Bracket($005B, $005D);
|
---|
673 | $007B, $007D: Bracket($007B, $007D);
|
---|
674 | $0F3A, $0F3B: Bracket($0F3A, $0F3B);
|
---|
675 | $0F3C, $0F3D: Bracket($0F3C, $0F3D);
|
---|
676 | $169B, $169C: Bracket($169B, $169C);
|
---|
677 | $2045, $2046: Bracket($2045, $2046);
|
---|
678 | $207D, $207E: Bracket($207D, $207E);
|
---|
679 | $208D, $208E: Bracket($208D, $208E);
|
---|
680 | $2308, $2309: Bracket($2308, $2309);
|
---|
681 | $230A, $230B: Bracket($230A, $230B);
|
---|
682 | $2329, $232A: Bracket($2329, $232A);
|
---|
683 | $2768, $2769: Bracket($2768, $2769);
|
---|
684 | $276A, $276B: Bracket($276A, $276B);
|
---|
685 | $276C, $276D: Bracket($276C, $276D);
|
---|
686 | $276E, $276F: Bracket($276E, $276F);
|
---|
687 | $2770, $2771: Bracket($2770, $2771);
|
---|
688 | $2772, $2773: Bracket($2772, $2773);
|
---|
689 | $2774, $2775: Bracket($2774, $2775);
|
---|
690 | $27C5, $27C6: Bracket($27C5, $27C6);
|
---|
691 | $27E6, $27E7: Bracket($27E6, $27E7);
|
---|
692 | $27E8, $27E9: Bracket($27E8, $27E9);
|
---|
693 | $27EA, $27EB: Bracket($27EA, $27EB);
|
---|
694 | $27EC, $27ED: Bracket($27EC, $27ED);
|
---|
695 | $27EE, $27EF: Bracket($27EE, $27EF);
|
---|
696 | $2983, $2984: Bracket($2983, $2984);
|
---|
697 | $2985, $2986: Bracket($2985, $2986);
|
---|
698 | $2987, $2988: Bracket($2987, $2988);
|
---|
699 | $2989, $298A: Bracket($2989, $298A);
|
---|
700 | $298B, $298C: Bracket($298B, $298C);
|
---|
701 | $298D, $2990: Bracket($298D, $2990);
|
---|
702 | $298F, $298E: Bracket($298F, $298E);
|
---|
703 | $2991, $2992: Bracket($2991, $2992);
|
---|
704 | $2993, $2994: Bracket($2993, $2994);
|
---|
705 | $2995, $2996: Bracket($2995, $2996);
|
---|
706 | $2997, $2998: Bracket($2997, $2998);
|
---|
707 | $29D8, $29D9: Bracket($29D8, $29D9);
|
---|
708 | $29DA, $29DB: Bracket($29DA, $29DB);
|
---|
709 | $29FC, $29FD: Bracket($29FC, $29FD);
|
---|
710 | $2E22, $2E23: Bracket($2E22, $2E23);
|
---|
711 | $2E24, $2E25: Bracket($2E24, $2E25);
|
---|
712 | $2E26, $2E27: Bracket($2E26, $2E27);
|
---|
713 | $2E28, $2E29: Bracket($2E28, $2E29);
|
---|
714 | $3008, $3009: Bracket($3008, $3009);
|
---|
715 | $300A, $300B: Bracket($300A, $300B);
|
---|
716 | $300C, $300D: Bracket($300C, $300D);
|
---|
717 | $300E, $300F: Bracket($300E, $300F);
|
---|
718 | $3010, $3011: Bracket($3010, $3011);
|
---|
719 | $3014, $3015: Bracket($3014, $3015);
|
---|
720 | $3016, $3017: Bracket($3016, $3017);
|
---|
721 | $3018, $3019: Bracket($3018, $3019);
|
---|
722 | $301A, $301B: Bracket($301A, $301B);
|
---|
723 | $FE59, $FE5A: Bracket($FE59, $FE5A);
|
---|
724 | $FE5B, $FE5C: Bracket($FE5B, $FE5C);
|
---|
725 | $FE5D, $FE5E: Bracket($FE5D, $FE5E);
|
---|
726 | $FF08, $FF09: Bracket($FF08, $FF09);
|
---|
727 | $FF3B, $FF3D: Bracket($FF3B, $FF3D);
|
---|
728 | $FF5B, $FF5D: Bracket($FF5B, $FF5D);
|
---|
729 | $FF5F, $FF60: Bracket($FF5F, $FF60);
|
---|
730 | $FF62, $FF63: Bracket($FF62, $FF63);
|
---|
731 | else
|
---|
732 | begin
|
---|
733 | result.IsBracket := false;
|
---|
734 | result.OpeningBracket := 0;
|
---|
735 | result.ClosingBracket := 0;
|
---|
736 | end;
|
---|
737 | end;
|
---|
738 | end;
|
---|
739 | {$POP}
|
---|
740 |
|
---|
741 | function IsZeroWidthUnicode(u: cardinal): boolean;
|
---|
742 | begin
|
---|
743 | case u of
|
---|
744 | UNICODE_ZERO_WIDTH_SPACE, UNICODE_ZERO_WIDTH_NON_JOINER,
|
---|
745 | UNICODE_ZERO_WIDTH_JOINER, UNICODE_ZERO_WIDTH_NO_BREAK_SPACE,
|
---|
746 | UNICODE_LEFT_TO_RIGHT_MARK,UNICODE_RIGHT_TO_LEFT_MARK,
|
---|
747 | UNICODE_ARABIC_LETTER_MARK: result := true;
|
---|
748 | else result := false;
|
---|
749 | end;
|
---|
750 | end;
|
---|
751 |
|
---|
752 | function IsUnicodeParagraphSeparator(u: cardinal): boolean;
|
---|
753 | begin
|
---|
754 | case u of
|
---|
755 | $0A, $0D, UNICODE_NEXT_LINE, UNICODE_PARAGRAPH_SEPARATOR,
|
---|
756 | UNICODE_INFORMATION_SEPARATOR_FOUR, UNICODE_INFORMATION_SEPARATOR_THREE, UNICODE_INFORMATION_SEPARATOR_TWO: result := true;
|
---|
757 | else result := false;
|
---|
758 | end;
|
---|
759 | end;
|
---|
760 |
|
---|
761 | function IsUnicodeCrLf(u: cardinal): boolean;
|
---|
762 | begin
|
---|
763 | result := (u=10) or (u=13);
|
---|
764 | end;
|
---|
765 |
|
---|
766 | function IsUnicodeIsolateOrFormatting(u: cardinal): boolean;
|
---|
767 | begin
|
---|
768 | case u of
|
---|
769 | UNICODE_LEFT_TO_RIGHT_ISOLATE, UNICODE_RIGHT_TO_LEFT_ISOLATE, UNICODE_FIRST_STRONG_ISOLATE,
|
---|
770 | UNICODE_LEFT_TO_RIGHT_EMBEDDING, UNICODE_RIGHT_TO_LEFT_EMBEDDING,
|
---|
771 | UNICODE_LEFT_TO_RIGHT_OVERRIDE, UNICODE_RIGHT_TO_LEFT_OVERRIDE: exit(true)
|
---|
772 | else exit(false);
|
---|
773 | end;
|
---|
774 | end;
|
---|
775 |
|
---|
776 | { TUnicodeBidiInfo }
|
---|
777 |
|
---|
778 | function TUnicodeBidiInfo.GetEndOfLine: boolean;
|
---|
779 | begin
|
---|
780 | result := (Flags and BIDI_FLAG_END_OF_LINE) <> 0;
|
---|
781 | end;
|
---|
782 |
|
---|
783 | function TUnicodeBidiInfo.GetEndOfParagraph: boolean;
|
---|
784 | begin
|
---|
785 | result := (Flags and BIDI_FLAG_END_OF_PARAGRAPH) <> 0;
|
---|
786 | end;
|
---|
787 |
|
---|
788 | function TUnicodeBidiInfo.GetRemoved: boolean;
|
---|
789 | begin
|
---|
790 | result := (Flags and BIDI_FLAG_REMOVED) <> 0;
|
---|
791 | end;
|
---|
792 |
|
---|
793 | function TUnicodeBidiInfo.GetRightToLeft: boolean;
|
---|
794 | begin
|
---|
795 | result := Odd(BidiLevel);
|
---|
796 | end;
|
---|
797 |
|
---|
798 | function TUnicodeBidiInfo.GetParagraphRightToLeft: boolean;
|
---|
799 | begin
|
---|
800 | result := Odd(ParagraphBidiLevel);
|
---|
801 | end;
|
---|
802 |
|
---|
803 | function AnalyzeBidiUnicode(u: PCardinal; ALength: integer; baseDirection: cardinal): TUnicodeBidiArray;
|
---|
804 | type
|
---|
805 | TUnicodeAnalysisElement = record
|
---|
806 | bidiClass: TUnicodeBidiClass;
|
---|
807 | prevInIsolate, nextInIsolate: integer; //next index in current isolate
|
---|
808 | end;
|
---|
809 | TUnicodeAnalysisArray = array of TUnicodeAnalysisElement;
|
---|
810 |
|
---|
811 | var
|
---|
812 | a: TUnicodeAnalysisArray;
|
---|
813 |
|
---|
814 | procedure ResolveWeakTypes(startIndex, afterEndIndex: integer; startOfSequence, {%H-}endOfSequence: TUnicodeBidiClass);
|
---|
815 | var
|
---|
816 | curIndex,backIndex: Integer;
|
---|
817 | latestStrongClass, prevClass: TUnicodeBidiClass;
|
---|
818 | begin
|
---|
819 | //rules W1 and W2
|
---|
820 | prevClass := startOfSequence;
|
---|
821 | latestStrongClass:= prevClass;
|
---|
822 | curIndex := startIndex;
|
---|
823 | while curIndex <> afterEndIndex do
|
---|
824 | begin
|
---|
825 | if not result[curIndex].IsRemoved then
|
---|
826 | begin
|
---|
827 | case a[curIndex].bidiClass of
|
---|
828 | ubcNonSpacingMark: a[curIndex].bidiClass:= prevClass;
|
---|
829 | ubcEuropeanNumber: if latestStrongClass = ubcArabicLetter then a[curIndex].bidiClass:= ubcArabicNumber;
|
---|
830 | end;
|
---|
831 | case u[curIndex] of
|
---|
832 | UNICODE_LEFT_TO_RIGHT_ISOLATE,
|
---|
833 | UNICODE_RIGHT_TO_LEFT_ISOLATE,
|
---|
834 | UNICODE_FIRST_STRONG_ISOLATE,
|
---|
835 | UNICODE_POP_DIRECTIONAL_ISOLATE: prevClass := ubcOtherNeutrals;
|
---|
836 | else prevClass := a[curIndex].bidiClass;
|
---|
837 | end;
|
---|
838 | if prevClass in [ubcLeftToRight,ubcRightToLeft,ubcArabicLetter] then latestStrongClass:= prevClass;
|
---|
839 | end;
|
---|
840 | curIndex := a[curIndex].nextInIsolate;
|
---|
841 | end;
|
---|
842 |
|
---|
843 | // rule W4 and W5
|
---|
844 | prevClass := startOfSequence;
|
---|
845 | curIndex := startIndex;
|
---|
846 | while curIndex <> afterEndIndex do
|
---|
847 | begin
|
---|
848 | if not result[curIndex].IsRemoved then
|
---|
849 | begin
|
---|
850 | case a[curIndex].bidiClass of
|
---|
851 | ubcArabicLetter: a[curIndex].bidiClass := ubcRightToLeft;
|
---|
852 | ubcEuropeanNumber:
|
---|
853 | begin
|
---|
854 | backIndex := curIndex;
|
---|
855 | while backIndex > startIndex do
|
---|
856 | begin
|
---|
857 | backIndex -= 1;
|
---|
858 | if result[backIndex].IsRemoved then continue;
|
---|
859 | if a[backIndex].bidiClass = ubcEuropeanNumberTerminator then
|
---|
860 | a[backIndex].bidiClass := ubcEuropeanNumber
|
---|
861 | else break;
|
---|
862 | end;
|
---|
863 | end;
|
---|
864 | ubcEuropeanNumberSeparator:
|
---|
865 | if (prevClass = ubcEuropeanNumber) and (a[curIndex].nextInIsolate <> afterEndIndex) and
|
---|
866 | (a[a[curIndex].nextInIsolate].bidiClass = ubcEuropeanNumber) then
|
---|
867 | a[curIndex].bidiClass:= ubcEuropeanNumber;
|
---|
868 | ubcCommonSeparator:
|
---|
869 | if (prevClass in[ubcEuropeanNumber,ubcArabicNumber]) and (a[curIndex].nextInIsolate <> afterEndIndex) and
|
---|
870 | (a[a[curIndex].nextInIsolate].bidiClass = prevClass) then
|
---|
871 | a[curIndex].bidiClass:= prevClass;
|
---|
872 | ubcEuropeanNumberTerminator:
|
---|
873 | if prevClass = ubcEuropeanNumber then
|
---|
874 | a[curIndex].bidiClass:= ubcEuropeanNumber;
|
---|
875 | end;
|
---|
876 | prevClass := a[curIndex].bidiClass;
|
---|
877 | end;
|
---|
878 |
|
---|
879 | curIndex := a[curIndex].nextInIsolate;
|
---|
880 | end;
|
---|
881 |
|
---|
882 | // rule W6 and W7
|
---|
883 | curIndex := startIndex;
|
---|
884 | latestStrongClass := startOfSequence;
|
---|
885 | while curIndex <> afterEndIndex do
|
---|
886 | begin
|
---|
887 | if not result[curIndex].IsRemoved then
|
---|
888 | begin
|
---|
889 | case a[curIndex].bidiClass of
|
---|
890 | ubcEuropeanNumberSeparator,ubcEuropeanNumberTerminator,ubcCommonSeparator: a[curIndex].bidiClass := ubcOtherNeutrals;
|
---|
891 | ubcLeftToRight,ubcRightToLeft,ubcArabicLetter: latestStrongClass:= a[curIndex].bidiClass;
|
---|
892 | ubcEuropeanNumber: if latestStrongClass = ubcLeftToRight then a[curIndex].bidiClass := ubcLeftToRight;
|
---|
893 | end;
|
---|
894 | end;
|
---|
895 | curIndex := a[curIndex].nextInIsolate;
|
---|
896 | end;
|
---|
897 | end;
|
---|
898 |
|
---|
899 | procedure ResolveNeutrals(startIndex, afterEndIndex: integer; startOfSequence, endOfSequence: TUnicodeBidiClass);
|
---|
900 | var
|
---|
901 | curIndex,prevIndex,previewIndex: Integer;
|
---|
902 | curRTL, include, rightToLeftEmbedding: Boolean;
|
---|
903 | bidiClass: TUnicodeBidiClass;
|
---|
904 | begin
|
---|
905 | rightToLeftEmbedding := odd(result[startIndex].BidiLevel);
|
---|
906 | curIndex := startIndex;
|
---|
907 | curRTL := startOfSequence in [ubcRightToLeft,ubcArabicLetter];
|
---|
908 | while curIndex <> afterEndIndex do
|
---|
909 | begin
|
---|
910 | case a[curIndex].bidiClass of
|
---|
911 | ubcLeftToRight: curRTL := false;
|
---|
912 | ubcRightToLeft,ubcArabicLetter,ubcArabicNumber,ubcEuropeanNumber: curRTL := true;
|
---|
913 | else
|
---|
914 | if curRTL <> rightToLeftEmbedding then
|
---|
915 | begin
|
---|
916 | //determine whether following neutral chars are included in reverse direction
|
---|
917 | prevIndex := curIndex;
|
---|
918 | previewIndex := a[curIndex].nextInIsolate;
|
---|
919 | include := false;
|
---|
920 | while previewIndex <> afterEndIndex do //uses endOfSequence for overflow
|
---|
921 | begin
|
---|
922 | if previewIndex = afterEndIndex then
|
---|
923 | bidiClass:= endOfSequence
|
---|
924 | else
|
---|
925 | bidiClass:= a[previewIndex].bidiClass;
|
---|
926 | case bidiClass of
|
---|
927 | ubcLeftToRight:
|
---|
928 | begin
|
---|
929 | include := not curRTL;
|
---|
930 | break;
|
---|
931 | end;
|
---|
932 | ubcRightToLeft,ubcArabicLetter,ubcArabicNumber,ubcEuropeanNumber:
|
---|
933 | begin
|
---|
934 | include := curRTL;
|
---|
935 | break;
|
---|
936 | end;
|
---|
937 | end;
|
---|
938 | prevIndex := previewIndex;
|
---|
939 | previewIndex := a[previewIndex].nextInIsolate;
|
---|
940 | end;
|
---|
941 | if previewIndex = afterEndIndex then previewIndex := prevIndex;
|
---|
942 | if include then
|
---|
943 | begin
|
---|
944 | while curIndex <> previewIndex do
|
---|
945 | begin
|
---|
946 | if a[curIndex].bidiClass = ubcBoundaryNeutral then
|
---|
947 | result[curIndex].Flags := result[curIndex].Flags OR BIDI_FLAG_REMOVED; //supposed to be removed for rendering
|
---|
948 |
|
---|
949 | if a[curIndex].bidiClass in (ubcNeutral+[ubcBoundaryNeutral,ubcUnknown]) then
|
---|
950 | begin
|
---|
951 | if curRTL then a[curIndex].bidiClass := ubcRightToLeft
|
---|
952 | else a[curIndex].bidiClass := ubcLeftToRight;
|
---|
953 | end;
|
---|
954 |
|
---|
955 | curIndex := a[curIndex].nextInIsolate;
|
---|
956 | end;
|
---|
957 | end else
|
---|
958 | curRTL := rightToLeftEmbedding;
|
---|
959 | end;
|
---|
960 | end;
|
---|
961 |
|
---|
962 | if a[curIndex].bidiClass = ubcBoundaryNeutral then
|
---|
963 | result[curIndex].Flags := result[curIndex].Flags OR BIDI_FLAG_REMOVED; //supposed to be removed for rendering
|
---|
964 |
|
---|
965 | if a[curIndex].bidiClass in (ubcNeutral+[ubcBoundaryNeutral,ubcUnknown]) then
|
---|
966 | begin
|
---|
967 | if curRTL then a[curIndex].bidiClass := ubcRightToLeft
|
---|
968 | else a[curIndex].bidiClass := ubcLeftToRight;
|
---|
969 | end;
|
---|
970 |
|
---|
971 | curIndex := a[curIndex].nextInIsolate;
|
---|
972 | end;
|
---|
973 | end;
|
---|
974 |
|
---|
975 | procedure ResolveBrackets(startIndex, afterEndIndex: integer; startOfSequence, {%H-}endOfSequence: TUnicodeBidiClass);
|
---|
976 | type TBracketPair = record
|
---|
977 | openIndex,closeIndex: integer;
|
---|
978 | end;
|
---|
979 | var
|
---|
980 | bracketPairs: array of TBracketPair;
|
---|
981 | bracketPairCount: integer;
|
---|
982 | rightToLeft: boolean;
|
---|
983 |
|
---|
984 | procedure SortBracketPairs;
|
---|
985 | var
|
---|
986 | i,j,k: Integer;
|
---|
987 | temp: TBracketPair;
|
---|
988 | begin
|
---|
989 | for i := 1 to bracketPairCount-1 do
|
---|
990 | begin
|
---|
991 | for j := 0 to i-1 do
|
---|
992 | if bracketPairs[j].openIndex > bracketPairs[i].openIndex then
|
---|
993 | begin
|
---|
994 | temp := bracketPairs[i];
|
---|
995 | for k := i downto j+1 do
|
---|
996 | bracketPairs[k] := bracketPairs[k-1];
|
---|
997 | bracketPairs[j] := temp;
|
---|
998 | end;
|
---|
999 | end;
|
---|
1000 | end;
|
---|
1001 |
|
---|
1002 | procedure FindBrackets; // rule BD16
|
---|
1003 | const MAX_BRACKET_STACK = 63;
|
---|
1004 | var
|
---|
1005 | bracketStack: array[0..MAX_BRACKET_STACK-1] of record
|
---|
1006 | bracketCharInfo: TUnicodeBracketInfo;
|
---|
1007 | index: integer;
|
---|
1008 | end;
|
---|
1009 | bracketStackPos,peekPos: integer;
|
---|
1010 | curIndex: integer;
|
---|
1011 | curBracket: TUnicodeBracketInfo;
|
---|
1012 | begin
|
---|
1013 | bracketPairCount := 0;
|
---|
1014 | bracketStackPos := 0;
|
---|
1015 | bracketStack[0].index := -1; //avoid warning
|
---|
1016 | curIndex := startIndex;
|
---|
1017 | while curIndex <> afterEndIndex do
|
---|
1018 | begin
|
---|
1019 | if not (a[curIndex].bidiClass in [ubcLeftToRight,ubcRightToLeft]) then
|
---|
1020 | begin
|
---|
1021 | curBracket := GetUnicodeBracketInfo(u[curIndex]);
|
---|
1022 | if curBracket.IsBracket then
|
---|
1023 | begin
|
---|
1024 | // found opening bracket
|
---|
1025 | if curBracket.OpeningBracket = u[curIndex] then
|
---|
1026 | begin
|
---|
1027 | if bracketStackPos <= high(bracketStack) then
|
---|
1028 | begin
|
---|
1029 | bracketStack[bracketStackPos].bracketCharInfo := curBracket;
|
---|
1030 | bracketStack[bracketStackPos].index := curIndex;
|
---|
1031 | bracketStackPos += 1;
|
---|
1032 | end else
|
---|
1033 | break;
|
---|
1034 | end else
|
---|
1035 | begin
|
---|
1036 | for peekPos := bracketStackPos-1 downto 0 do
|
---|
1037 | if (bracketStack[peekPos].bracketCharInfo.ClosingBracket = u[curIndex]) or
|
---|
1038 | ((bracketStack[peekPos].bracketCharInfo.ClosingBracket = UNICODE_RIGHT_ANGLE_BRACKET) and (u[curIndex] = UNICODE_RIGHT_POINTING_ANGLE_BRACKET)) or
|
---|
1039 | ((bracketStack[peekPos].bracketCharInfo.ClosingBracket = UNICODE_RIGHT_POINTING_ANGLE_BRACKET) and (u[curIndex] = UNICODE_RIGHT_ANGLE_BRACKET)) then
|
---|
1040 | begin
|
---|
1041 | bracketStackPos := peekPos;
|
---|
1042 | if bracketPairCount >= length(bracketPairs) then
|
---|
1043 | setlength(bracketPairs, bracketPairCount*2 + 8);
|
---|
1044 | bracketPairs[bracketPairCount].openIndex := bracketStack[peekPos].index;
|
---|
1045 | bracketPairs[bracketPairCount].closeIndex := curIndex;
|
---|
1046 | inc(bracketPairCount);
|
---|
1047 | break;
|
---|
1048 | end;
|
---|
1049 | end;
|
---|
1050 | end;
|
---|
1051 | end;
|
---|
1052 | curIndex := a[curIndex].nextInIsolate;
|
---|
1053 | end;
|
---|
1054 | end;
|
---|
1055 |
|
---|
1056 | procedure SetCharClass(index: integer; newClass: TUnicodeBidiClass);
|
---|
1057 | begin
|
---|
1058 | a[index].bidiClass:= newClass;
|
---|
1059 | index := a[index].nextInIsolate;
|
---|
1060 | while (index <> afterEndIndex) and (GetUnicodeBidiClass(u[index]) = ubcNonSpacingMark) do
|
---|
1061 | begin
|
---|
1062 | a[index].bidiClass := newClass;
|
---|
1063 | index := a[index].nextInIsolate;
|
---|
1064 | end;
|
---|
1065 | end;
|
---|
1066 |
|
---|
1067 | procedure ResolveBrackets; // rule N0
|
---|
1068 | var
|
---|
1069 | i, curIndex: Integer;
|
---|
1070 | sameDirection, oppositeDirection, oppositeContext: boolean;
|
---|
1071 | begin
|
---|
1072 | for i := 0 to bracketPairCount-1 do
|
---|
1073 | begin
|
---|
1074 | curIndex := bracketPairs[i].openIndex+1;
|
---|
1075 | sameDirection:= false;
|
---|
1076 | oppositeDirection:= false;
|
---|
1077 | while curIndex <> bracketPairs[i].closeIndex do
|
---|
1078 | begin
|
---|
1079 | Assert((curIndex >= startIndex) and (curIndex < length(a)), 'Expecting valid index');
|
---|
1080 | case a[curIndex].bidiClass of
|
---|
1081 | ubcLeftToRight:
|
---|
1082 | if not rightToLeft then
|
---|
1083 | begin
|
---|
1084 | sameDirection := true;
|
---|
1085 | break;
|
---|
1086 | end else oppositeDirection:= true;
|
---|
1087 | ubcRightToLeft,ubcArabicLetter,ubcEuropeanNumber,ubcArabicNumber:
|
---|
1088 | if rightToLeft then
|
---|
1089 | begin
|
---|
1090 | sameDirection := true;
|
---|
1091 | break;
|
---|
1092 | end else oppositeDirection:= true;
|
---|
1093 | end;
|
---|
1094 | curIndex := a[curIndex].nextInIsolate;
|
---|
1095 | end;
|
---|
1096 | if sameDirection then
|
---|
1097 | begin
|
---|
1098 | if rightToLeft then
|
---|
1099 | begin
|
---|
1100 | SetCharClass(bracketPairs[i].openIndex, ubcRightToLeft);
|
---|
1101 | SetCharClass(bracketPairs[i].closeIndex, ubcRightToLeft);
|
---|
1102 | end else
|
---|
1103 | begin
|
---|
1104 | SetCharClass(bracketPairs[i].openIndex, ubcLeftToRight);
|
---|
1105 | SetCharClass(bracketPairs[i].closeIndex, ubcLeftToRight);
|
---|
1106 | end;
|
---|
1107 | end else
|
---|
1108 | if oppositeDirection then
|
---|
1109 | begin
|
---|
1110 | curIndex := a[bracketPairs[i].openIndex].prevInIsolate;
|
---|
1111 | oppositeContext := false;
|
---|
1112 | while curIndex >= startIndex do
|
---|
1113 | begin
|
---|
1114 | case a[curIndex].bidiClass of
|
---|
1115 | ubcRightToLeft,ubcArabicLetter,ubcEuropeanNumber,ubcArabicNumber:
|
---|
1116 | begin
|
---|
1117 | oppositeContext := not rightToLeft;
|
---|
1118 | break;
|
---|
1119 | end;
|
---|
1120 | ubcLeftToRight:
|
---|
1121 | begin
|
---|
1122 | oppositeContext := rightToLeft;
|
---|
1123 | break;
|
---|
1124 | end;
|
---|
1125 | end;
|
---|
1126 | curIndex := a[curIndex].prevInIsolate;
|
---|
1127 | end;
|
---|
1128 | if rightToLeft xor oppositeContext then
|
---|
1129 | begin
|
---|
1130 | SetCharClass(bracketPairs[i].openIndex, ubcRightToLeft);
|
---|
1131 | SetCharClass(bracketPairs[i].closeIndex, ubcRightToLeft);
|
---|
1132 | end else
|
---|
1133 | begin
|
---|
1134 | SetCharClass(bracketPairs[i].openIndex, ubcLeftToRight);
|
---|
1135 | SetCharClass(bracketPairs[i].closeIndex, ubcLeftToRight);
|
---|
1136 | end;
|
---|
1137 | end;
|
---|
1138 | end;
|
---|
1139 | end;
|
---|
1140 |
|
---|
1141 | begin
|
---|
1142 | rightToLeft:= startOfSequence in[ubcRightToLeft,ubcArabicLetter];
|
---|
1143 | FindBrackets;
|
---|
1144 | SortBracketPairs;
|
---|
1145 | ResolveBrackets;
|
---|
1146 | end;
|
---|
1147 |
|
---|
1148 | procedure AnalyzeSequence(startIndex, afterEndIndex: integer; sos, eos: TUnicodeBidiClass);
|
---|
1149 | begin
|
---|
1150 | if afterEndIndex = startIndex then exit;
|
---|
1151 | ResolveWeakTypes(startIndex, afterEndIndex, sos, eos);
|
---|
1152 | ResolveBrackets(startIndex, afterEndIndex, sos, eos);
|
---|
1153 | ResolveNeutrals(startIndex, afterEndIndex, sos, eos);
|
---|
1154 | end;
|
---|
1155 |
|
---|
1156 | procedure SameLevelRuns(startIndex: integer);
|
---|
1157 | var
|
---|
1158 | curBidiLevel: byte;
|
---|
1159 | latestIndex,curIndex, curStartIndex: Integer;
|
---|
1160 | curSos,eos: TUnicodeBidiClass;
|
---|
1161 | begin
|
---|
1162 | curIndex := startIndex;
|
---|
1163 | while (curIndex<>-1) and result[curIndex].IsRemoved do
|
---|
1164 | curIndex := a[curIndex].nextInIsolate;
|
---|
1165 | if curIndex = -1 then exit;
|
---|
1166 |
|
---|
1167 | curStartIndex:= curIndex;
|
---|
1168 | curBidiLevel := result[curIndex].bidiLevel;
|
---|
1169 | if odd(curBidiLevel) then curSos := ubcRightToLeft else curSos := ubcLeftToRight;
|
---|
1170 | latestIndex := -1;
|
---|
1171 | while curIndex <> -1 do
|
---|
1172 | begin
|
---|
1173 | if not result[curIndex].IsRemoved then
|
---|
1174 | begin
|
---|
1175 | if (latestIndex <> -1) and (result[curIndex].bidiLevel <> curBidiLevel) then
|
---|
1176 | begin
|
---|
1177 | if result[curIndex].bidiLevel > curBidiLevel then
|
---|
1178 | begin
|
---|
1179 | if odd(result[curIndex].bidiLevel) then eos := ubcRightToLeft else eos := ubcLeftToRight;
|
---|
1180 | end else
|
---|
1181 | begin
|
---|
1182 | if odd(curBidiLevel) then eos := ubcRightToLeft else eos := ubcLeftToRight;
|
---|
1183 | end;
|
---|
1184 |
|
---|
1185 | AnalyzeSequence(curStartIndex, a[latestIndex].nextInIsolate, curSos, eos);
|
---|
1186 |
|
---|
1187 | curSos := eos;
|
---|
1188 | curBidiLevel:= result[curIndex].bidiLevel;
|
---|
1189 | curStartIndex:= curIndex;
|
---|
1190 | end;
|
---|
1191 | latestIndex := curIndex;
|
---|
1192 | end;
|
---|
1193 |
|
---|
1194 | if (a[curIndex].nextInIsolate = -1) and (latestIndex<>-1) then
|
---|
1195 | begin
|
---|
1196 | if odd(result[latestIndex].bidiLevel) then eos := ubcRightToLeft else eos := ubcLeftToRight;
|
---|
1197 | AnalyzeSequence(curStartIndex, a[latestIndex].nextInIsolate, curSos, eos);
|
---|
1198 | break;
|
---|
1199 | end;
|
---|
1200 |
|
---|
1201 | curIndex := a[curIndex].nextInIsolate;
|
---|
1202 | end;
|
---|
1203 | end;
|
---|
1204 |
|
---|
1205 | //analyse bidi formatting of an embedding or an override block
|
---|
1206 | procedure AnalyzeFormattingBlocks(startIndex, lastIndex: integer; minBidiLevel: byte; formattingCode: cardinal);
|
---|
1207 | var curIndex, nextIndex, levelIncrease: integer;
|
---|
1208 | subFormatBeforeStart, subFormatStart, formatNesting: integer;
|
---|
1209 | subFormatCode: cardinal;
|
---|
1210 | begin
|
---|
1211 | case formattingCode of
|
---|
1212 | UNICODE_LEFT_TO_RIGHT_OVERRIDE,UNICODE_LEFT_TO_RIGHT_EMBEDDING:
|
---|
1213 | if odd(minBidiLevel) then minBidiLevel += 1;
|
---|
1214 | UNICODE_RIGHT_TO_LEFT_OVERRIDE,UNICODE_RIGHT_TO_LEFT_EMBEDDING:
|
---|
1215 | if not odd(minBidiLevel) then minBidiLevel += 1;
|
---|
1216 | end;
|
---|
1217 | nextIndex := startIndex;
|
---|
1218 | repeat
|
---|
1219 | Assert(nextIndex >= 0, 'Expecting valid index');
|
---|
1220 | curIndex := nextIndex;
|
---|
1221 | nextIndex := a[curIndex].nextInIsolate;
|
---|
1222 | result[curIndex].bidiLevel := minBidiLevel;
|
---|
1223 |
|
---|
1224 | //apply override
|
---|
1225 | if formattingCode = UNICODE_LEFT_TO_RIGHT_OVERRIDE then a[curIndex].bidiClass := ubcLeftToRight
|
---|
1226 | else if formattingCode = UNICODE_RIGHT_TO_LEFT_OVERRIDE then a[curIndex].bidiClass := ubcRightToLeft;
|
---|
1227 |
|
---|
1228 | case u[curIndex] of
|
---|
1229 | UNICODE_LEFT_TO_RIGHT_EMBEDDING, UNICODE_RIGHT_TO_LEFT_EMBEDDING,
|
---|
1230 | UNICODE_LEFT_TO_RIGHT_OVERRIDE, UNICODE_RIGHT_TO_LEFT_OVERRIDE:
|
---|
1231 | begin
|
---|
1232 | result[curIndex].Flags := result[curIndex].Flags OR BIDI_FLAG_REMOVED;
|
---|
1233 | case u[curIndex] of
|
---|
1234 | UNICODE_LEFT_TO_RIGHT_OVERRIDE,UNICODE_LEFT_TO_RIGHT_EMBEDDING:
|
---|
1235 | if odd(minBidiLevel) then levelIncrease := 1
|
---|
1236 | else levelIncrease := 2;
|
---|
1237 | UNICODE_RIGHT_TO_LEFT_OVERRIDE,UNICODE_RIGHT_TO_LEFT_EMBEDDING:
|
---|
1238 | if odd(minBidiLevel) then levelIncrease := 2
|
---|
1239 | else levelIncrease := 1;
|
---|
1240 | else levelIncrease:= 2;
|
---|
1241 | end;
|
---|
1242 | if minBidiLevel <= UNICODE_MAX_BIDI_DEPTH-levelIncrease-1 then
|
---|
1243 | begin
|
---|
1244 | subFormatCode:= u[curIndex];
|
---|
1245 | subFormatBeforeStart := curIndex;
|
---|
1246 | subFormatStart := nextIndex;
|
---|
1247 | formatNesting:= 1;
|
---|
1248 | while formatNesting > 0 do
|
---|
1249 | begin
|
---|
1250 | //sub-format ends because no more chars
|
---|
1251 | if curIndex = lastIndex then
|
---|
1252 | begin
|
---|
1253 | if curIndex <> subFormatBeforeStart then
|
---|
1254 | AnalyzeFormattingBlocks(subFormatStart, curIndex, minBidiLevel+levelIncrease, subFormatCode);
|
---|
1255 | break;
|
---|
1256 | end;
|
---|
1257 |
|
---|
1258 | Assert(nextIndex >= 0, 'Expecting valid index');
|
---|
1259 | case u[nextIndex] of
|
---|
1260 | UNICODE_LEFT_TO_RIGHT_EMBEDDING, UNICODE_RIGHT_TO_LEFT_EMBEDDING,
|
---|
1261 | UNICODE_LEFT_TO_RIGHT_OVERRIDE, UNICODE_RIGHT_TO_LEFT_OVERRIDE: inc(formatNesting);
|
---|
1262 | UNICODE_POP_DIRECTIONAL_FORMATTING:
|
---|
1263 | begin
|
---|
1264 | dec(formatNesting);
|
---|
1265 | if formatNesting = 0 then
|
---|
1266 | begin
|
---|
1267 | //sub-format ends because enough matching pop chars found
|
---|
1268 | if curIndex <> subFormatBeforeStart then
|
---|
1269 | AnalyzeFormattingBlocks(subFormatStart, curIndex, minBidiLevel+levelIncrease, subFormatCode);
|
---|
1270 |
|
---|
1271 | curIndex := nextIndex;
|
---|
1272 | nextIndex := a[curIndex].nextInIsolate;
|
---|
1273 | result[curIndex].Flags := result[curIndex].Flags OR BIDI_FLAG_REMOVED;
|
---|
1274 | break;
|
---|
1275 | end;
|
---|
1276 | end;
|
---|
1277 | end;
|
---|
1278 |
|
---|
1279 | curIndex := nextIndex;
|
---|
1280 | nextIndex := a[curIndex].nextInIsolate;
|
---|
1281 | end;
|
---|
1282 | end;
|
---|
1283 | end;
|
---|
1284 | UNICODE_POP_DIRECTIONAL_FORMATTING: //ignored when no matching formatting code
|
---|
1285 | begin
|
---|
1286 | result[curIndex].Flags := result[curIndex].Flags OR BIDI_FLAG_REMOVED;
|
---|
1287 | end;
|
---|
1288 | end;
|
---|
1289 | until curIndex = lastIndex;
|
---|
1290 | end;
|
---|
1291 |
|
---|
1292 | procedure ResolveImplicitLevels(startIndex: integer); // rule I1 and I2
|
---|
1293 | var
|
---|
1294 | curIndex: Integer;
|
---|
1295 | begin
|
---|
1296 | curIndex := startIndex;
|
---|
1297 | while curIndex <> -1 do
|
---|
1298 | begin
|
---|
1299 | case a[curIndex].bidiClass of
|
---|
1300 | ubcRightToLeft,ubcArabicLetter:
|
---|
1301 | if not Odd(result[curIndex].bidiLevel) then result[curIndex].bidiLevel += 1;
|
---|
1302 | ubcEuropeanNumber,ubcArabicNumber:
|
---|
1303 | if Odd(result[curIndex].bidiLevel) then result[curIndex].bidiLevel += 1
|
---|
1304 | else result[curIndex].bidiLevel += 2;
|
---|
1305 | ubcLeftToRight: if Odd(result[curIndex].bidiLevel) then result[curIndex].bidiLevel += 1;
|
---|
1306 | end;
|
---|
1307 | curIndex := a[curIndex].nextInIsolate;
|
---|
1308 | end;
|
---|
1309 | end;
|
---|
1310 |
|
---|
1311 | procedure ResetEndOfParagraphLevels(startIndex: integer); // rule L1
|
---|
1312 | var
|
---|
1313 | prevIndex,curIndex: Integer;
|
---|
1314 |
|
---|
1315 | procedure TweakWhiteSpaceBefore(index: integer);
|
---|
1316 | var
|
---|
1317 | isWhiteSpaceOrIsolate: boolean;
|
---|
1318 | begin
|
---|
1319 | while index <> -1 do
|
---|
1320 | begin
|
---|
1321 | case u[index] of
|
---|
1322 | UNICODE_FIRST_STRONG_ISOLATE, UNICODE_POP_DIRECTIONAL_ISOLATE,
|
---|
1323 | UNICODE_LEFT_TO_RIGHT_ISOLATE, UNICODE_RIGHT_TO_LEFT_ISOLATE:
|
---|
1324 | isWhiteSpaceOrIsolate:= true;
|
---|
1325 | else
|
---|
1326 | isWhiteSpaceOrIsolate:= GetUnicodeBidiClass(u[index]) = ubcWhiteSpace;
|
---|
1327 | end;
|
---|
1328 | if isWhiteSpaceOrIsolate then
|
---|
1329 | result[index].bidiLevel := result[index].ParagraphBidiLevel
|
---|
1330 | else
|
---|
1331 | break;
|
---|
1332 | index := a[index].prevInIsolate;
|
---|
1333 | end;
|
---|
1334 | end;
|
---|
1335 |
|
---|
1336 | begin
|
---|
1337 | prevIndex := -1;
|
---|
1338 | curIndex := startIndex;
|
---|
1339 | while curIndex <> -1 do
|
---|
1340 | begin
|
---|
1341 | case GetUnicodeBidiClass(u[curIndex]) of
|
---|
1342 | ubcSegmentSeparator, ubcParagraphSeparator:
|
---|
1343 | begin
|
---|
1344 | result[curIndex].bidiLevel := result[curIndex].ParagraphBidiLevel;
|
---|
1345 | TweakWhiteSpaceBefore(prevIndex);
|
---|
1346 | end;
|
---|
1347 | end;
|
---|
1348 | prevIndex := curIndex;
|
---|
1349 | curIndex := a[curIndex].nextInIsolate;
|
---|
1350 | end;
|
---|
1351 | TweakWhiteSpaceBefore(prevIndex);
|
---|
1352 | end;
|
---|
1353 |
|
---|
1354 | function DetermineIsolateDirectionFromFirstStrongClass(startIndex: integer): cardinal;
|
---|
1355 | var
|
---|
1356 | curIndex: Integer;
|
---|
1357 | firstStrongClass: TUnicodeBidiClass;
|
---|
1358 | begin
|
---|
1359 | curIndex := startIndex;
|
---|
1360 | firstStrongClass := ubcUnknown;
|
---|
1361 | while curIndex <> -1 do
|
---|
1362 | begin
|
---|
1363 | Assert(curIndex >= 0, 'Expecting valid index');
|
---|
1364 | if firstStrongClass = ubcUnknown then
|
---|
1365 | begin
|
---|
1366 | if a[curIndex].bidiClass in [ubcLeftToRight,ubcRightToLeft,ubcArabicLetter] then
|
---|
1367 | begin
|
---|
1368 | firstStrongClass := a[curIndex].bidiClass;
|
---|
1369 | break;
|
---|
1370 | end;
|
---|
1371 | end;
|
---|
1372 | curIndex := a[curIndex].nextInIsolate;
|
---|
1373 | end;
|
---|
1374 |
|
---|
1375 | if firstStrongClass in[ubcRightToLeft,ubcArabicLetter] then
|
---|
1376 | result := UNICODE_RIGHT_TO_LEFT_ISOLATE
|
---|
1377 | else
|
---|
1378 | result := UNICODE_LEFT_TO_RIGHT_ISOLATE;
|
---|
1379 | end;
|
---|
1380 |
|
---|
1381 | procedure LinkCharsInIsolate(startIndex: integer; charCount: integer; out endIndex : integer);
|
---|
1382 | var
|
---|
1383 | curIndex,isolateStackPos,
|
---|
1384 | prevIndex: Integer;
|
---|
1385 | begin
|
---|
1386 | a[startIndex].prevInIsolate := -1;
|
---|
1387 | prevIndex := -1;
|
---|
1388 | curIndex := startIndex;
|
---|
1389 | isolateStackPos:= 0;
|
---|
1390 | while curIndex < startIndex+charCount do
|
---|
1391 | begin
|
---|
1392 | if u[curIndex] = UNICODE_POP_DIRECTIONAL_ISOLATE then
|
---|
1393 | if isolateStackPos > 0 then dec(isolateStackPos);
|
---|
1394 |
|
---|
1395 | if isolateStackPos = 0 then
|
---|
1396 | begin
|
---|
1397 | if prevIndex<>-1 then a[prevIndex].nextInIsolate := curIndex;
|
---|
1398 | a[curIndex].prevInIsolate := prevIndex;
|
---|
1399 |
|
---|
1400 | prevIndex := curIndex;
|
---|
1401 | end;
|
---|
1402 |
|
---|
1403 | case u[curIndex] of
|
---|
1404 | UNICODE_LEFT_TO_RIGHT_ISOLATE, UNICODE_RIGHT_TO_LEFT_ISOLATE, UNICODE_FIRST_STRONG_ISOLATE: inc(isolateStackPos);
|
---|
1405 | end;
|
---|
1406 | inc(curIndex);
|
---|
1407 | end;
|
---|
1408 | a[prevIndex].nextInIsolate := -1;
|
---|
1409 | endIndex := prevIndex;
|
---|
1410 | end;
|
---|
1411 |
|
---|
1412 | //split isolates in order to format them independently
|
---|
1413 | procedure AnalyzeIsolates(startIndex: integer; charCount: integer; isolateDirection: cardinal; minBidiLevel: byte = 0;
|
---|
1414 | isParagraph: boolean = false);
|
---|
1415 | var curIndex, endIndex: integer;
|
---|
1416 | nextIndex: integer;
|
---|
1417 | subBidiLevel, levelIncrease: byte;
|
---|
1418 | subIsolateStart: integer;
|
---|
1419 | subIsolateDirection: cardinal;
|
---|
1420 | begin
|
---|
1421 | if charCount = 0 then exit;
|
---|
1422 | Assert(startIndex>=0, 'Invalid start index');
|
---|
1423 |
|
---|
1424 | LinkCharsInIsolate(startIndex, charCount, endIndex);
|
---|
1425 |
|
---|
1426 | if isolateDirection = UNICODE_FIRST_STRONG_ISOLATE then
|
---|
1427 | isolateDirection := DetermineIsolateDirectionFromFirstStrongClass(startIndex);
|
---|
1428 |
|
---|
1429 | case isolateDirection of
|
---|
1430 | UNICODE_LEFT_TO_RIGHT_ISOLATE: if Odd(minBidiLevel) then minBidiLevel += 1;
|
---|
1431 | UNICODE_RIGHT_TO_LEFT_ISOLATE: if not Odd(minBidiLevel) then minBidiLevel += 1;
|
---|
1432 | else
|
---|
1433 | raise EInvalidOperation.Create('Unknown isolate direction');
|
---|
1434 | end;
|
---|
1435 |
|
---|
1436 | if isParagraph then
|
---|
1437 | begin
|
---|
1438 | curIndex := startIndex;
|
---|
1439 | while curIndex <> -1 do
|
---|
1440 | begin
|
---|
1441 | result[curIndex].ParagraphBidiLevel := minBidiLevel;
|
---|
1442 | curIndex := a[curIndex].nextInIsolate;
|
---|
1443 | end;
|
---|
1444 | end;
|
---|
1445 |
|
---|
1446 | case isolateDirection of
|
---|
1447 | UNICODE_LEFT_TO_RIGHT_ISOLATE: AnalyzeFormattingBlocks(startIndex, endIndex, minBidiLevel, UNICODE_LEFT_TO_RIGHT_EMBEDDING);
|
---|
1448 | UNICODE_RIGHT_TO_LEFT_ISOLATE: AnalyzeFormattingBlocks(startIndex, endIndex, minBidiLevel, UNICODE_RIGHT_TO_LEFT_EMBEDDING);
|
---|
1449 | end;
|
---|
1450 |
|
---|
1451 | SameLevelRuns(startIndex);
|
---|
1452 | ResolveImplicitLevels(startIndex);
|
---|
1453 |
|
---|
1454 | if isParagraph then
|
---|
1455 | ResetEndOfParagraphLevels(startIndex);
|
---|
1456 |
|
---|
1457 | //analyse sub-isolates
|
---|
1458 | curIndex := startIndex;
|
---|
1459 | while curIndex <> -1 do
|
---|
1460 | begin
|
---|
1461 | Assert(curIndex >= 0, 'Expecting valid index');
|
---|
1462 | case u[curIndex] of
|
---|
1463 | UNICODE_LEFT_TO_RIGHT_ISOLATE, UNICODE_RIGHT_TO_LEFT_ISOLATE, UNICODE_FIRST_STRONG_ISOLATE:
|
---|
1464 | begin
|
---|
1465 | subBidiLevel := result[curIndex].bidiLevel;
|
---|
1466 | nextIndex := a[curIndex].nextInIsolate;
|
---|
1467 | if nextIndex <> -1 then
|
---|
1468 | begin
|
---|
1469 | if result[nextIndex].bidiLevel > subBidiLevel then
|
---|
1470 | subBidiLevel:= result[nextIndex].bidiLevel;
|
---|
1471 | end;
|
---|
1472 | if ((isolateDirection = UNICODE_LEFT_TO_RIGHT_ISOLATE) and
|
---|
1473 | (u[curIndex] = UNICODE_RIGHT_TO_LEFT_ISOLATE)) or
|
---|
1474 | ((isolateDirection = UNICODE_LEFT_TO_RIGHT_ISOLATE) and
|
---|
1475 | (u[curIndex] = UNICODE_RIGHT_TO_LEFT_ISOLATE)) then
|
---|
1476 | levelIncrease := 1
|
---|
1477 | else
|
---|
1478 | levelIncrease:= 2;
|
---|
1479 | if subBidiLevel+levelIncrease <= UNICODE_MAX_BIDI_DEPTH-1 then
|
---|
1480 | begin
|
---|
1481 | subIsolateDirection := u[curIndex];
|
---|
1482 | subIsolateStart:= curIndex+1;
|
---|
1483 | curIndex := nextIndex;
|
---|
1484 |
|
---|
1485 | //sub-isolates ends because no more chars
|
---|
1486 | if curIndex = -1 then
|
---|
1487 | begin
|
---|
1488 | AnalyzeIsolates(subIsolateStart, startIndex+charCount-subIsolateStart, subIsolateDirection, subBidiLevel+1);
|
---|
1489 | break;
|
---|
1490 | end else
|
---|
1491 | begin
|
---|
1492 | AnalyzeIsolates(subIsolateStart, curIndex-subIsolateStart, subIsolateDirection, subBidiLevel+1);
|
---|
1493 | continue;
|
---|
1494 | end;
|
---|
1495 | end;
|
---|
1496 | end;
|
---|
1497 | end;
|
---|
1498 | curIndex := a[curIndex].nextInIsolate;
|
---|
1499 | end;
|
---|
1500 | end;
|
---|
1501 |
|
---|
1502 | //split UTF8 string into paragraphs
|
---|
1503 | procedure SplitParagraphs;
|
---|
1504 | var
|
---|
1505 | lineStartIndex, curIndex: integer;
|
---|
1506 | begin
|
---|
1507 | curIndex := 0;
|
---|
1508 | lineStartIndex := curIndex;
|
---|
1509 | while curIndex < ALength do
|
---|
1510 | begin
|
---|
1511 | if a[curIndex].bidiClass = ubcParagraphSeparator then
|
---|
1512 | begin
|
---|
1513 | //skip second CRLF char
|
---|
1514 | if ((u[curIndex] = 13) or (u[curIndex] = 10)) and (curIndex+1 < ALength) and
|
---|
1515 | ((u[curIndex+1] = 13) or (u[curIndex+1] = 10)) and (u[curIndex+1] <> u[curIndex]) then
|
---|
1516 | inc(curIndex);
|
---|
1517 |
|
---|
1518 | result[curIndex].Flags := result[curIndex].Flags or BIDI_FLAG_END_OF_PARAGRAPH;
|
---|
1519 |
|
---|
1520 | AnalyzeIsolates(lineStartIndex, curIndex+1-lineStartIndex, baseDirection, 0, true);
|
---|
1521 | lineStartIndex := curIndex+1;
|
---|
1522 | end;
|
---|
1523 | inc(curIndex);
|
---|
1524 | end;
|
---|
1525 | if curIndex > lineStartIndex then
|
---|
1526 | begin
|
---|
1527 | result[curIndex-1].Flags := result[curIndex-1].Flags or BIDI_FLAG_END_OF_PARAGRAPH;
|
---|
1528 | AnalyzeIsolates(lineStartIndex, curIndex-lineStartIndex, baseDirection, 0, true);
|
---|
1529 | end;
|
---|
1530 | end;
|
---|
1531 |
|
---|
1532 | var i: integer;
|
---|
1533 | begin
|
---|
1534 | setlength(a, ALength);
|
---|
1535 | setlength(result, ALength);
|
---|
1536 | if ALength > 0 then
|
---|
1537 | begin
|
---|
1538 | for i := 0 to high(a) do
|
---|
1539 | begin
|
---|
1540 | a[i].bidiClass := GetUnicodeBidiClass(u[i]);
|
---|
1541 | if u[i] = UNICODE_LINE_SEPARATOR then //line separator within paragraph
|
---|
1542 | result[i].Flags := result[i].Flags or BIDI_FLAG_END_OF_LINE
|
---|
1543 | end;
|
---|
1544 | SplitParagraphs;
|
---|
1545 | end;
|
---|
1546 | end;
|
---|
1547 |
|
---|
1548 | function GetUnicodeDisplayOrder(const AInfo: TUnicodeBidiArray): TUnicodeDisplayOrder;
|
---|
1549 | begin
|
---|
1550 | if length(AInfo)=0 then
|
---|
1551 | result := nil
|
---|
1552 | else
|
---|
1553 | result := GetUnicodeDisplayOrder(@AInfo[0], sizeof(TUnicodeBidiInfo), length(AInfo));
|
---|
1554 | end;
|
---|
1555 |
|
---|
1556 | function GetUnicodeDisplayOrder(ALevels: PByte; ACount: integer): TUnicodeDisplayOrder;
|
---|
1557 |
|
---|
1558 | procedure DetermineDisplayOrderRec(AOffset: integer; AStartIndex, ABlockCount: integer; AEmbeddingLevel: byte);
|
---|
1559 | var minLevel: byte;
|
---|
1560 | blockIndex,subStartIndex,subCount, subOffset: integer;
|
---|
1561 | begin
|
---|
1562 | //writeln('DetermineDisplayOrderRec('+inttostr(AOffset)+'/'+inttostr(ACount)+',' + inttostr(AStartIndex) +',*' +inttostr(ABlockCount)+','+inttostr(AEmbeddingLevel)+')');
|
---|
1563 | blockIndex := 0;
|
---|
1564 | subStartIndex := 0; //avoid warning
|
---|
1565 | while blockIndex < ABlockCount do
|
---|
1566 | begin
|
---|
1567 | Assert(AOffset < ACount, 'Offset out of bounds');
|
---|
1568 | if ALevels[AOffset] = AEmbeddingLevel then
|
---|
1569 | begin
|
---|
1570 | if odd(AEmbeddingLevel) then
|
---|
1571 | result[AStartIndex+ABlockCount-1-blockIndex] := AOffset
|
---|
1572 | else
|
---|
1573 | result[AStartIndex+blockIndex] := AOffset;
|
---|
1574 | inc(AOffset);
|
---|
1575 | inc(blockIndex);
|
---|
1576 | end else
|
---|
1577 | begin
|
---|
1578 | if not odd(AEmbeddingLevel) then
|
---|
1579 | subStartIndex := AStartIndex+blockIndex;
|
---|
1580 | subOffset := AOffset;
|
---|
1581 | minLevel := ALevels[AOffset];
|
---|
1582 | inc(AOffset);
|
---|
1583 | inc(blockIndex);
|
---|
1584 | subCount := 1;
|
---|
1585 | while true do
|
---|
1586 | begin
|
---|
1587 | if (blockIndex < ABlockCount) and (ALevels[AOffset] > AEmbeddingLevel) then
|
---|
1588 | begin
|
---|
1589 | Assert(AOffset < ACount, 'Offset out of bounds');
|
---|
1590 | if ALevels[AOffset] < minLevel then
|
---|
1591 | minLevel:= ALevels[AOffset];
|
---|
1592 | inc(AOffset);
|
---|
1593 | inc(blockIndex);
|
---|
1594 | inc(subCount);
|
---|
1595 | end else
|
---|
1596 | begin
|
---|
1597 | if odd(AEmbeddingLevel) then
|
---|
1598 | subStartIndex := AStartIndex+ABlockCount-1-(blockIndex-1);
|
---|
1599 | DetermineDisplayOrderRec(subOffset, subStartIndex, subCount, minLevel);
|
---|
1600 | break;
|
---|
1601 | end;
|
---|
1602 | end;
|
---|
1603 | end;
|
---|
1604 | end;
|
---|
1605 | end;
|
---|
1606 |
|
---|
1607 | begin
|
---|
1608 | setlength(result, ACount);
|
---|
1609 | DetermineDisplayOrderRec(0, 0, ACount, 0);
|
---|
1610 | end;
|
---|
1611 |
|
---|
1612 | function GetUnicodeDisplayOrder(ABidiInfo: PUnicodeBidiInfo; AStride, ACount: integer): TUnicodeDisplayOrder;
|
---|
1613 | var
|
---|
1614 | levels: packed array of byte;
|
---|
1615 | originalIndices: array of integer;
|
---|
1616 | index,len, i: integer;
|
---|
1617 | p: PByte;
|
---|
1618 | begin
|
---|
1619 | len := 0;
|
---|
1620 | p := PByte(ABidiInfo);
|
---|
1621 | for i := 0 to ACount-1 do
|
---|
1622 | begin
|
---|
1623 | if not PUnicodeBidiInfo(p)^.IsRemoved then inc(len);
|
---|
1624 | inc(p, AStride);
|
---|
1625 | end;
|
---|
1626 | if len = 0 then
|
---|
1627 | result := nil
|
---|
1628 | else
|
---|
1629 | begin
|
---|
1630 | setlength(levels, len);
|
---|
1631 | setlength(originalIndices, len);
|
---|
1632 | p := PByte(ABidiInfo);
|
---|
1633 | index := 0;
|
---|
1634 | for i := 0 to ACount-1 do
|
---|
1635 | begin
|
---|
1636 | if not PUnicodeBidiInfo(p)^.IsRemoved then
|
---|
1637 | begin
|
---|
1638 | levels[index] := PUnicodeBidiInfo(p)^.BidiLevel;
|
---|
1639 | originalIndices[index] := i;
|
---|
1640 | inc(index);
|
---|
1641 | end;
|
---|
1642 | inc(p, AStride);
|
---|
1643 | end;
|
---|
1644 | result := GetUnicodeDisplayOrder(@levels[0], len);
|
---|
1645 | for i := 0 to len-1 do
|
---|
1646 | result[i] := originalIndices[result[i]];
|
---|
1647 | end;
|
---|
1648 | end;
|
---|
1649 |
|
---|
1650 | end.
|
---|
1651 |
|
---|