source: trunk/Packages/bgrabitmap/bgraunicode.pas

Last change on this file was 2, checked in by chronos, 5 years ago
File size: 67.5 KB
Line 
1unit BGRAUnicode;
2{ Implementation of Unicode bidi algorithm }
3{ Author: circular }
4{ License: modified LGPL }
5
6{$mode objfpc}{$H+}
7{$modeswitch advancedrecords}
8
9interface
10
11uses
12 Classes, SysUtils;
13
14type
 { Bidirectional character class of a code point, as returned by GetUnicodeBidiClass.
   GenerateUnicodeFunctions maps the bidi class abbreviations read from UnicodeData.txt as follows:
   BN=ubcBoundaryNeutral, S=ubcSegmentSeparator, B=ubcParagraphSeparator, WS=ubcWhiteSpace,
   ON=ubcOtherNeutrals, CS=ubcCommonSeparator, NSM=ubcNonSpacingMark, L=ubcLeftToRight,
   EN=ubcEuropeanNumber, ES=ubcEuropeanNumberSeparator, ET=ubcEuropeanNumberTerminator,
   R=ubcRightToLeft, AL=ubcArabicLetter, AN=ubcArabicNumber.
   ubcUnknown is the result for code points that are absent from the generated table. }
15 TUnicodeBidiClass = (ubcBoundaryNeutral, ubcSegmentSeparator, ubcParagraphSeparator, ubcWhiteSpace, ubcOtherNeutrals,
16 ubcCommonSeparator, ubcNonSpacingMark,
17 ubcLeftToRight, ubcEuropeanNumber, ubcEuropeanNumberSeparator, ubcEuropeanNumberTerminator,
18 ubcRightToLeft, ubcArabicLetter, ubcArabicNumber, ubcUnknown);
19
20const
21 ubcNeutral = [ubcSegmentSeparator, ubcParagraphSeparator, ubcWhiteSpace, ubcOtherNeutrals];
22
23 BIDI_FLAG_REMOVED = 1; //RLE, LRE, RLO, LRO, PDF and BN are supposed to be removed
24 BIDI_FLAG_END_OF_PARAGRAPH = 2; //end of paragraph (paragraph spacing below)
25 BIDI_FLAG_END_OF_LINE = 4; //line break <br>
26
27type
28 PUnicodeBidiInfo = ^TUnicodeBidiInfo;
29
30 { TUnicodeBidiInfo }
31
32 TUnicodeBidiInfo = packed record //element type of TUnicodeBidiArray (the result type of AnalyzeBidiUnicode); four byte-sized fields
33 private
 //getters backing the read-only properties below; their bodies are not part of this excerpt
34 function GetEndOfLine: boolean;
35 function GetEndOfParagraph: boolean;
36 function GetRemoved: boolean;
37 function GetRightToLeft: boolean;
38 function GetParagraphRightToLeft: boolean;
39 public
 //NOTE(review): presumably the bidi embedding level of the enclosing paragraph and of the character itself - confirm against AnalyzeBidiUnicode
40 ParagraphBidiLevel, BidiLevel: byte;
 //NOTE(review): Flags presumably holds a combination of the BIDI_FLAG_* constants; Dummy looks like unused padding - confirm
41 Flags, Dummy: Byte;
 //read-only views; NOTE(review): presumably derived from Flags and from the two level fields - confirm in the getters
42 property IsRemoved: boolean read GetRemoved;
43 property IsRightToLeft: boolean read GetRightToLeft;
44 property IsParagraphRightToLeft: boolean read GetParagraphRightToLeft;
45 property IsEndOfLine: boolean read GetEndOfLine;
46 property IsEndOfParagraph: boolean read GetEndOfParagraph;
47 end;
48
49 TUnicodeBidiArray = packed array of TUnicodeBidiInfo;
50 TUnicodeDisplayOrder = array of integer;
51
52const
53 //maximum nesting level of isolates and bidi-formatting blocks (char bidi level can actually be higher due to char properties)
54 UNICODE_MAX_BIDI_DEPTH = 125;
55
56 UNICODE_LINE_SEPARATOR = $2028; //equivalent of <br>
57 UNICODE_PARAGRAPH_SEPARATOR = $2029; //equivalent of </p>
58 UNICODE_NEXT_LINE = $0085; //equivalent of CRLF
59
60 //characters that split lines into top-level bidi blocks
61 UNICODE_LEFT_TO_RIGHT_ISOLATE = $2066;
62 UNICODE_RIGHT_TO_LEFT_ISOLATE = $2067;
63 UNICODE_FIRST_STRONG_ISOLATE = $2068;
64 UNICODE_POP_DIRECTIONAL_ISOLATE = $2069;
65
66 //characters that split into bidi sub-blocks (called "formatting")
67 UNICODE_LEFT_TO_RIGHT_EMBEDDING = $202A;
68 UNICODE_RIGHT_TO_LEFT_EMBEDDING = $202B;
69 UNICODE_LEFT_TO_RIGHT_OVERRIDE = $202D;
70 UNICODE_RIGHT_TO_LEFT_OVERRIDE = $202E;
71 UNICODE_POP_DIRECTIONAL_FORMATTING = $202C;
72
73 //characters that mark direction without splitting the bidi block
74 UNICODE_LEFT_TO_RIGHT_MARK = $200E;
75 UNICODE_RIGHT_TO_LEFT_MARK = $200F;
76 UNICODE_ARABIC_LETTER_MARK = $061C;
77
78 //data separators
79 UNICODE_INFORMATION_SEPARATOR_FOUR = $001C; //end-of-file
80 UNICODE_INFORMATION_SEPARATOR_THREE = $001D; //section separator
81 UNICODE_INFORMATION_SEPARATOR_TWO = $001E; //record separator, kind of equivalent to paragraph separator
82 UNICODE_INFORMATION_SEPARATOR_ONE = $001F; //field separator, kind of equivalent to Tab
83
84 //zero-width
85 UNICODE_ZERO_WIDTH_SPACE = $200B;
86 UNICODE_ZERO_WIDTH_NON_JOINER = $200C;
87 UNICODE_ZERO_WIDTH_NO_BREAK_SPACE = $FEFF; //byte order mark
88 UNICODE_ZERO_WIDTH_JOINER = $200D;
89
90 //arabic letters
91 UNICODE_ARABIC_TATWEEL = $0640; //horizontal line that makes a ligature with most letters
92
93 //ideographic punctuation
94 UNICODE_IDEOGRAPHIC_COMMA = $3001;
95 UNICODE_IDEOGRAPHIC_FULL_STOP = $3002;
96 UNICODE_FULLWIDTH_COMMA = $FF0C;
97 UNICODE_HORIZONTAL_ELLIPSIS = $2026;
98
99 //bracket equivalence
100 UNICODE_RIGHT_POINTING_ANGLE_BRACKET = $232A;
101 UNICODE_RIGHT_ANGLE_BRACKET = $3009;
102
103type //bracket matching
 { Result of GetUnicodeBracketInfo: tells whether a code point belongs to a bracket pair and which pair.
   According to the code template in GenerateUnicodeFunctions, a code point that is not a bracket
   yields IsBracket = false with both code points set to 0. }
104 TUnicodeBracketInfo = record
105 IsBracket: boolean; //true when the looked-up code point is either member (opening or closing) of a pair
106 OpeningBracket,ClosingBracket: cardinal; //code points of the opening and of the closing bracket of that pair
107 end;
108
109function GetUnicodeBidiClass(u: cardinal): TUnicodeBidiClass;
110function GetUnicodeBracketInfo(u: cardinal): TUnicodeBracketInfo;
111function IsZeroWidthUnicode(u: cardinal): boolean;
112function IsUnicodeParagraphSeparator(u: cardinal): boolean;
113function IsUnicodeCrLf(u: cardinal): boolean;
114function IsUnicodeIsolateOrFormatting(u: cardinal): boolean;
115
116
117{ Analyze unicode and return bidi levels for each character.
118 baseDirection can be either UNICODE_LEFT_TO_RIGHT_ISOLATE, UNICODE_RIGHT_TO_LEFT_ISOLATE or UNICODE_FIRST_STRONG_ISOLATE }
119function AnalyzeBidiUnicode(u: PCardinal; ALength: integer; baseDirection: cardinal): TUnicodeBidiArray;
120
121{ Determine display order, provided the display surface is horizontally infinite }
122function GetUnicodeDisplayOrder(const AInfo: TUnicodeBidiArray): TUnicodeDisplayOrder; overload;
123function GetUnicodeDisplayOrder(ALevels: PByte; ACount: integer): TUnicodeDisplayOrder; overload;
124function GetUnicodeDisplayOrder(ABidiInfo: PUnicodeBidiInfo; AStride, ACount: integer): TUnicodeDisplayOrder; overload;
125procedure GenerateUnicodeFunctions; //to regenerate the code of GetUnicodeBidiClass and GetUnicodeBracketInfo
126
127implementation
128
{ Regenerates the Pascal source of GetUnicodeBidiClass and GetUnicodeBracketInfo.

  Input (current directory):  'UnicodeData.txt' and 'BidiBrackets.txt'
  Output (current directory): 'UnicodeFunctions.generated.pas' (overwritten)

  I/O errors raise whatever exception the RTL raises for the failing file
  operation; all files and helper lists are released in that case too. }
procedure GenerateUnicodeFunctions;
const Indent = ' ';
var
  tIn,tOut: TextFile;

  { Appends to tOut the case labels of all code points within AMinCode..AMaxCode
    whose bidi class (5th field of UnicodeData.txt) is listed in AClasses.
    One case branch is written for each run of consecutive file entries
    sharing the same class. }
  procedure IncludeClasses(AClasses: TStrings; AMinCode, AMaxCode: integer);
  var
    line,curBidi,newBidi,charName: string;
    codes: array of integer;
    codeCount: integer;
    cells: TStringList;
    curCode, rangeFirst, loCode, hiCode, k: LongInt;

    { Writes the pending codes as one case branch for curBidi and empties the list.
      Three or more consecutive codes are folded into a 'first..last' range. }
    procedure FlushCase;
    var i: integer;
      buf: string;
      bufLines: TStringList;
    begin
      if codeCount = 0 then exit;

      bufLines := TStringList.Create;
      try
        i := 0;
        buf := Indent+' ';
        while i < codeCount do
        begin
          if i > 0 then buf += ', ';

          //wrap the label list so that generated lines stay readable
          if length(buf) > 95 then
          begin
            bufLines.Add(buf);
            buf := Indent+' ';
          end;

          if (i+2 < codeCount) and (codes[i]+1 = codes[i+1]) and (codes[i+1]+1 = codes[i+2]) then
          begin
            buf += '$'+IntToHex(codes[i],2) + '..';
            while (i+1 < codeCount) and (codes[i]+1 = codes[i+1]) do inc(i);
            buf += '$'+IntToHex(codes[i],2);
          end else
            buf += '$'+IntToHex(codes[i],2);

          inc(i);
        end;

        if trim(buf) <> '' then bufLines.Add(buf);

        buf := '';
        for i := 0 to bufLines.Count-1 do
        begin
          if i > 0 then buf += LineEnding;
          buf += bufLines[i];
        end;
      finally
        bufLines.Free;
      end;

      case curBidi of
      'CS': WriteLn(tOut,buf+': result := ubcCommonSeparator;');
      'L': WriteLn(tOut,buf+': result := ubcLeftToRight;');
      'EN': WriteLn(tOut,buf+': result := ubcEuropeanNumber;');
      'ES': WriteLn(tOut,buf+': result := ubcEuropeanNumberSeparator;');
      'ET': WriteLn(tOut,buf+': result := ubcEuropeanNumberTerminator;');
      'R': WriteLn(tOut,buf+': result := ubcRightToLeft;');
      'AL': WriteLn(tOut,buf+': result := ubcArabicLetter;');
      'AN': WriteLn(tOut,buf+': result := ubcArabicNumber;');
      'NSM': WriteLn(tOut,buf+': result := ubcNonSpacingMark;');
      'BN': WriteLn(tOut,buf+': result := ubcBoundaryNeutral;');
      'B': WriteLn(tOut,buf+': result := ubcParagraphSeparator;');
      'S': WriteLn(tOut,buf+': result := ubcSegmentSeparator;');
      'WS': WriteLn(tOut,buf+': result := ubcWhiteSpace;');
      'ON': WriteLn(tOut,buf+': result := ubcOtherNeutrals;');
      end;
      codeCount:= 0;
    end;

    { Queues ACode for the current case branch if it lies within AMinCode..AMaxCode. }
    procedure AddCode(ACode: integer);
    begin
      if (ACode < AMinCode) or (ACode > AMaxCode) then exit;
      if codeCount >= length(codes) then
        setlength(codes, codeCount*2 + 8);
      codes[codeCount] := ACode;
      inc(codeCount);
    end;

    { True when AText ends with ASuffix (case-sensitive). }
    function HasSuffix(const AText, ASuffix: string): boolean;
    begin
      result := (length(AText) >= length(ASuffix)) and
        (copy(AText, length(AText)-length(ASuffix)+1, length(ASuffix)) = ASuffix);
    end;

  begin
    AssignFile(tIn, 'UnicodeData.txt');
    Reset(tIn);
    try
      cells := TStringList.Create;
      try
        //list configuration does not change between lines
        cells.Delimiter := ';';
        cells.QuoteChar := '"';
        cells.StrictDelimiter := true;

        codeCount := 0;
        curBidi := '?';
        codes := nil;
        rangeFirst := -1;
        while not eof(tIn) do
        begin
          ReadLn(tIn,line);
          cells.DelimitedText := line;
          if cells.Count >= 5 then
          begin
            newBidi := cells[4];
            if AClasses.IndexOf(newBidi)<>-1 then
            begin
              if newBidi <> curBidi then
              begin
                FlushCase;
                curBidi := newBidi;
              end;
              curCode := StrToInt('$'+cells[0]);
              charName := cells[1];

              //UnicodeData.txt abbreviates large blocks as a pair of entries
              //'<Name, First>' / '<Name, Last>': every code point in between
              //has the same properties and must be emitted as well
              if (rangeFirst >= 0) and HasSuffix(charName, ', Last>') then
              begin
                //the first code point was queued when its own line was read;
                //clip to the requested window before expanding
                loCode := rangeFirst+1;
                if loCode < AMinCode then loCode := AMinCode;
                hiCode := curCode;
                if hiCode > AMaxCode then hiCode := AMaxCode;
                for k := loCode to hiCode do AddCode(k);
              end else
                AddCode(curCode);

              if HasSuffix(charName, ', First>') then
                rangeFirst := curCode
              else
                rangeFirst := -1;
            end;
          end;
        end;
        FlushCase;
      finally
        cells.Free;
      end;
    finally
      CloseFile(tIn);
    end;
  end;

  { Writes the complete source of GetUnicodeBidiClass: nested case statements
    that first narrow down the code point window, then list the classes. }
  procedure ParseUnicodeBidiClasses;
  var c: TStringList;
    dateFmt: TFormatSettings;

    { Writes one inner 'case u of' covering AMinCode..AMaxCode. }
    procedure Include(AMinCode,AMaxCode: integer);
    begin
      Writeln(tOut,Indent+'case u of');
      c.CommaText := 'BN';
      IncludeClasses(c, AMinCode,AMaxCode);
      c.CommaText := 'S';
      IncludeClasses(c, AMinCode,AMaxCode);
      c.CommaText := 'B';
      IncludeClasses(c, AMinCode,AMaxCode);
      c.CommaText := 'WS';
      IncludeClasses(c, AMinCode,AMaxCode);
      c.CommaText := 'L,R,AL';
      IncludeClasses(c, AMinCode,AMaxCode);
      c.CommaText := 'EN';
      IncludeClasses(c, AMinCode,AMaxCode);
      c.CommaText := 'ES';
      IncludeClasses(c, AMinCode,AMaxCode);
      c.CommaText := 'ET';
      IncludeClasses(c, AMinCode,AMaxCode);
      c.CommaText := 'AN';
      IncludeClasses(c, AMinCode,AMaxCode);
      c.CommaText := 'CS,NSM';
      IncludeClasses(c, AMinCode,AMaxCode);
      c.CommaText := 'ON';
      IncludeClasses(c, AMinCode,AMaxCode);
      writeln(tout,Indent+'else result := ubcUnknown;');
      writeln(tout,Indent+'end;');
    end;

  begin
    Writeln(tOut,'function GetUnicodeBidiClass(u: cardinal): TUnicodeBidiClass;');
    //format the date stamp with a private copy of the settings so that the
    //process-wide FormatSettings is left untouched
    dateFmt := DefaultFormatSettings;
    dateFmt.ShortDateFormat := 'yyyy/mm/dd';
    Writeln(tOut,'begin //generated '+DateToStr(Date, dateFmt));
    c := TStringList.Create;
    try
      writeln(tOut,' case u of');
      writeln(tOut,' $00000..$07FFF:');
      writeln(tOut,' case u of');
      writeln(tOut,' $00000..$003FF:');
      Include($00000, $003FF);
      writeln(tOut,' $00400..$007FF:');
      Include($00400, $007FF);
      writeln(tOut,' $00800..$00FFF:');
      Include($00800, $00FFF);
      writeln(tOut,' $01000..$01FFF:');
      Include($01000, $01FFF);
      writeln(tOut,' else');
      Include($02000, $07FFF);
      writeln(tOut,' end;');
      writeln(tOut,' $08000..$0FFFF:');
      Include($08000, $0FFFF);
      writeln(tOut,' else');
      writeln(tOut,' case u of');
      writeln(tOut,' $10000..$10FFF:');
      Include($10000, $10FFF);
      writeln(tOut,' $11000..$117FF:');
      Include($11000, $117FF);
      writeln(tOut,' $11800..$17FFF:');
      Include($11800, $17FFF);
      //NOTE(review): code points above $FFFFF (plane 16) are not covered
      //and end up in the generated 'else result := ubcUnknown' branch
      writeln(tOut,' $18000..$FFFFF:');
      Include($18000, $FFFFF);
      writeln(tOut,' else result := ubcUnknown;');
      writeln(tOut,' end');
      writeln(tOut,' end');
    finally
      c.Free;
    end;

    writeln(tout,'end;');
    writeln(tout);
  end;

  { Writes the declaration of TUnicodeBracketInfo and the complete source of
    GetUnicodeBracketInfo, with one case branch per pair whose third field in
    BidiBrackets.txt starts with 'o' (each pair is thus emitted once). }
  procedure ParseBidiBrackets;
  var elem: TStringList;
    line: string;
  begin
    Writeln(tOut,'type');
    writeln(tout,' TUnicodeBracketInfo = record');
    writeln(tout,' IsBracket: boolean;');
    writeln(tout,' OpeningBracket,ClosingBracket: cardinal;');
    writeln(tout,' end;');
    Writeln(tOut,'function GetUnicodeBracketInfo(u: cardinal): TUnicodeBracketInfo;');
    Writeln(tOut,' procedure Bracket(AOpening,AClosing: cardinal);');
    Writeln(tOut,' begin');
    Writeln(tOut,' result.IsBracket := true;');
    Writeln(tOut,' result.OpeningBracket := AOpening;');
    Writeln(tOut,' result.ClosingBracket := AClosing;');
    Writeln(tOut,' end;');
    Writeln(tOut,'begin');
    Writeln(tOut,' case u of');

    assignfile(tIn, 'BidiBrackets.txt');
    reset(tin);
    try
      elem := TStringList.Create;
      try
        elem.Delimiter := ';';
        elem.StrictDelimiter:= true;
        while not eof(tin) do
        begin
          readln(tin, line);
          elem.DelimitedText:= line;
          if elem.Count >= 3 then
          begin
            if copy(trim(elem[2]),1,1) = 'o' then
              writeln(tOut,' $'+trim(elem[0])+', $'+trim(elem[1])+': Bracket($'+trim(elem[0])+', $'+trim(elem[1])+');');
          end;
        end;
      finally
        elem.Free;
      end;
    finally
      closefile(tin);
    end;

    writeln(tout,' else');
    writeln(tout,' begin');
    writeln(tout,' result.IsBracket := false;');
    writeln(tout,' result.OpeningBracket := 0;');
    writeln(tout,' result.ClosingBracket := 0;');
    writeln(tout,' end;');
    Writeln(tOut,' end;');
    Writeln(tOut,'end;');
    Writeln(tOut);
  end;

begin
  AssignFile(tOut, 'UnicodeFunctions.generated.pas');
  Rewrite(tOut);
  try
    ParseUnicodeBidiClasses;
    ParseBidiBrackets;
  finally
    //flush and release the output file even if an input file is missing or malformed
    CloseFile(tOut);
  end;
end;
376
{ Returns the bidirectional class of code point u by means of nested case tables.
  The body is machine-generated: GenerateUnicodeFunctions writes this function
  from UnicodeData.txt, so it should be regenerated rather than edited by hand.
  Any code point that is not listed in the tables yields ubcUnknown. Note that
  some block boundaries appear as isolated values (for example $4E00 and $9FEF),
  so the code points lying between such a pair are not listed either. }
377function GetUnicodeBidiClass(u: cardinal): TUnicodeBidiClass;
378begin //generated 2018-06-12
379 case u of
380 $00000..$07FFF:
381 case u of
382 $00000..$003FF:
383 case u of
384 $00..$08, $0E..$1B, $7F..$84, $86..$9F, $AD: result := ubcBoundaryNeutral;
385 $09, $0B, $1F: result := ubcSegmentSeparator;
386 $0A, $0D, $1C..$1E, $85: result := ubcParagraphSeparator;
387 $0C, $20: result := ubcWhiteSpace;
388 $41..$5A, $61..$7A, $AA, $B5, $BA, $C0..$D6, $D8..$F6, $F8..$2B8, $2BB..$2C1, $2D0, $2D1,
389 $2E0..$2E4, $2EE, $370..$373, $376, $377, $37A..$37D, $37F, $386, $388..$38A, $38C, $38E..$3A1,
390 $3A3..$3F5, $3F7..$3FF: result := ubcLeftToRight;
391 $30..$39, $B2, $B3, $B9: result := ubcEuropeanNumber;
392 $2B, $2D: result := ubcEuropeanNumberSeparator;
393 $23..$25, $A2..$A5, $B0, $B1: result := ubcEuropeanNumberTerminator;
394 $2C, $2E, $2F, $3A, $A0: result := ubcCommonSeparator;
395 $300..$36F: result := ubcNonSpacingMark;
396 $21, $22, $26..$2A, $3B..$40, $5B..$60, $7B..$7E, $A1, $A6..$A9, $AB, $AC, $AE, $AF, $B4,
397 $B6..$B8, $BB..$BF, $D7, $F7, $2B9, $2BA, $2C2..$2CF, $2D2..$2DF, $2E5..$2ED, $2EF..$2FF,
398 $374, $375, $37E, $384, $385, $387, $3F6: result := ubcOtherNeutrals;
399 else result := ubcUnknown;
400 end;
401 $00400..$007FF:
402 case u of
403 $400..$482, $48A..$52F, $531..$556, $559..$589: result := ubcLeftToRight;
404 $5BE, $5C0, $5C3, $5C6, $5D0..$5EA, $5EF..$5F4: result := ubcRightToLeft;
405 $608, $60B, $60D, $61B, $61C, $61E..$64A, $66D..$66F, $671..$6D5, $6E5, $6E6, $6EE, $6EF,
406 $6FA..$70D, $70F, $710, $712..$72F, $74D..$7A5, $7B1: result := ubcArabicLetter;
407 $7C0..$7EA, $7F4, $7F5, $7FA, $7FE, $7FF: result := ubcRightToLeft;
408 $6F0..$6F9: result := ubcEuropeanNumber;
409 $58F, $609, $60A, $66A: result := ubcEuropeanNumberTerminator;
410 $600..$605, $660..$669, $66B, $66C, $6DD: result := ubcArabicNumber;
411 $483..$489, $591..$5BD, $5BF, $5C1, $5C2, $5C4, $5C5, $5C7: result := ubcNonSpacingMark;
412 $60C: result := ubcCommonSeparator;
413 $610..$61A, $64B..$65F, $670, $6D6..$6DC, $6DF..$6E4, $6E7, $6E8, $6EA..$6ED, $711, $730..$74A,
414 $7A6..$7B0, $7EB..$7F3, $7FD: result := ubcNonSpacingMark;
415 $58A, $58D, $58E, $606, $607, $60E, $60F, $6DE, $6E9, $7F6..$7F9: result := ubcOtherNeutrals;
416 else result := ubcUnknown;
417 end;
418 $00800..$00FFF:
419 case u of
420 $800..$815, $81A, $824, $828, $830..$83E, $840..$858, $85E: result := ubcRightToLeft;
421 $860..$86A, $8A0..$8B4, $8B6..$8BD: result := ubcArabicLetter;
422 $903..$939, $93B, $93D..$940, $949..$94C, $94E..$950, $958..$961, $964..$980, $982, $983,
423 $985..$98C, $98F, $990, $993..$9A8, $9AA..$9B0, $9B2, $9B6..$9B9, $9BD..$9C0, $9C7, $9C8,
424 $9CB, $9CC, $9CE, $9D7, $9DC, $9DD, $9DF..$9E1, $9E6..$9F1, $9F4..$9FA, $9FC, $9FD, $A03,
425 $A05..$A0A, $A0F, $A10, $A13..$A28, $A2A..$A30, $A32, $A33, $A35, $A36, $A38, $A39, $A3E..$A40,
426 $A59..$A5C, $A5E, $A66..$A6F, $A72..$A74, $A76, $A83, $A85..$A8D, $A8F..$A91, $A93..$AA8,
427 $AAA..$AB0, $AB2, $AB3, $AB5..$AB9, $ABD..$AC0, $AC9, $ACB, $ACC, $AD0, $AE0, $AE1, $AE6..$AF0,
428 $AF9, $B02, $B03, $B05..$B0C, $B0F, $B10, $B13..$B28, $B2A..$B30, $B32, $B33, $B35..$B39,
429 $B3D, $B3E, $B40, $B47, $B48, $B4B, $B4C, $B57, $B5C, $B5D, $B5F..$B61, $B66..$B77, $B83,
430 $B85..$B8A, $B8E..$B90, $B92..$B95, $B99, $B9A, $B9C, $B9E, $B9F, $BA3, $BA4, $BA8..$BAA,
431 $BAE..$BB9, $BBE, $BBF, $BC1, $BC2, $BC6..$BC8, $BCA..$BCC, $BD0, $BD7, $BE6..$BF2, $C01..$C03,
432 $C05..$C0C, $C0E..$C10, $C12..$C28, $C2A..$C39, $C3D, $C41..$C44, $C58..$C5A, $C60, $C61,
433 $C66..$C6F, $C7F, $C80, $C82..$C8C, $C8E..$C90, $C92..$CA8, $CAA..$CB3, $CB5..$CB9, $CBD..$CC4,
434 $CC6..$CC8, $CCA, $CCB, $CD5, $CD6, $CDE, $CE0, $CE1, $CE6..$CEF, $CF1, $CF2, $D02, $D03,
435 $D05..$D0C, $D0E..$D10, $D12..$D3A, $D3D..$D40, $D46..$D48, $D4A..$D4C, $D4E, $D4F, $D54..$D61,
436 $D66..$D7F, $D82, $D83, $D85..$D96, $D9A..$DB1, $DB3..$DBB, $DBD, $DC0..$DC6, $DCF..$DD1,
437 $DD8..$DDF, $DE6..$DEF, $DF2..$DF4, $E01..$E30, $E32, $E33, $E40..$E46, $E4F..$E5B, $E81,
438 $E82, $E84, $E87, $E88, $E8A, $E8D, $E94..$E97, $E99..$E9F, $EA1..$EA3, $EA5, $EA7, $EAA,
439 $EAB, $EAD..$EB0, $EB2, $EB3, $EBD, $EC0..$EC4, $EC6, $ED0..$ED9, $EDC..$EDF, $F00..$F17,
440 $F1A..$F34, $F36, $F38, $F3E..$F47, $F49..$F6C, $F7F, $F85, $F88..$F8C, $FBE..$FC5, $FC7..$FCC,
441 $FCE..$FDA: result := ubcLeftToRight;
442 $9F2, $9F3, $9FB, $AF1, $BF9, $E3F: result := ubcEuropeanNumberTerminator;
443 $8E2: result := ubcArabicNumber;
444 $816..$819, $81B..$823, $825..$827, $829..$82D, $859..$85B, $8D3..$8E1, $8E3..$902, $93A,
445 $93C, $941..$948, $94D, $951..$957, $962, $963, $981, $9BC, $9C1..$9C4, $9CD, $9E2, $9E3,
446 $9FE, $A01, $A02, $A3C, $A41, $A42, $A47, $A48, $A4B..$A4D, $A51, $A70, $A71, $A75, $A81,
447 $A82, $ABC, $AC1..$AC5, $AC7, $AC8, $ACD, $AE2, $AE3, $AFA..$AFF, $B01, $B3C, $B3F, $B41..$B44,
448 $B4D, $B56, $B62, $B63, $B82, $BC0, $BCD, $C00, $C04, $C3E..$C40, $C46..$C48, $C4A..$C4D,
449 $C55, $C56, $C62, $C63, $C81, $CBC, $CCC, $CCD, $CE2, $CE3, $D00, $D01, $D3B, $D3C, $D41..$D44,
450 $D4D, $D62, $D63, $DCA, $DD2..$DD4, $DD6, $E31, $E34..$E3A, $E47..$E4E, $EB1, $EB4..$EB9,
451 $EBB, $EBC, $EC8..$ECD, $F18, $F19, $F35, $F37, $F39, $F71..$F7E, $F80..$F84, $F86, $F87,
452 $F8D..$F97, $F99..$FBC, $FC6: result := ubcNonSpacingMark;
453 $BF3..$BF8, $BFA, $C78..$C7E, $F3A..$F3D: result := ubcOtherNeutrals;
454 else result := ubcUnknown;
455 end;
456 $01000..$01FFF:
457 case u of
458 $180E: result := ubcBoundaryNeutral;
459 $1680: result := ubcWhiteSpace;
460 $1000..$102C, $1031, $1038, $103B, $103C, $103F..$1057, $105A..$105D, $1061..$1070, $1075..$1081,
461 $1083, $1084, $1087..$108C, $108E..$109C, $109E..$10C5, $10C7, $10CD, $10D0..$1248, $124A..$124D,
462 $1250..$1256, $1258, $125A..$125D, $1260..$1288, $128A..$128D, $1290..$12B0, $12B2..$12B5,
463 $12B8..$12BE, $12C0, $12C2..$12C5, $12C8..$12D6, $12D8..$1310, $1312..$1315, $1318..$135A,
464 $1360..$137C, $1380..$138F, $13A0..$13F5, $13F8..$13FD, $1401..$167F, $1681..$169A, $16A0..$16F8,
465 $1700..$170C, $170E..$1711, $1720..$1731, $1735, $1736, $1740..$1751, $1760..$176C, $176E..$1770,
466 $1780..$17B3, $17B6, $17BE..$17C5, $17C7, $17C8, $17D4..$17DA, $17DC, $17E0..$17E9, $1810..$1819,
467 $1820..$1878, $1880..$1884, $1887..$18A8, $18AA, $18B0..$18F5, $1900..$191E, $1923..$1926,
468 $1929..$192B, $1930, $1931, $1933..$1938, $1946..$196D, $1970..$1974, $1980..$19AB, $19B0..$19C9,
469 $19D0..$19DA, $1A00..$1A16, $1A19, $1A1A, $1A1E..$1A55, $1A57, $1A61, $1A63, $1A64, $1A6D..$1A72,
470 $1A80..$1A89, $1A90..$1A99, $1AA0..$1AAD, $1B04..$1B33, $1B35, $1B3B, $1B3D..$1B41, $1B43..$1B4B,
471 $1B50..$1B6A, $1B74..$1B7C, $1B82..$1BA1, $1BA6, $1BA7, $1BAA, $1BAE..$1BE5, $1BE7, $1BEA..$1BEC,
472 $1BEE, $1BF2, $1BF3, $1BFC..$1C2B, $1C34, $1C35, $1C3B..$1C49, $1C4D..$1C88, $1C90..$1CBA,
473 $1CBD..$1CC7, $1CD3, $1CE1, $1CE9..$1CEC, $1CEE..$1CF3, $1CF5..$1CF7, $1D00..$1DBF, $1E00..$1F15,
474 $1F18..$1F1D, $1F20..$1F45, $1F48..$1F4D, $1F50..$1F57, $1F59, $1F5B, $1F5D, $1F5F..$1F7D,
475 $1F80..$1FB4, $1FB6..$1FBC, $1FBE, $1FC2..$1FC4, $1FC6..$1FCC, $1FD0..$1FD3, $1FD6..$1FDB,
476 $1FE0..$1FEC, $1FF2..$1FF4, $1FF6..$1FFC: result := ubcLeftToRight;
477 $17DB: result := ubcEuropeanNumberTerminator;
478 $102D..$1030, $1032..$1037, $1039, $103A, $103D, $103E, $1058, $1059, $105E..$1060, $1071..$1074,
479 $1082, $1085, $1086, $108D, $109D, $135D..$135F, $1712..$1714, $1732..$1734, $1752, $1753,
480 $1772, $1773, $17B4, $17B5, $17B7..$17BD, $17C6, $17C9..$17D3, $17DD, $180B..$180D, $1885,
481 $1886, $18A9, $1920..$1922, $1927, $1928, $1932, $1939..$193B, $1A17, $1A18, $1A1B, $1A56,
482 $1A58..$1A5E, $1A60, $1A62, $1A65..$1A6C, $1A73..$1A7C, $1A7F, $1AB0..$1ABE, $1B00..$1B03,
483 $1B34, $1B36..$1B3A, $1B3C, $1B42, $1B6B..$1B73, $1B80, $1B81, $1BA2..$1BA5, $1BA8, $1BA9,
484 $1BAB..$1BAD, $1BE6, $1BE8, $1BE9, $1BED, $1BEF..$1BF1, $1C2C..$1C33, $1C36, $1C37, $1CD0..$1CD2,
485 $1CD4..$1CE0, $1CE2..$1CE8, $1CED, $1CF4, $1CF8, $1CF9, $1DC0..$1DF9, $1DFB..$1DFF: result := ubcNonSpacingMark;
486 $1390..$1399, $1400, $169B, $169C, $17F0..$17F9, $1800..$180A, $1940, $1944, $1945, $19DE..$19FF,
487 $1FBD, $1FBF..$1FC1, $1FCD..$1FCF, $1FDD..$1FDF, $1FED..$1FEF, $1FFD, $1FFE: result := ubcOtherNeutrals;
488 else result := ubcUnknown;
489 end;
490 else
491 case u of
492 $200B..$200D, $2060..$2064, $206A..$206F: result := ubcBoundaryNeutral;
493 $2029: result := ubcParagraphSeparator;
494 $2000..$200A, $2028, $205F, $3000: result := ubcWhiteSpace;
495 $200E: result := ubcLeftToRight;
496 $200F: result := ubcRightToLeft;
497 $2071, $207F, $2090..$209C, $2102, $2107, $210A..$2113, $2115, $2119..$211D, $2124, $2126,
498 $2128, $212A..$212D, $212F..$2139, $213C..$213F, $2145..$2149, $214E, $214F, $2160..$2188,
499 $2336..$237A, $2395, $249C..$24E9, $26AC, $2800..$28FF, $2C00..$2C2E, $2C30..$2C5E, $2C60..$2CE4,
500 $2CEB..$2CEE, $2CF2, $2CF3, $2D00..$2D25, $2D27, $2D2D, $2D30..$2D67, $2D6F, $2D70, $2D80..$2D96,
501 $2DA0..$2DA6, $2DA8..$2DAE, $2DB0..$2DB6, $2DB8..$2DBE, $2DC0..$2DC6, $2DC8..$2DCE, $2DD0..$2DD6,
502 $2DD8..$2DDE, $3005..$3007, $3021..$3029, $302E, $302F, $3031..$3035, $3038..$303C, $3041..$3096,
503 $309D..$309F, $30A1..$30FA, $30FC..$30FF, $3105..$312F, $3131..$318E, $3190..$31BA, $31F0..$321C,
504 $3220..$324F, $3260..$327B, $327F..$32B0, $32C0..$32CB, $32D0..$32FE, $3300..$3376, $337B..$33DD,
505 $33E0..$33FE, $3400, $4DB5, $4E00: result := ubcLeftToRight;
506 $2070, $2074..$2079, $2080..$2089, $2488..$249B: result := ubcEuropeanNumber;
507 $207A, $207B, $208A, $208B, $2212: result := ubcEuropeanNumberSeparator;
508 $2030..$2034, $20A0..$20BF, $212E, $2213: result := ubcEuropeanNumberTerminator;
509 $202F, $2044: result := ubcCommonSeparator;
510 $20D0..$20F0, $2CEF..$2CF1, $2D7F, $2DE0..$2DFF, $302A..$302D, $3099, $309A: result := ubcNonSpacingMark;
511 $2010..$2027, $2035..$2043, $2045..$205E, $207C..$207E, $208C..$208E, $2100, $2101, $2103..$2106,
512 $2108, $2109, $2114, $2116..$2118, $211E..$2123, $2125, $2127, $2129, $213A, $213B, $2140..$2144,
513 $214A..$214D, $2150..$215F, $2189..$218B, $2190..$2211, $2214..$2335, $237B..$2394, $2396..$2426,
514 $2440..$244A, $2460..$2487, $24EA..$26AB, $26AD..$27FF, $2900..$2B73, $2B76..$2B95, $2B98..$2BC8,
515 $2BCA..$2BFE, $2CE5..$2CEA, $2CF9..$2CFF, $2E00..$2E4E, $2E80..$2E99, $2E9B..$2EF3, $2F00..$2FD5,
516 $2FF0..$2FFB, $3001..$3004, $3008..$3020, $3030, $3036, $3037, $303D..$303F, $309B, $309C,
517 $30A0, $30FB, $31C0..$31E3, $321D, $321E, $3250..$325F, $327C..$327E, $32B1..$32BF, $32CC..$32CF,
518 $3377..$337A, $33DE, $33DF, $33FF, $4DC0..$4DFF: result := ubcOtherNeutrals;
519 else result := ubcUnknown;
520 end;
521 end;
522 $08000..$0FFFF:
523 case u of
524 $FEFF: result := ubcBoundaryNeutral;
525 $9FEF, $A000..$A48C, $A4D0..$A60C, $A610..$A62B, $A640..$A66E, $A680..$A69D, $A6A0..$A6EF,
526 $A6F2..$A6F7, $A722..$A787, $A789..$A7B9, $A7F7..$A801, $A803..$A805, $A807..$A80A, $A80C..$A824,
527 $A827, $A830..$A837, $A840..$A873, $A880..$A8C3, $A8CE..$A8D9, $A8F2..$A8FE, $A900..$A925,
528 $A92E..$A946, $A952, $A953, $A95F..$A97C, $A983..$A9B2, $A9B4, $A9B5, $A9BA, $A9BB, $A9BD..$A9CD,
529 $A9CF..$A9D9, $A9DE..$A9E4, $A9E6..$A9FE, $AA00..$AA28, $AA2F, $AA30, $AA33, $AA34, $AA40..$AA42,
530 $AA44..$AA4B, $AA4D, $AA50..$AA59, $AA5C..$AA7B, $AA7D..$AAAF, $AAB1, $AAB5, $AAB6, $AAB9..$AABD,
531 $AAC0, $AAC2, $AADB..$AAEB, $AAEE..$AAF5, $AB01..$AB06, $AB09..$AB0E, $AB11..$AB16, $AB20..$AB26,
532 $AB28..$AB2E, $AB30..$AB65, $AB70..$ABE4, $ABE6, $ABE7, $ABE9..$ABEC, $ABF0..$ABF9, $AC00,
533 $D7A3, $D7B0..$D7C6, $D7CB..$D7FB, $D800, $DB7F, $DB80, $DBFF, $DC00, $DFFF, $E000, $F8FF..$FA6D,
534 $FA70..$FAD9, $FB00..$FB06, $FB13..$FB17: result := ubcLeftToRight;
535 $FB1D, $FB1F..$FB28, $FB2A..$FB36, $FB38..$FB3C, $FB3E, $FB40, $FB41, $FB43, $FB44, $FB46..$FB4F: result := ubcRightToLeft;
536 $FB50..$FBC1, $FBD3..$FD3D, $FD50..$FD8F, $FD92..$FDC7, $FDF0..$FDFC, $FE70..$FE74, $FE76..$FEFC: result := ubcArabicLetter;
537 $FF21..$FF3A, $FF41..$FF5A, $FF66..$FFBE, $FFC2..$FFC7, $FFCA..$FFCF, $FFD2..$FFD7, $FFDA..$FFDC: result := ubcLeftToRight;
538 $FF10..$FF19: result := ubcEuropeanNumber;
539 $FB29, $FE62, $FE63, $FF0B, $FF0D: result := ubcEuropeanNumberSeparator;
540 $A838, $A839, $FE5F, $FE69, $FE6A, $FF03..$FF05, $FFE0, $FFE1, $FFE5, $FFE6: result := ubcEuropeanNumberTerminator;
541 $A66F..$A672, $A674..$A67D, $A69E, $A69F, $A6F0, $A6F1, $A802, $A806, $A80B, $A825, $A826,
542 $A8C4, $A8C5, $A8E0..$A8F1, $A8FF, $A926..$A92D, $A947..$A951, $A980..$A982, $A9B3, $A9B6..$A9B9,
543 $A9BC, $A9E5, $AA29..$AA2E, $AA31, $AA32, $AA35, $AA36, $AA43, $AA4C, $AA7C, $AAB0, $AAB2..$AAB4,
544 $AAB7, $AAB8, $AABE, $AABF, $AAC1, $AAEC, $AAED, $AAF6, $ABE5, $ABE8, $ABED, $FB1E, $FE00..$FE0F,
545 $FE20..$FE2F: result := ubcNonSpacingMark;
546 $FE50, $FE52, $FE55, $FF0C, $FF0E, $FF0F, $FF1A: result := ubcCommonSeparator;
547 $A490..$A4C6, $A60D..$A60F, $A673, $A67E, $A67F, $A700..$A721, $A788, $A828..$A82B, $A874..$A877,
548 $FD3E, $FD3F, $FDFD, $FE10..$FE19, $FE30..$FE4F, $FE51, $FE54, $FE56..$FE5E, $FE60, $FE61,
549 $FE64..$FE66, $FE68, $FE6B, $FF01, $FF02, $FF06..$FF0A, $FF1B..$FF20, $FF3B..$FF40, $FF5B..$FF65,
550 $FFE2..$FFE4, $FFE8..$FFEE, $FFF9..$FFFD: result := ubcOtherNeutrals;
551 else result := ubcUnknown;
552 end;
553 else
554 case u of
555 $10000..$10FFF:
556 case u of
557 $10000..$1000B, $1000D..$10026, $10028..$1003A, $1003C, $1003D, $1003F..$1004D, $10050..$1005D,
558 $10080..$100FA, $10100, $10102, $10107..$10133, $10137..$1013F, $1018D, $1018E, $101D0..$101FC,
559 $10280..$1029C, $102A0..$102D0, $10300..$10323, $1032D..$1034A, $10350..$10375, $10380..$1039D,
560 $1039F..$103C3, $103C8..$103D5, $10400..$1049D, $104A0..$104A9, $104B0..$104D3, $104D8..$104FB,
561 $10500..$10527, $10530..$10563, $1056F, $10600..$10736, $10740..$10755, $10760..$10767: result := ubcLeftToRight;
562 $10800..$10805, $10808, $1080A..$10835, $10837, $10838, $1083C, $1083F..$10855, $10857..$1089E,
563 $108A7..$108AF, $108E0..$108F2, $108F4, $108F5, $108FB..$1091B, $10920..$10939, $1093F,
564 $10980..$109B7, $109BC..$109CF, $109D2..$10A00, $10A10..$10A13, $10A15..$10A17, $10A19..$10A35,
565 $10A40..$10A48, $10A50..$10A58, $10A60..$10A9F, $10AC0..$10AE4, $10AEB..$10AF6, $10B00..$10B35,
566 $10B40..$10B55, $10B58..$10B72, $10B78..$10B91, $10B99..$10B9C, $10BA9..$10BAF, $10C00..$10C48,
567 $10C80..$10CB2, $10CC0..$10CF2, $10CFA..$10CFF: result := ubcRightToLeft;
568 $10D00..$10D23: result := ubcArabicLetter;
569 $10F00..$10F27: result := ubcRightToLeft;
570 $10F30..$10F45, $10F51..$10F59: result := ubcArabicLetter;
571 $102E1..$102FB: result := ubcEuropeanNumber;
572 $10D30..$10D39, $10E60..$10E7E: result := ubcArabicNumber;
573 $101FD, $102E0, $10376..$1037A, $10A01..$10A03, $10A05, $10A06, $10A0C..$10A0F, $10A38..$10A3A,
574 $10A3F, $10AE5, $10AE6, $10D24..$10D27, $10F46..$10F50: result := ubcNonSpacingMark;
575 $10101, $10140..$1018C, $10190..$1019B, $101A0, $1091F, $10B39..$10B3F: result := ubcOtherNeutrals;
576 else result := ubcUnknown;
577 end;
578 $11000..$117FF:
579 case u of
580 $11000, $11002..$11037, $11047..$1104D, $11066..$1106F, $11082..$110B2, $110B7, $110B8,
581 $110BB..$110C1, $110CD, $110D0..$110E8, $110F0..$110F9, $11103..$11126, $1112C, $11136..$11146,
582 $11150..$11172, $11174..$11176, $11182..$111B5, $111BF..$111C8, $111CD, $111D0..$111DF,
583 $111E1..$111F4, $11200..$11211, $11213..$1122E, $11232, $11233, $11235, $11238..$1123D,
584 $11280..$11286, $11288, $1128A..$1128D, $1128F..$1129D, $1129F..$112A9, $112B0..$112DE,
585 $112E0..$112E2, $112F0..$112F9, $11302, $11303, $11305..$1130C, $1130F, $11310, $11313..$11328,
586 $1132A..$11330, $11332, $11333, $11335..$11339, $1133D..$1133F, $11341..$11344, $11347,
587 $11348, $1134B..$1134D, $11350, $11357, $1135D..$11363, $11400..$11437, $11440, $11441,
588 $11445, $11447..$11459, $1145B, $1145D, $11480..$114B2, $114B9, $114BB..$114BE, $114C1,
589 $114C4..$114C7, $114D0..$114D9, $11580..$115B1, $115B8..$115BB, $115BE, $115C1..$115DB,
590 $11600..$11632, $1163B, $1163C, $1163E, $11641..$11644, $11650..$11659, $11680..$116AA,
591 $116AC, $116AE, $116AF, $116B6, $116C0..$116C9, $11700..$1171A, $11720, $11721, $11726,
592 $11730..$1173F: result := ubcLeftToRight;
593 $11001, $11038..$11046, $1107F..$11081, $110B3..$110B6, $110B9, $110BA, $11100..$11102,
594 $11127..$1112B, $1112D..$11134, $11173, $11180, $11181, $111B6..$111BE, $111C9..$111CC,
595 $1122F..$11231, $11234, $11236, $11237, $1123E, $112DF, $112E3..$112EA, $11300, $11301,
596 $1133B, $1133C, $11340, $11366..$1136C, $11370..$11374, $11438..$1143F, $11442..$11444,
597 $11446, $1145E, $114B3..$114B8, $114BA, $114BF, $114C0, $114C2, $114C3, $115B2..$115B5,
598 $115BC, $115BD, $115BF, $115C0, $115DC, $115DD, $11633..$1163A, $1163D, $1163F, $11640,
599 $116AB, $116AD, $116B0..$116B5, $116B7, $1171D..$1171F, $11722..$11725, $11727..$1172B: result := ubcNonSpacingMark;
600 $11052..$11065, $11660..$1166C: result := ubcOtherNeutrals;
601 else result := ubcUnknown;
602 end;
603 $11800..$17FFF:
604 case u of
605 $11800..$1182E, $11838, $1183B, $118A0..$118F2, $118FF, $11A00, $11A07, $11A08, $11A0B..$11A32,
606 $11A39, $11A3A, $11A3F..$11A46, $11A50, $11A57, $11A58, $11A5C..$11A83, $11A86..$11A89,
607 $11A97, $11A9A..$11AA2, $11AC0..$11AF8, $11C00..$11C08, $11C0A..$11C2F, $11C3E..$11C45,
608 $11C50..$11C6C, $11C70..$11C8F, $11CA9, $11CB1, $11CB4, $11D00..$11D06, $11D08, $11D09,
609 $11D0B..$11D30, $11D46, $11D50..$11D59, $11D60..$11D65, $11D67, $11D68, $11D6A..$11D8E,
610 $11D93, $11D94, $11D96, $11D98, $11DA0..$11DA9, $11EE0..$11EF2, $11EF5..$11EF8, $12000..$12399,
611 $12400..$1246E, $12470..$12474, $12480..$12543, $13000..$1342E, $14400..$14646, $16800..$16A38,
612 $16A40..$16A5E, $16A60..$16A69, $16A6E, $16A6F, $16AD0..$16AED, $16AF5, $16B00..$16B2F,
613 $16B37..$16B45, $16B50..$16B59, $16B5B..$16B61, $16B63..$16B77, $16B7D..$16B8F, $16E40..$16E9A,
614 $16F00..$16F44, $16F50..$16F7E, $16F93..$16F9F, $16FE0, $16FE1, $17000: result := ubcLeftToRight;
615 $1182F..$11837, $11839, $1183A, $11A01..$11A06, $11A09, $11A0A, $11A33..$11A38, $11A3B..$11A3E,
616 $11A47, $11A51..$11A56, $11A59..$11A5B, $11A8A..$11A96, $11A98, $11A99, $11C30..$11C36,
617 $11C38..$11C3D, $11C92..$11CA7, $11CAA..$11CB0, $11CB2, $11CB3, $11CB5, $11CB6, $11D31..$11D36,
618 $11D3A, $11D3C, $11D3D, $11D3F..$11D45, $11D47, $11D90, $11D91, $11D95, $11D97, $11EF3,
619 $11EF4, $16AF0..$16AF4, $16B30..$16B36, $16F8F..$16F92: result := ubcNonSpacingMark;
620 else result := ubcUnknown;
621 end;
622 $18000..$FFFFF:
623 case u of
624 $1BCA0..$1BCA3, $1D173..$1D17A, $E0001, $E0020..$E007F: result := ubcBoundaryNeutral;
625 $187F1, $18800..$18AF2, $1B000..$1B11E, $1B170..$1B2FB, $1BC00..$1BC6A, $1BC70..$1BC7C,
626 $1BC80..$1BC88, $1BC90..$1BC99, $1BC9C, $1BC9F, $1D000..$1D0F5, $1D100..$1D126, $1D129..$1D166,
627 $1D16A..$1D172, $1D183, $1D184, $1D18C..$1D1A9, $1D1AE..$1D1E8, $1D2E0..$1D2F3, $1D360..$1D378,
628 $1D400..$1D454, $1D456..$1D49C, $1D49E, $1D49F, $1D4A2, $1D4A5, $1D4A6, $1D4A9..$1D4AC,
629 $1D4AE..$1D4B9, $1D4BB, $1D4BD..$1D4C3, $1D4C5..$1D505, $1D507..$1D50A, $1D50D..$1D514,
630 $1D516..$1D51C, $1D51E..$1D539, $1D53B..$1D53E, $1D540..$1D544, $1D546, $1D54A..$1D550,
631 $1D552..$1D6A5, $1D6A8..$1D6DA, $1D6DC..$1D714, $1D716..$1D74E, $1D750..$1D788, $1D78A..$1D7C2,
632 $1D7C4..$1D7CB, $1D800..$1D9FF, $1DA37..$1DA3A, $1DA6D..$1DA74, $1DA76..$1DA83, $1DA85..$1DA8B: result := ubcLeftToRight;
633 $1E800..$1E8C4, $1E8C7..$1E8CF, $1E900..$1E943, $1E950..$1E959, $1E95E, $1E95F: result := ubcRightToLeft;
634 $1EC71..$1ECB4, $1EE00..$1EE03, $1EE05..$1EE1F, $1EE21, $1EE22, $1EE24, $1EE27, $1EE29..$1EE32,
635 $1EE34..$1EE37, $1EE39, $1EE3B, $1EE42, $1EE47, $1EE49, $1EE4B, $1EE4D..$1EE4F, $1EE51,
636 $1EE52, $1EE54, $1EE57, $1EE59, $1EE5B, $1EE5D, $1EE5F, $1EE61, $1EE62, $1EE64, $1EE67..$1EE6A,
637 $1EE6C..$1EE72, $1EE74..$1EE77, $1EE79..$1EE7C, $1EE7E, $1EE80..$1EE89, $1EE8B..$1EE9B,
638 $1EEA1..$1EEA3, $1EEA5..$1EEA9, $1EEAB..$1EEBB: result := ubcArabicLetter;
639 $1F110..$1F12E, $1F130..$1F169, $1F170..$1F1AC, $1F1E6..$1F202, $1F210..$1F23B, $1F240..$1F248,
640 $1F250, $1F251, $20000, $2A6D6, $2A700, $2B734, $2B740, $2B81D, $2B820, $2CEA1, $2CEB0,
641 $2EBE0, $2F800..$2FA1D, $F0000, $FFFFD: result := ubcLeftToRight;
642 $1D7CE..$1D7FF, $1F100..$1F10A: result := ubcEuropeanNumber;
643 $1BC9D, $1BC9E, $1D167..$1D169, $1D17B..$1D182, $1D185..$1D18B, $1D1AA..$1D1AD, $1D242..$1D244,
644 $1DA00..$1DA36, $1DA3B..$1DA6C, $1DA75, $1DA84, $1DA9B..$1DA9F, $1DAA1..$1DAAF, $1E000..$1E006,
645 $1E008..$1E018, $1E01B..$1E021, $1E023, $1E024, $1E026..$1E02A, $1E8D0..$1E8D6, $1E944..$1E94A,
646 $E0100..$E01EF: result := ubcNonSpacingMark;
647 $1D200..$1D241, $1D245, $1D300..$1D356, $1D6DB, $1D715, $1D74F, $1D789, $1D7C3, $1EEF0,
648 $1EEF1, $1F000..$1F02B, $1F030..$1F093, $1F0A0..$1F0AE, $1F0B1..$1F0BF, $1F0C1..$1F0CF,
649 $1F0D1..$1F0F5, $1F10B, $1F10C, $1F12F, $1F16A, $1F16B, $1F260..$1F265, $1F300..$1F6D4,
650 $1F6E0..$1F6EC, $1F6F0..$1F6F9, $1F700..$1F773, $1F780..$1F7D8, $1F800..$1F80B, $1F810..$1F847,
651 $1F850..$1F859, $1F860..$1F887, $1F890..$1F8AD, $1F900..$1F90B, $1F910..$1F93E, $1F940..$1F970,
652 $1F973..$1F976, $1F97A, $1F97C..$1F9A2, $1F9B0..$1F9B9, $1F9C0..$1F9C2, $1F9D0..$1F9FF,
653 $1FA60..$1FA6D: result := ubcOtherNeutrals;
654 else result := ubcUnknown;
655 end;
656 else result := ubcUnknown;
657 end
658 end
659end;
660
{$PUSH}{$WARNINGS OFF}
{ Looks up paired-bracket data for the code point u.
  When u is one of the opening or closing brackets listed below, IsBracket is
  true and OpeningBracket/ClosingBracket receive both code points of the pair.
  For any other value IsBracket is false and both code points are 0. }
function GetUnicodeBracketInfo(u: cardinal): TUnicodeBracketInfo;

  //stores a recognised pair into the result of the enclosing function
  procedure Pair(AOpen, AClose: cardinal);
  begin
    result.OpeningBracket := AOpen;
    result.ClosingBracket := AClose;
    result.IsBracket := true;
  end;

begin
  //start from "not a bracket", a matching label below overrides it
  result.IsBracket := false;
  result.OpeningBracket := 0;
  result.ClosingBracket := 0;

  case u of
    $0028, $0029: Pair($0028, $0029);
    $005B, $005D: Pair($005B, $005D);
    $007B, $007D: Pair($007B, $007D);
    $0F3A, $0F3B: Pair($0F3A, $0F3B);
    $0F3C, $0F3D: Pair($0F3C, $0F3D);
    $169B, $169C: Pair($169B, $169C);
    $2045, $2046: Pair($2045, $2046);
    $207D, $207E: Pair($207D, $207E);
    $208D, $208E: Pair($208D, $208E);
    $2308, $2309: Pair($2308, $2309);
    $230A, $230B: Pair($230A, $230B);
    $2329, $232A: Pair($2329, $232A);
    $2768, $2769: Pair($2768, $2769);
    $276A, $276B: Pair($276A, $276B);
    $276C, $276D: Pair($276C, $276D);
    $276E, $276F: Pair($276E, $276F);
    $2770, $2771: Pair($2770, $2771);
    $2772, $2773: Pair($2772, $2773);
    $2774, $2775: Pair($2774, $2775);
    $27C5, $27C6: Pair($27C5, $27C6);
    $27E6, $27E7: Pair($27E6, $27E7);
    $27E8, $27E9: Pair($27E8, $27E9);
    $27EA, $27EB: Pair($27EA, $27EB);
    $27EC, $27ED: Pair($27EC, $27ED);
    $27EE, $27EF: Pair($27EE, $27EF);
    $2983, $2984: Pair($2983, $2984);
    $2985, $2986: Pair($2985, $2986);
    $2987, $2988: Pair($2987, $2988);
    $2989, $298A: Pair($2989, $298A);
    $298B, $298C: Pair($298B, $298C);
    //the two "tick in corner" pairs are interleaved: 298D closes with 2990, 298F with 298E
    $298D, $2990: Pair($298D, $2990);
    $298F, $298E: Pair($298F, $298E);
    $2991, $2992: Pair($2991, $2992);
    $2993, $2994: Pair($2993, $2994);
    $2995, $2996: Pair($2995, $2996);
    $2997, $2998: Pair($2997, $2998);
    $29D8, $29D9: Pair($29D8, $29D9);
    $29DA, $29DB: Pair($29DA, $29DB);
    $29FC, $29FD: Pair($29FC, $29FD);
    $2E22, $2E23: Pair($2E22, $2E23);
    $2E24, $2E25: Pair($2E24, $2E25);
    $2E26, $2E27: Pair($2E26, $2E27);
    $2E28, $2E29: Pair($2E28, $2E29);
    $3008, $3009: Pair($3008, $3009);
    $300A, $300B: Pair($300A, $300B);
    $300C, $300D: Pair($300C, $300D);
    $300E, $300F: Pair($300E, $300F);
    $3010, $3011: Pair($3010, $3011);
    $3014, $3015: Pair($3014, $3015);
    $3016, $3017: Pair($3016, $3017);
    $3018, $3019: Pair($3018, $3019);
    $301A, $301B: Pair($301A, $301B);
    $FE59, $FE5A: Pair($FE59, $FE5A);
    $FE5B, $FE5C: Pair($FE5B, $FE5C);
    $FE5D, $FE5E: Pair($FE5D, $FE5E);
    $FF08, $FF09: Pair($FF08, $FF09);
    $FF3B, $FF3D: Pair($FF3B, $FF3D);
    $FF5B, $FF5D: Pair($FF5B, $FF5D);
    $FF5F, $FF60: Pair($FF5F, $FF60);
    $FF62, $FF63: Pair($FF62, $FF63);
  end;
end;
{$POP}
740
{ Returns true when u is one of the zero-width code points listed here:
  zero-width space, non-joiner, joiner, no-break space, or one of the
  left-to-right / right-to-left / Arabic letter marks. }
function IsZeroWidthUnicode(u: cardinal): boolean;
begin
  result := (u = UNICODE_ZERO_WIDTH_SPACE)
         or (u = UNICODE_ZERO_WIDTH_NON_JOINER)
         or (u = UNICODE_ZERO_WIDTH_JOINER)
         or (u = UNICODE_ZERO_WIDTH_NO_BREAK_SPACE)
         or (u = UNICODE_LEFT_TO_RIGHT_MARK)
         or (u = UNICODE_RIGHT_TO_LEFT_MARK)
         or (u = UNICODE_ARABIC_LETTER_MARK);
end;
751
{ Returns true when u ends a paragraph: LF ($0A), CR ($0D), next line,
  paragraph separator or one of the information separators two to four. }
function IsUnicodeParagraphSeparator(u: cardinal): boolean;
begin
  result := (u = $0A)
         or (u = $0D)
         or (u = UNICODE_NEXT_LINE)
         or (u = UNICODE_PARAGRAPH_SEPARATOR)
         or (u = UNICODE_INFORMATION_SEPARATOR_FOUR)
         or (u = UNICODE_INFORMATION_SEPARATOR_THREE)
         or (u = UNICODE_INFORMATION_SEPARATOR_TWO);
end;
760
{ Returns true when u is a line feed (10) or a carriage return (13). }
function IsUnicodeCrLf(u: cardinal): boolean;
begin
  case u of
    10, 13: result := true;
  else
    result := false;
  end;
end;
765
{ Returns true when u opens an isolate (LRI, RLI, FSI) or an explicit
  embedding/override block (LRE, RLE, LRO, RLO). The closing codes are not
  part of the list. }
function IsUnicodeIsolateOrFormatting(u: cardinal): boolean;
begin
  result := (u = UNICODE_LEFT_TO_RIGHT_ISOLATE)
         or (u = UNICODE_RIGHT_TO_LEFT_ISOLATE)
         or (u = UNICODE_FIRST_STRONG_ISOLATE)
         or (u = UNICODE_LEFT_TO_RIGHT_EMBEDDING)
         or (u = UNICODE_RIGHT_TO_LEFT_EMBEDDING)
         or (u = UNICODE_LEFT_TO_RIGHT_OVERRIDE)
         or (u = UNICODE_RIGHT_TO_LEFT_OVERRIDE);
end;
775
776{ TUnicodeBidiInfo }
777
//true when the line-break bit is set in Flags
function TUnicodeBidiInfo.GetEndOfLine: boolean;
begin
  exit((Flags and BIDI_FLAG_END_OF_LINE) = BIDI_FLAG_END_OF_LINE);
end;
782
//true when the end-of-paragraph bit is set in Flags
function TUnicodeBidiInfo.GetEndOfParagraph: boolean;
begin
  exit((Flags and BIDI_FLAG_END_OF_PARAGRAPH) = BIDI_FLAG_END_OF_PARAGRAPH);
end;
787
//true when the char is flagged as removed (not to be rendered)
function TUnicodeBidiInfo.GetRemoved: boolean;
begin
  exit((Flags and BIDI_FLAG_REMOVED) = BIDI_FLAG_REMOVED);
end;
792
//a char is right-to-left when its bidi level is odd
function TUnicodeBidiInfo.GetRightToLeft: boolean;
begin
  exit((BidiLevel and 1) = 1);
end;
797
//a paragraph is right-to-left when its bidi level is odd
function TUnicodeBidiInfo.GetParagraphRightToLeft: boolean;
begin
  exit((ParagraphBidiLevel and 1) = 1);
end;
802
{ Computes the bidi information of ALength code points starting at u.
  The result contains one TUnicodeBidiInfo per input code point (same index).
  The text is split into paragraphs and baseDirection is handed to
  AnalyzeIsolates for each of them, which accepts UNICODE_LEFT_TO_RIGHT_ISOLATE,
  UNICODE_RIGHT_TO_LEFT_ISOLATE or UNICODE_FIRST_STRONG_ISOLATE and raises
  EInvalidOperation for any other value. }
function AnalyzeBidiUnicode(u: PCardinal; ALength: integer; baseDirection: cardinal): TUnicodeBidiArray;
type
  //working data kept for each code point while the rules are applied
  TUnicodeAnalysisElement = record
    bidiClass: TUnicodeBidiClass; //current class of the char, rewritten by the resolution steps
    prevInIsolate, nextInIsolate: integer; //previous and next index in current isolate (-1 at either end)
  end;
  TUnicodeAnalysisArray = array of TUnicodeAnalysisElement;

var
  a: TUnicodeAnalysisArray; //indexed like u and result, shared by all nested procedures
813
{ Resolves the weak classes (rules W1 to W7) of the sequence that starts at
  startIndex and stops just before afterEndIndex, following the nextInIsolate
  links. Chars flagged as removed are left untouched. startOfSequence is used as
  the class preceding the first char; endOfSequence is not used here.
  Three passes are made because each group of rules works on the outcome of the
  previous one. }
procedure ResolveWeakTypes(startIndex, afterEndIndex: integer; startOfSequence, {%H-}endOfSequence: TUnicodeBidiClass);
var
  curIndex,backIndex: Integer;
  latestStrongClass, prevClass: TUnicodeBidiClass;
begin
  //rules W1 and W2
  prevClass := startOfSequence;
  latestStrongClass:= prevClass;
  curIndex := startIndex;
  while curIndex <> afterEndIndex do
  begin
    if not result[curIndex].IsRemoved then
    begin
      case a[curIndex].bidiClass of
        //W1: a non-spacing mark takes the class of the previous char
        ubcNonSpacingMark: a[curIndex].bidiClass:= prevClass;
        //W2: a European number following an Arabic letter becomes an Arabic number
        ubcEuropeanNumber: if latestStrongClass = ubcArabicLetter then a[curIndex].bidiClass:= ubcArabicNumber;
      end;
      //isolate initiators and PDI count as neutral for the mark that follows them
      case u[curIndex] of
        UNICODE_LEFT_TO_RIGHT_ISOLATE,
        UNICODE_RIGHT_TO_LEFT_ISOLATE,
        UNICODE_FIRST_STRONG_ISOLATE,
        UNICODE_POP_DIRECTIONAL_ISOLATE: prevClass := ubcOtherNeutrals;
        else prevClass := a[curIndex].bidiClass;
      end;
      if prevClass in [ubcLeftToRight,ubcRightToLeft,ubcArabicLetter] then latestStrongClass:= prevClass;
    end;
    curIndex := a[curIndex].nextInIsolate;
  end;

  // rules W3, W4 and W5
  prevClass := startOfSequence;
  curIndex := startIndex;
  while curIndex <> afterEndIndex do
  begin
    if not result[curIndex].IsRemoved then
    begin
      case a[curIndex].bidiClass of
        //W3: Arabic letters are handled as right-to-left from now on
        ubcArabicLetter: a[curIndex].bidiClass := ubcRightToLeft;
        ubcEuropeanNumber:
          begin
            //W5 (backwards): terminators directly before the number join it.
            //The walk goes over raw indices and skips removed chars.
            backIndex := curIndex;
            while backIndex > startIndex do
            begin
              backIndex -= 1;
              if result[backIndex].IsRemoved then continue;
              if a[backIndex].bidiClass = ubcEuropeanNumberTerminator then
                a[backIndex].bidiClass := ubcEuropeanNumber
              else break;
            end;
          end;
        //W4: a single separator between two numbers of the same kind joins them
        ubcEuropeanNumberSeparator:
          if (prevClass = ubcEuropeanNumber) and (a[curIndex].nextInIsolate <> afterEndIndex) and
            (a[a[curIndex].nextInIsolate].bidiClass = ubcEuropeanNumber) then
            a[curIndex].bidiClass:= ubcEuropeanNumber;
        ubcCommonSeparator:
          if (prevClass in[ubcEuropeanNumber,ubcArabicNumber]) and (a[curIndex].nextInIsolate <> afterEndIndex) and
            (a[a[curIndex].nextInIsolate].bidiClass = prevClass) then
            a[curIndex].bidiClass:= prevClass;
        //W5 (forwards): terminators directly after the number join it
        ubcEuropeanNumberTerminator:
          if prevClass = ubcEuropeanNumber then
            a[curIndex].bidiClass:= ubcEuropeanNumber;
      end;
      prevClass := a[curIndex].bidiClass;
    end;

    curIndex := a[curIndex].nextInIsolate;
  end;

  // rule W6 and W7
  curIndex := startIndex;
  latestStrongClass := startOfSequence;
  while curIndex <> afterEndIndex do
  begin
    if not result[curIndex].IsRemoved then
    begin
      case a[curIndex].bidiClass of
        //W6: separators and terminators that are left become neutral
        ubcEuropeanNumberSeparator,ubcEuropeanNumberTerminator,ubcCommonSeparator: a[curIndex].bidiClass := ubcOtherNeutrals;
        ubcLeftToRight,ubcRightToLeft,ubcArabicLetter: latestStrongClass:= a[curIndex].bidiClass;
        //W7: a European number after a left-to-right char becomes left-to-right
        ubcEuropeanNumber: if latestStrongClass = ubcLeftToRight then a[curIndex].bidiClass := ubcLeftToRight;
      end;
    end;
    curIndex := a[curIndex].nextInIsolate;
  end;
end;
898
{ Gives a direction to the neutral chars (rules N1 and N2) of the sequence that
  starts at startIndex and stops just before afterEndIndex, following the
  nextInIsolate links. Boundary neutral chars met on the way are flagged as
  removed.
  startOfSequence is the direction assumed before the first char and
  endOfSequence the direction assumed after the last one. Neutrals surrounded by
  the same direction take it (numbers counting as right-to-left), any other
  neutral takes the embedding direction given by the level of the first char. }
procedure ResolveNeutrals(startIndex, afterEndIndex: integer; startOfSequence, endOfSequence: TUnicodeBidiClass);
var
  curIndex,prevIndex,previewIndex: Integer;
  curRTL, include, rightToLeftEmbedding: Boolean;
  bidiClass: TUnicodeBidiClass;
begin
  rightToLeftEmbedding := odd(result[startIndex].BidiLevel);
  curIndex := startIndex;
  curRTL := startOfSequence in [ubcRightToLeft,ubcArabicLetter];
  while curIndex <> afterEndIndex do
  begin
    case a[curIndex].bidiClass of
      ubcLeftToRight: curRTL := false;
      ubcRightToLeft,ubcArabicLetter,ubcArabicNumber,ubcEuropeanNumber: curRTL := true;
    else
      if curRTL <> rightToLeftEmbedding then
      begin
        //determine whether following neutral chars are included in reverse direction
        prevIndex := curIndex;
        previewIndex := a[curIndex].nextInIsolate;
        include := false;
        //The look-ahead must also examine the position after the last char,
        //where endOfSequence stands for the missing char. Stopping the loop
        //before that position would ignore endOfSequence altogether.
        while true do
        begin
          if previewIndex = afterEndIndex then
            bidiClass:= endOfSequence
          else
            bidiClass:= a[previewIndex].bidiClass;
          case bidiClass of
            ubcLeftToRight:
              begin
                include := not curRTL;
                break;
              end;
            ubcRightToLeft,ubcArabicLetter,ubcArabicNumber,ubcEuropeanNumber:
              begin
                include := curRTL;
                break;
              end;
          end;
          //nothing beyond the end of the sequence, even if endOfSequence is not a strong class
          if previewIndex = afterEndIndex then break;
          prevIndex := previewIndex;
          previewIndex := a[previewIndex].nextInIsolate;
        end;
        //when the end was reached, stop on the last char: it is handled by the common code below
        if previewIndex = afterEndIndex then previewIndex := prevIndex;
        if include then
        begin
          while curIndex <> previewIndex do
          begin
            if a[curIndex].bidiClass = ubcBoundaryNeutral then
              result[curIndex].Flags := result[curIndex].Flags OR BIDI_FLAG_REMOVED; //supposed to be removed for rendering

            if a[curIndex].bidiClass in (ubcNeutral+[ubcBoundaryNeutral,ubcUnknown]) then
            begin
              if curRTL then a[curIndex].bidiClass := ubcRightToLeft
              else a[curIndex].bidiClass := ubcLeftToRight;
            end;

            curIndex := a[curIndex].nextInIsolate;
          end;
        end else
          curRTL := rightToLeftEmbedding; //N2: fall back to the embedding direction
      end;
    end;

    if a[curIndex].bidiClass = ubcBoundaryNeutral then
      result[curIndex].Flags := result[curIndex].Flags OR BIDI_FLAG_REMOVED; //supposed to be removed for rendering

    if a[curIndex].bidiClass in (ubcNeutral+[ubcBoundaryNeutral,ubcUnknown]) then
    begin
      if curRTL then a[curIndex].bidiClass := ubcRightToLeft
      else a[curIndex].bidiClass := ubcLeftToRight;
    end;

    curIndex := a[curIndex].nextInIsolate;
  end;
end;
974
{ Resolves the direction of paired brackets (rule N0) in the sequence that
  starts at startIndex and stops just before afterEndIndex, following the
  nextInIsolate links.
  The embedding direction is the parity of the level of the first char of the
  sequence. startOfSequence is only the context assumed before the first char:
  it can differ from the embedding direction when the sequence follows a run of
  higher level. endOfSequence is not used here. }
procedure ResolveBrackets(startIndex, afterEndIndex: integer; startOfSequence, {%H-}endOfSequence: TUnicodeBidiClass);
type TBracketPair = record
       openIndex,closeIndex: integer;
     end;
var
  bracketPairs: array of TBracketPair;
  bracketPairCount: integer;
  rightToLeft: boolean;    //embedding direction of the sequence
  sosRightToLeft: boolean; //direction assumed before the first char

  //sorts the pairs found by increasing position of the opening bracket
  procedure SortBracketPairs;
  var
    i,j,k: Integer;
    temp: TBracketPair;
  begin
    //insertion sort: pairs 0..i-1 are already sorted when pair i is handled
    for i := 1 to bracketPairCount-1 do
    begin
      for j := 0 to i-1 do
        if bracketPairs[j].openIndex > bracketPairs[i].openIndex then
        begin
          temp := bracketPairs[i];
          for k := i downto j+1 do
            bracketPairs[k] := bracketPairs[k-1];
          bracketPairs[j] := temp;
          break; //pair is in place, nothing after it can be greater
        end;
    end;
  end;

  //fills bracketPairs with the matching brackets of the sequence
  procedure FindBrackets; // rule BD16
  const MAX_BRACKET_STACK = 63;
  var
    bracketStack: array[0..MAX_BRACKET_STACK-1] of record
        bracketCharInfo: TUnicodeBracketInfo;
        index: integer;
      end;
    bracketStackPos,peekPos: integer;
    curIndex: integer;
    curBracket: TUnicodeBracketInfo;
  begin
    bracketPairCount := 0;
    bracketStackPos := 0;
    bracketStack[0].index := -1; //avoid warning
    curIndex := startIndex;
    while curIndex <> afterEndIndex do
    begin
      //chars that already have a strong class are not considered as brackets
      if not (a[curIndex].bidiClass in [ubcLeftToRight,ubcRightToLeft]) then
      begin
        curBracket := GetUnicodeBracketInfo(u[curIndex]);
        if curBracket.IsBracket then
        begin
          // found opening bracket
          if curBracket.OpeningBracket = u[curIndex] then
          begin
            if bracketStackPos <= high(bracketStack) then
            begin
              bracketStack[bracketStackPos].bracketCharInfo := curBracket;
              bracketStack[bracketStackPos].index := curIndex;
              bracketStackPos += 1;
            end else
              break; //stack is full: stop looking for pairs
          end else
          begin
            //closing bracket: look for the nearest matching opening bracket.
            //Both kinds of right angle bracket are accepted for each other.
            for peekPos := bracketStackPos-1 downto 0 do
              if (bracketStack[peekPos].bracketCharInfo.ClosingBracket = u[curIndex]) or
                 ((bracketStack[peekPos].bracketCharInfo.ClosingBracket = UNICODE_RIGHT_ANGLE_BRACKET) and (u[curIndex] = UNICODE_RIGHT_POINTING_ANGLE_BRACKET)) or
                 ((bracketStack[peekPos].bracketCharInfo.ClosingBracket = UNICODE_RIGHT_POINTING_ANGLE_BRACKET) and (u[curIndex] = UNICODE_RIGHT_ANGLE_BRACKET)) then
              begin
                //opening brackets above the match are dropped as well
                bracketStackPos := peekPos;
                if bracketPairCount >= length(bracketPairs) then
                  setlength(bracketPairs, bracketPairCount*2 + 8);
                bracketPairs[bracketPairCount].openIndex := bracketStack[peekPos].index;
                bracketPairs[bracketPairCount].closeIndex := curIndex;
                inc(bracketPairCount);
                break;
              end;
          end;
        end;
      end;
      curIndex := a[curIndex].nextInIsolate;
    end;
  end;

  //sets the class of a bracket and of the non-spacing marks that follow it
  procedure SetCharClass(index: integer; newClass: TUnicodeBidiClass);
  begin
    a[index].bidiClass:= newClass;
    index := a[index].nextInIsolate;
    while (index <> afterEndIndex) and (GetUnicodeBidiClass(u[index]) = ubcNonSpacingMark) do
    begin
      a[index].bidiClass := newClass;
      index := a[index].nextInIsolate;
    end;
  end;

  procedure ResolveBrackets; // rule N0
  var
    i, curIndex: Integer;
    sameDirection, oppositeDirection, oppositeContext: boolean;
  begin
    for i := 0 to bracketPairCount-1 do
    begin
      //look at the strong chars enclosed by the pair (numbers count as right-to-left)
      curIndex := bracketPairs[i].openIndex+1;
      sameDirection:= false;
      oppositeDirection:= false;
      while curIndex <> bracketPairs[i].closeIndex do
      begin
        Assert((curIndex >= startIndex) and (curIndex < length(a)), 'Expecting valid index');
        case a[curIndex].bidiClass of
        ubcLeftToRight:
          if not rightToLeft then
          begin
            sameDirection := true;
            break;
          end else oppositeDirection:= true;
        ubcRightToLeft,ubcArabicLetter,ubcEuropeanNumber,ubcArabicNumber:
          if rightToLeft then
          begin
            sameDirection := true;
            break;
          end else oppositeDirection:= true;
        end;
        curIndex := a[curIndex].nextInIsolate;
      end;
      if sameDirection then
      begin
        //a strong char of the embedding direction is inside: brackets take that direction
        if rightToLeft then
        begin
          SetCharClass(bracketPairs[i].openIndex, ubcRightToLeft);
          SetCharClass(bracketPairs[i].closeIndex, ubcRightToLeft);
        end else
        begin
          SetCharClass(bracketPairs[i].openIndex, ubcLeftToRight);
          SetCharClass(bracketPairs[i].closeIndex, ubcLeftToRight);
        end;
      end else
      if oppositeDirection then
      begin
        //only strong chars of the opposite direction are inside: the direction
        //depends on the strong char preceding the opening bracket, or on the
        //start of sequence when there is none
        curIndex := a[bracketPairs[i].openIndex].prevInIsolate;
        oppositeContext := sosRightToLeft <> rightToLeft;
        while curIndex >= startIndex do
        begin
          case a[curIndex].bidiClass of
          ubcRightToLeft,ubcArabicLetter,ubcEuropeanNumber,ubcArabicNumber:
            begin
              oppositeContext := not rightToLeft;
              break;
            end;
          ubcLeftToRight:
            begin
              oppositeContext := rightToLeft;
              break;
            end;
          end;
          curIndex := a[curIndex].prevInIsolate;
        end;
        if rightToLeft xor oppositeContext then
        begin
          SetCharClass(bracketPairs[i].openIndex, ubcRightToLeft);
          SetCharClass(bracketPairs[i].closeIndex, ubcRightToLeft);
        end else
        begin
          SetCharClass(bracketPairs[i].openIndex, ubcLeftToRight);
          SetCharClass(bracketPairs[i].closeIndex, ubcLeftToRight);
        end;
      end;
      //no strong char inside: the brackets are left to the resolution of neutrals
    end;
  end;

begin
  //embedding direction comes from the level of the run, as in ResolveNeutrals
  rightToLeft:= odd(result[startIndex].BidiLevel);
  sosRightToLeft:= startOfSequence in[ubcRightToLeft,ubcArabicLetter];
  FindBrackets;
  SortBracketPairs;
  ResolveBrackets;
end;
1147
{ Applies the resolution steps to one sequence, in this order: weak types,
  brackets, neutrals. An empty sequence is ignored. }
procedure AnalyzeSequence(startIndex, afterEndIndex: integer; sos, eos: TUnicodeBidiClass);
begin
  if startIndex <> afterEndIndex then
  begin
    ResolveWeakTypes(startIndex, afterEndIndex, sos, eos);
    ResolveBrackets(startIndex, afterEndIndex, sos, eos);
    ResolveNeutrals(startIndex, afterEndIndex, sos, eos);
  end;
end;
1155
{ Cuts the chain of chars starting at startIndex (nextInIsolate links) into runs
  of chars having the same bidi level and calls AnalyzeSequence on each run.
  Chars flagged as removed never start or end a run. The start of sequence of
  the first run follows the parity of its level. Between two runs, the class is
  taken from the parity of the higher of the two levels and is used both as end
  of the first run and as start of the next one. }
procedure SameLevelRuns(startIndex: integer);
var
  curBidiLevel: byte;
  latestIndex,curIndex, curStartIndex: Integer;
  curSos,eos: TUnicodeBidiClass;
begin
  //skip removed chars at the beginning
  curIndex := startIndex;
  while (curIndex<>-1) and result[curIndex].IsRemoved do
    curIndex := a[curIndex].nextInIsolate;
  if curIndex = -1 then exit;

  curStartIndex:= curIndex;
  curBidiLevel := result[curIndex].bidiLevel;
  if odd(curBidiLevel) then curSos := ubcRightToLeft else curSos := ubcLeftToRight;
  latestIndex := -1; //latest char that is not removed
  while curIndex <> -1 do
  begin
    if not result[curIndex].IsRemoved then
    begin
      if (latestIndex <> -1) and (result[curIndex].bidiLevel <> curBidiLevel) then
      begin
        //level changes: the boundary takes the direction of the higher level
        if result[curIndex].bidiLevel > curBidiLevel then
        begin
          if odd(result[curIndex].bidiLevel) then eos := ubcRightToLeft else eos := ubcLeftToRight;
        end else
        begin
          if odd(curBidiLevel) then eos := ubcRightToLeft else eos := ubcLeftToRight;
        end;

        //the run stops right after its last char that is not removed
        AnalyzeSequence(curStartIndex, a[latestIndex].nextInIsolate, curSos, eos);

        curSos := eos;
        curBidiLevel:= result[curIndex].bidiLevel;
        curStartIndex:= curIndex;
      end;
      latestIndex := curIndex;
    end;

    //end of the chain: analyze the pending run
    if (a[curIndex].nextInIsolate = -1) and (latestIndex<>-1) then
    begin
      if odd(result[latestIndex].bidiLevel) then eos := ubcRightToLeft else eos := ubcLeftToRight;
      AnalyzeSequence(curStartIndex, a[latestIndex].nextInIsolate, curSos, eos);
      break;
    end;

    curIndex := a[curIndex].nextInIsolate;
  end;
end;
1204
//analyse bidi formatting of an embedding or an override block
{ Assigns the bidi level of the chars from startIndex to lastIndex (both
  included, following the nextInIsolate links) and handles nested embedding and
  override blocks recursively.
  minBidiLevel is raised by one when its parity does not match the direction of
  formattingCode. When formattingCode is an override, the class of every char of
  the block is replaced by the overriding direction.
  Embedding, override and pop formatting codes are flagged as removed. }
procedure AnalyzeFormattingBlocks(startIndex, lastIndex: integer; minBidiLevel: byte; formattingCode: cardinal);
var curIndex, nextIndex, levelIncrease: integer;
  subFormatBeforeStart, subFormatStart, formatNesting: integer;
  subFormatCode: cardinal;
begin
  //left-to-right blocks need an even level, right-to-left blocks an odd one
  case formattingCode of
  UNICODE_LEFT_TO_RIGHT_OVERRIDE,UNICODE_LEFT_TO_RIGHT_EMBEDDING:
    if odd(minBidiLevel) then minBidiLevel += 1;
  UNICODE_RIGHT_TO_LEFT_OVERRIDE,UNICODE_RIGHT_TO_LEFT_EMBEDDING:
    if not odd(minBidiLevel) then minBidiLevel += 1;
  end;
  nextIndex := startIndex;
  repeat
    Assert(nextIndex >= 0, 'Expecting valid index');
    curIndex := nextIndex;
    nextIndex := a[curIndex].nextInIsolate;
    result[curIndex].bidiLevel := minBidiLevel;

    //apply override
    if formattingCode = UNICODE_LEFT_TO_RIGHT_OVERRIDE then a[curIndex].bidiClass := ubcLeftToRight
    else if formattingCode = UNICODE_RIGHT_TO_LEFT_OVERRIDE then a[curIndex].bidiClass := ubcRightToLeft;

    case u[curIndex] of
    UNICODE_LEFT_TO_RIGHT_EMBEDDING, UNICODE_RIGHT_TO_LEFT_EMBEDDING,
    UNICODE_LEFT_TO_RIGHT_OVERRIDE, UNICODE_RIGHT_TO_LEFT_OVERRIDE:
      begin
        result[curIndex].Flags := result[curIndex].Flags OR BIDI_FLAG_REMOVED;
        //increase needed to reach the next level having the parity of the nested block
        case u[curIndex] of
          UNICODE_LEFT_TO_RIGHT_OVERRIDE,UNICODE_LEFT_TO_RIGHT_EMBEDDING:
            if odd(minBidiLevel) then levelIncrease := 1
            else levelIncrease := 2;
          UNICODE_RIGHT_TO_LEFT_OVERRIDE,UNICODE_RIGHT_TO_LEFT_EMBEDDING:
            if odd(minBidiLevel) then levelIncrease := 2
            else levelIncrease := 1;
        else levelIncrease:= 2;
        end;
        //a block that would exceed the maximum depth is not opened:
        //its chars stay in the current block
        if minBidiLevel <= UNICODE_MAX_BIDI_DEPTH-levelIncrease-1 then
        begin
          subFormatCode:= u[curIndex];
          subFormatBeforeStart := curIndex;
          subFormatStart := nextIndex;
          formatNesting:= 1;
          //look for the end of the nested block, counting nested openings and pops
          while formatNesting > 0 do
          begin
            //sub-format ends because no more chars
            if curIndex = lastIndex then
            begin
              if curIndex <> subFormatBeforeStart then
                AnalyzeFormattingBlocks(subFormatStart, curIndex, minBidiLevel+levelIncrease, subFormatCode);
              break;
            end;

            Assert(nextIndex >= 0, 'Expecting valid index');
            case u[nextIndex] of
            UNICODE_LEFT_TO_RIGHT_EMBEDDING, UNICODE_RIGHT_TO_LEFT_EMBEDDING,
            UNICODE_LEFT_TO_RIGHT_OVERRIDE, UNICODE_RIGHT_TO_LEFT_OVERRIDE: inc(formatNesting);
            UNICODE_POP_DIRECTIONAL_FORMATTING:
              begin
                dec(formatNesting);
                if formatNesting = 0 then
                begin
                  //sub-format ends because enough matching pop chars found
                  if curIndex <> subFormatBeforeStart then
                    AnalyzeFormattingBlocks(subFormatStart, curIndex, minBidiLevel+levelIncrease, subFormatCode);

                  //step onto the closing pop char and flag it as removed
                  curIndex := nextIndex;
                  nextIndex := a[curIndex].nextInIsolate;
                  result[curIndex].Flags := result[curIndex].Flags OR BIDI_FLAG_REMOVED;
                  break;
                end;
              end;
            end;

            curIndex := nextIndex;
            nextIndex := a[curIndex].nextInIsolate;
          end;
        end;
      end;
    UNICODE_POP_DIRECTIONAL_FORMATTING: //ignored when no matching formatting code
      begin
        result[curIndex].Flags := result[curIndex].Flags OR BIDI_FLAG_REMOVED;
      end;
    end;
  until curIndex = lastIndex;
end;
1291
{ Raises the level of the chars of the chain starting at startIndex according to
  their resolved class: right-to-left chars need an odd level, left-to-right
  chars an even one, and numbers the next even level above the current one. }
procedure ResolveImplicitLevels(startIndex: integer); // rule I1 and I2
var
  pos: Integer;
  oddLevel: boolean;
begin
  pos := startIndex;
  while pos <> -1 do
  begin
    oddLevel := Odd(result[pos].bidiLevel);
    case a[pos].bidiClass of
      ubcLeftToRight:
        if oddLevel then
          result[pos].bidiLevel := result[pos].bidiLevel + 1;
      ubcRightToLeft,ubcArabicLetter:
        if not oddLevel then
          result[pos].bidiLevel := result[pos].bidiLevel + 1;
      ubcEuropeanNumber,ubcArabicNumber:
        if oddLevel then
          result[pos].bidiLevel := result[pos].bidiLevel + 1
        else
          result[pos].bidiLevel := result[pos].bidiLevel + 2;
    end;
    pos := a[pos].nextInIsolate;
  end;
end;
1310
{ Gives the paragraph level back to segment and paragraph separators, to the
  white space and isolate chars directly before them, and to the white space
  and isolate chars at the end of the chain starting at startIndex. }
procedure ResetEndOfParagraphLevels(startIndex: integer); // rule L1
var
  lastSeen, walk: Integer;

  //goes backwards from index, resetting the level of white space and isolate
  //chars, and stops on the first char of another kind
  procedure TweakWhiteSpaceBefore(index: integer);
  var
    resettable: boolean;
  begin
    while index <> -1 do
    begin
      resettable := (u[index] = UNICODE_FIRST_STRONG_ISOLATE)
                 or (u[index] = UNICODE_POP_DIRECTIONAL_ISOLATE)
                 or (u[index] = UNICODE_LEFT_TO_RIGHT_ISOLATE)
                 or (u[index] = UNICODE_RIGHT_TO_LEFT_ISOLATE);
      if not resettable then
        resettable := GetUnicodeBidiClass(u[index]) = ubcWhiteSpace;
      if not resettable then break;
      result[index].bidiLevel := result[index].ParagraphBidiLevel;
      index := a[index].prevInIsolate;
    end;
  end;

begin
  lastSeen := -1;
  walk := startIndex;
  while walk <> -1 do
  begin
    if GetUnicodeBidiClass(u[walk]) in [ubcSegmentSeparator, ubcParagraphSeparator] then
    begin
      result[walk].bidiLevel := result[walk].ParagraphBidiLevel;
      TweakWhiteSpaceBefore(lastSeen);
    end;
    lastSeen := walk;
    walk := a[walk].nextInIsolate;
  end;
  //trailing white space of the chain
  TweakWhiteSpaceBefore(lastSeen);
end;
1353
{ Returns the isolate direction given by the first strong char of the chain
  starting at startIndex: UNICODE_RIGHT_TO_LEFT_ISOLATE when it is a
  right-to-left char or an Arabic letter, UNICODE_LEFT_TO_RIGHT_ISOLATE when it
  is a left-to-right char or when the chain has no strong char. }
function DetermineIsolateDirectionFromFirstStrongClass(startIndex: integer): cardinal;
var
  scan: Integer;
begin
  result := UNICODE_LEFT_TO_RIGHT_ISOLATE;
  scan := startIndex;
  while scan <> -1 do
  begin
    Assert(scan >= 0, 'Expecting valid index');
    case a[scan].bidiClass of
      ubcLeftToRight: exit;
      ubcRightToLeft, ubcArabicLetter:
        begin
          result := UNICODE_RIGHT_TO_LEFT_ISOLATE;
          exit;
        end;
    end;
    scan := a[scan].nextInIsolate;
  end;
end;
1380
{ Chains together, through prevInIsolate and nextInIsolate, the chars of the
  range [startIndex, startIndex+charCount) that are not inside a nested isolate.
  Isolate initiators and the pop char closing them belong to the chain, what is
  between them does not. Both ends of the chain are marked with -1 and endIndex
  receives the index of the last char of the chain. }
procedure LinkCharsInIsolate(startIndex: integer; charCount: integer; out endIndex : integer);
var
  i, depth, lastLinked: Integer;
begin
  a[startIndex].prevInIsolate := -1;
  lastLinked := -1;
  depth := 0;
  for i := startIndex to startIndex+charCount-1 do
  begin
    //a pop char leaves the nested isolate before being linked itself
    if (u[i] = UNICODE_POP_DIRECTIONAL_ISOLATE) and (depth > 0) then
      dec(depth);

    if depth = 0 then
    begin
      if lastLinked <> -1 then
        a[lastLinked].nextInIsolate := i;
      a[i].prevInIsolate := lastLinked;
      lastLinked := i;
    end;

    //an initiator is linked first and only then opens the nested isolate
    if (u[i] = UNICODE_LEFT_TO_RIGHT_ISOLATE) or
       (u[i] = UNICODE_RIGHT_TO_LEFT_ISOLATE) or
       (u[i] = UNICODE_FIRST_STRONG_ISOLATE) then
      inc(depth);
  end;
  a[lastLinked].nextInIsolate := -1;
  endIndex := lastLinked;
end;
1411
//split isolates in order to format them independently
{ Analyses the charCount chars starting at startIndex as one isolate and then
  each isolate nested in it, recursively.
  isolateDirection is UNICODE_LEFT_TO_RIGHT_ISOLATE, UNICODE_RIGHT_TO_LEFT_ISOLATE
  or UNICODE_FIRST_STRONG_ISOLATE (direction then taken from the first strong
  char). Any other value raises EInvalidOperation.
  minBidiLevel is raised by one when its parity does not match the direction.
  isParagraph tells that the range is a whole paragraph: the paragraph level is
  then stored and the levels at the end of the paragraph are reset. }
procedure AnalyzeIsolates(startIndex: integer; charCount: integer; isolateDirection: cardinal; minBidiLevel: byte = 0;
                          isParagraph: boolean = false);
var curIndex, endIndex: integer;
  nextIndex: integer;
  subBidiLevel, levelIncrease: byte;
  subIsolateStart: integer;
  subIsolateDirection: cardinal;
begin
  if charCount = 0 then exit;
  Assert(startIndex>=0, 'Invalid start index');

  LinkCharsInIsolate(startIndex, charCount, endIndex);

  if isolateDirection = UNICODE_FIRST_STRONG_ISOLATE then
    isolateDirection := DetermineIsolateDirectionFromFirstStrongClass(startIndex);

  case isolateDirection of
  UNICODE_LEFT_TO_RIGHT_ISOLATE: if Odd(minBidiLevel) then minBidiLevel += 1;
  UNICODE_RIGHT_TO_LEFT_ISOLATE: if not Odd(minBidiLevel) then minBidiLevel += 1;
  else
    raise EInvalidOperation.Create('Unknown isolate direction');
  end;

  if isParagraph then
  begin
    //NOTE(review): only the chars linked in this chain receive the paragraph
    //level, chars inside nested isolates keep their previous value - confirm
    //whether this is intended
    curIndex := startIndex;
    while curIndex <> -1 do
    begin
      result[curIndex].ParagraphBidiLevel := minBidiLevel;
      curIndex := a[curIndex].nextInIsolate;
    end;
  end;

  case isolateDirection of
  UNICODE_LEFT_TO_RIGHT_ISOLATE: AnalyzeFormattingBlocks(startIndex, endIndex, minBidiLevel, UNICODE_LEFT_TO_RIGHT_EMBEDDING);
  UNICODE_RIGHT_TO_LEFT_ISOLATE: AnalyzeFormattingBlocks(startIndex, endIndex, minBidiLevel, UNICODE_RIGHT_TO_LEFT_EMBEDDING);
  end;

  SameLevelRuns(startIndex);
  ResolveImplicitLevels(startIndex);

  if isParagraph then
    ResetEndOfParagraphLevels(startIndex);

  //analyse sub-isolates
  curIndex := startIndex;
  while curIndex <> -1 do
  begin
    Assert(curIndex >= 0, 'Expecting valid index');
    case u[curIndex] of
    UNICODE_LEFT_TO_RIGHT_ISOLATE, UNICODE_RIGHT_TO_LEFT_ISOLATE, UNICODE_FIRST_STRONG_ISOLATE:
      begin
        //the nested isolate starts from the higher level of the initiator and
        //of the char following the isolate in the chain
        subBidiLevel := result[curIndex].bidiLevel;
        nextIndex := a[curIndex].nextInIsolate;
        if nextIndex <> -1 then
        begin
          if result[nextIndex].bidiLevel > subBidiLevel then
            subBidiLevel:= result[nextIndex].bidiLevel;
        end;
        //one level is enough when the nested isolate has the opposite
        //direction, in either way. Otherwise two levels may be needed.
        if ((isolateDirection = UNICODE_LEFT_TO_RIGHT_ISOLATE) and
           (u[curIndex] = UNICODE_RIGHT_TO_LEFT_ISOLATE)) or
           ((isolateDirection = UNICODE_RIGHT_TO_LEFT_ISOLATE) and
           (u[curIndex] = UNICODE_LEFT_TO_RIGHT_ISOLATE)) then
          levelIncrease := 1
        else
          levelIncrease:= 2;
        //an isolate that would exceed the maximum depth is not analysed separately
        if subBidiLevel+levelIncrease <= UNICODE_MAX_BIDI_DEPTH-1 then
        begin
          subIsolateDirection := u[curIndex];
          subIsolateStart:= curIndex+1;
          curIndex := nextIndex;

          //sub-isolates ends because no more chars
          if curIndex = -1 then
          begin
            AnalyzeIsolates(subIsolateStart, startIndex+charCount-subIsolateStart, subIsolateDirection, subBidiLevel+1);
            break;
          end else
          begin
            //curIndex is the char closing the isolate, handled by the next iteration
            AnalyzeIsolates(subIsolateStart, curIndex-subIsolateStart, subIsolateDirection, subBidiLevel+1);
            continue;
          end;
        end;
      end;
    end;
    curIndex := a[curIndex].nextInIsolate;
  end;
end;
1501
//split the array of code points into paragraphs and analyse each of them
{ A paragraph ends on a char of class paragraph separator. A CR+LF or LF+CR pair
  counts as one separator. The last char of each paragraph receives the
  end-of-paragraph flag, including the last char of the text when it is not a
  separator. }
procedure SplitParagraphs;
var
  paraStart, pos: integer;
  isPair: boolean;
begin
  paraStart := 0;
  pos := 0;
  while pos < ALength do
  begin
    if a[pos].bidiClass = ubcParagraphSeparator then
    begin
      //skip second CRLF char
      isPair := false;
      if ((u[pos] = 13) or (u[pos] = 10)) and (pos+1 < ALength) then
        isPair := ((u[pos+1] = 13) or (u[pos+1] = 10)) and (u[pos+1] <> u[pos]);
      if isPair then inc(pos);

      result[pos].Flags := result[pos].Flags or BIDI_FLAG_END_OF_PARAGRAPH;

      AnalyzeIsolates(paraStart, pos+1-paraStart, baseDirection, 0, true);
      paraStart := pos+1;
    end;
    inc(pos);
  end;
  //remaining chars without final separator
  if pos > paraStart then
  begin
    result[pos-1].Flags := result[pos-1].Flags or BIDI_FLAG_END_OF_PARAGRAPH;
    AnalyzeIsolates(paraStart, pos-paraStart, baseDirection, 0, true);
  end;
end;
1531
var i: integer;
begin
  //Free Pascal does not guarantee that a function result of a managed type is
  //empty on entry. SetLength would keep such content and the flags below are
  //combined with "or", so start from an empty array to get zeroed elements.
  result := nil;
  setlength(a, ALength);
  setlength(result, ALength);
  if ALength > 0 then
  begin
    //initial class of each char, before any rule is applied
    for i := 0 to high(a) do
    begin
      a[i].bidiClass := GetUnicodeBidiClass(u[i]);
      if u[i] = UNICODE_LINE_SEPARATOR then //line separator within paragraph
        result[i].Flags := result[i].Flags or BIDI_FLAG_END_OF_LINE
    end;
    SplitParagraphs;
  end;
end;
1547
{ Returns the display order for an array of bidi information, or nil when the
  array is empty. The work is done by the overload taking a pointer, a stride
  and a count. }
function GetUnicodeDisplayOrder(const AInfo: TUnicodeBidiArray): TUnicodeDisplayOrder;
var
  count: integer;
begin
  count := length(AInfo);
  if count > 0 then
    result := GetUnicodeDisplayOrder(@AInfo[0], sizeof(TUnicodeBidiInfo), count)
  else
    result := nil;
end;
1555
{ Computes the display order from ACount bidi levels stored at ALevels.
  The result has ACount elements: the element at a display position is the
  index, in ALevels, of the char shown at that position. }
function GetUnicodeDisplayOrder(ALevels: PByte; ACount: integer): TUnicodeDisplayOrder;

  { Places the ABlockCount chars starting at AOffset in ALevels into the display
    positions AStartIndex to AStartIndex+ABlockCount-1. Chars at AEmbeddingLevel
    are placed directly, from the end of the block when the level is odd and
    from its start otherwise. Consecutive chars of a higher level are placed by
    a recursive call using their lowest level. }
  procedure DetermineDisplayOrderRec(AOffset: integer; AStartIndex, ABlockCount: integer; AEmbeddingLevel: byte);
  var minLevel: byte;
    blockIndex,subStartIndex,subCount, subOffset: integer;
  begin
    //writeln('DetermineDisplayOrderRec('+inttostr(AOffset)+'/'+inttostr(ACount)+',' + inttostr(AStartIndex) +',*' +inttostr(ABlockCount)+','+inttostr(AEmbeddingLevel)+')');
    blockIndex := 0;
    subStartIndex := 0; //avoid warning
    while blockIndex < ABlockCount do
    begin
      Assert(AOffset < ACount, 'Offset out of bounds');
      if ALevels[AOffset] = AEmbeddingLevel then
      begin
        //char at the level of the block: odd levels are laid out in reverse
        if odd(AEmbeddingLevel) then
          result[AStartIndex+ABlockCount-1-blockIndex] := AOffset
        else
          result[AStartIndex+blockIndex] := AOffset;
        inc(AOffset);
        inc(blockIndex);
      end else
      begin
        //start of a sub-block of another level
        if not odd(AEmbeddingLevel) then
          subStartIndex := AStartIndex+blockIndex;
        subOffset := AOffset;
        minLevel := ALevels[AOffset];
        inc(AOffset);
        inc(blockIndex);
        subCount := 1;
        while true do
        begin
          //extend the sub-block while levels stay above the level of the block
          if (blockIndex < ABlockCount) and (ALevels[AOffset] > AEmbeddingLevel) then
          begin
            Assert(AOffset < ACount, 'Offset out of bounds');
            if ALevels[AOffset] < minLevel then
              minLevel:= ALevels[AOffset];
            inc(AOffset);
            inc(blockIndex);
            inc(subCount);
          end else
          begin
            //in a reversed block, the sub-block starts where its last char would have been placed
            if odd(AEmbeddingLevel) then
              subStartIndex := AStartIndex+ABlockCount-1-(blockIndex-1);
            DetermineDisplayOrderRec(subOffset, subStartIndex, subCount, minLevel);
            break;
          end;
        end;
      end;
    end;
  end;

begin
  setlength(result, ACount);
  //the whole text is one block at level 0
  DetermineDisplayOrderRec(0, 0, ACount, 0);
end;
1611
{ Computes the display order for ACount bidi info records starting at
  ABidiInfo, consecutive records being AStride bytes apart.
  Records flagged as removed are left out of the result. The values in the
  returned array are indices relative to the original sequence (including
  removed records). Returns nil when no record is kept. }
function GetUnicodeDisplayOrder(ABidiInfo: PUnicodeBidiInfo; AStride, ACount: integer): TUnicodeDisplayOrder;
var
  keptLevels: packed array of byte;
  sourceIndices: array of integer;
  keptCount, fillPos, n: integer;
  cursor: PByte;
begin
  //first pass: count the records that are kept
  keptCount := 0;
  cursor := PByte(ABidiInfo);
  for n := 0 to ACount-1 do
  begin
    if not PUnicodeBidiInfo(cursor)^.IsRemoved then
      inc(keptCount);
    inc(cursor, AStride);
  end;

  if keptCount = 0 then
  begin
    result := nil;
    exit;
  end;

  //second pass: gather the level and original index of each kept record
  setlength(keptLevels, keptCount);
  setlength(sourceIndices, keptCount);
  fillPos := 0;
  cursor := PByte(ABidiInfo);
  for n := 0 to ACount-1 do
  begin
    if not PUnicodeBidiInfo(cursor)^.IsRemoved then
    begin
      keptLevels[fillPos] := PUnicodeBidiInfo(cursor)^.BidiLevel;
      sourceIndices[fillPos] := n;
      inc(fillPos);
    end;
    inc(cursor, AStride);
  end;

  //order the compacted levels, then translate back to original indices
  result := GetUnicodeDisplayOrder(@keptLevels[0], keptCount);
  for n := low(result) to high(result) do
    result[n] := sourceIndices[result[n]];
end;
1649
1650end.
1651
Note: See TracBrowser for help on using the repository browser.