1 | unit UTokenizer;
|
---|
2 |
|
---|
3 | {$mode Delphi}{$H+}
|
---|
4 |
|
---|
5 | interface
|
---|
6 |
|
---|
7 | uses
|
---|
8 | Classes, SysUtils;
|
---|
9 |
|
---|
10 | type
|
---|
11 |
|
---|
// Category of the token most recently produced by the parser.
// The member order is significant because it fixes the ordinal values.
TTokenType = (
  ttIdentifier,  // letter followed by letters or digits
  ttWhiteSpace,  // run of blanks / line breaks
  ttString,      // text enclosed in apostrophes
  ttNumber,      // run of decimal digits
  ttBrackets,    // NOTE(review): not assigned by the parser code in this unit
  ttSymbol       // operator or punctuation, one or two characters long
);
14 |
|
---|
// Internal states of the character-driven tokenizer state machine.
// The member order is significant because it fixes the ordinal values.
TTokenParseState = (
  psNone,              // between tokens, waiting for the first character
  psString,            // inside an apostrophe-quoted string
  psStringEnd,         // just saw an apostrophe inside a string
  psNumber,            // collecting decimal digits
  psIdentifier,        // collecting letters / digits of an identifier
  psRowComment,        // inside a comment that runs to the end of the line
  psSpecialSymbol,     // saw one symbol character, deciding on a second one
  psBlockComment1,     // inside a paren-star block comment
  psBlockComment2,     // inside a curly-brace block comment
  psBlockComment1End,  // saw a star inside a paren-star block comment
  psWhiteSpace         // collecting a run of white space
);
18 |
|
---|
{ TVoidParser }

// Pull-style tokenizer: every call to ParseNextToken reads bytes from Source
// and exposes the recognised token through the public fields below.
TVoidParser = class
private
  // Current state of the tokenizer state machine; it is kept between calls.
  ParseState: TTokenParseState;
public
  // Text of the current token. String tokens are stored without the
  // enclosing apostrophes; white space tokens carry an empty value.
  TokenValue: string;
  // Category of the current token.
  TokenType: TTokenType;
  // Location of the last character read: X is reset to 0 on #13 and
  // incremented for ordinary characters, Y is incremented on #10.
  Position: TPoint;
  // Value of Position at the first character of the current token.
  TokenStartPosition: TPoint;
  // Stream the characters are read from, one byte per character.
  Source: TStream;
  // Attaches ASource, resets the parser state and parses the first token.
  procedure Open(ASource: TStream);
  // Advances to the next token of Source.
  procedure ParseNextToken;
end;
33 |
|
---|
34 | implementation
|
---|
35 |
|
---|
36 | { TVoidParser }
|
---|
37 |
|
---|
// Binds the parser to ASource, rewinds the bookkeeping (state machine idle,
// column 0 of line 1) and immediately parses the first token so that the
// Token* fields are populated when Open returns.
// The stream itself is not repositioned; reading starts wherever ASource
// currently stands.
procedure TVoidParser.Open(ASource: TStream);
begin
  Source := ASource;
  ParseState := psNone;
  Position.X := 0;
  Position.Y := 1;
  ParseNextToken;
end;
45 |
|
---|
// Reads bytes from Source until one complete token has been recognised and
// publishes it through TokenValue, TokenType and TokenStartPosition.
// Comments (double-slash to end of line, paren-star blocks and curly-brace
// blocks) are consumed silently and never reported as tokens.
//
// The character that terminates a token is pushed back onto the stream so
// that the next call starts with it.
//
// At end of stream a token that is still pending is finalised (its type and
// value are set) and the state machine returns to idle.
// NOTE(review): there is no dedicated end-of-input token; when nothing is
// left to read the Token* fields keep their previous contents, so callers
// have to compare Source.Position with Source.Size themselves.
// NOTE(review): characters that start no token (for example '=' or '.') are
// still skipped silently while idle - confirm whether that is intended.
procedure TVoidParser.ParseNextToken;
const
  Letters = ['A'..'Z', 'a'..'z'];
  Digits = ['0'..'9'];
  // '<' and '>' must be accepted here, otherwise the two-character
  // operators starting with them can never be recognised.
  SymbolStarters = ['/', '(', ')', '+', '-', '*', ':', ';', ',', '<', '>'];
  // #9 is included so that a leading tab starts a white space token, in line
  // with the white space state, which already consumes every character <= ' '.
  WhiteSpaceStarters = [#9, #10, #13, ' '];
var
  Character: Char;
  PreviousPosition: TPoint;

  // Un-reads the current character: rewinds the stream by one byte and
  // restores the position that was valid before the character was read.
  // Restoring the saved position (instead of decrementing X) keeps the line
  // counter correct when the pushed back character is a line feed.
  procedure PushBack;
  begin
    Source.Position := Source.Position - 1;
    Position := PreviousPosition;
  end;

begin
  repeat
    if Source.Position >= Source.Size then begin
      // End of input: complete whatever token was being collected so that
      // the caller does not see the type or value of the previous token.
      case ParseState of
        psStringEnd:
          TokenType := ttString;
        psSpecialSymbol:
          TokenType := ttSymbol;
        psWhiteSpace: begin
          TokenValue := '';
          TokenType := ttWhiteSpace;
        end;
      end;
      ParseState := psNone;
      Break;
    end;
    Character := Char(Source.ReadByte);

    // Track line / column; remember the old value for a possible push back.
    PreviousPosition := Position;
    if Character = #13 then Position.X := 0
    else if Character = #10 then Inc(Position.Y)
    else Inc(Position.X);

    case ParseState of
      psNone: begin
        TokenStartPosition := Position;
        if Character in WhiteSpaceStarters then begin
          TokenType := ttWhiteSpace;
          ParseState := psWhiteSpace;
        end else
        if Character = '''' then begin
          TokenValue := '';
          ParseState := psString;
        end else
        if Character in Letters then begin
          TokenValue := Character;
          TokenType := ttIdentifier;
          ParseState := psIdentifier;
        end else
        if Character in Digits then begin
          TokenType := ttNumber;
          TokenValue := Character;
          ParseState := psNumber;
        end else
        if Character = '{' then begin
          ParseState := psBlockComment2;
        end else
        if Character in SymbolStarters then begin
          TokenValue := Character;
          ParseState := psSpecialSymbol;
        end;
        // Any other character is ignored and the parser stays idle.
      end;
      psNumber: begin
        if Character in Digits then
          TokenValue := TokenValue + Character
        else begin
          ParseState := psNone;
          PushBack;
          Break;
        end;
      end;
      psRowComment: begin
        // The line break ends the comment; it is re-read as white space.
        if Character in [#13, #10] then begin
          ParseState := psNone;
          PushBack;
        end;
      end;
      psWhiteSpace: begin
        if Character > ' ' then begin
          ParseState := psNone;
          TokenValue := '';
          TokenType := ttWhiteSpace;
          PushBack;
          Break;
        end;
      end;
      psSpecialSymbol: begin
        if (TokenValue = '/') and (Character = '/') then begin
          ParseState := psRowComment;
        end else
        if (TokenValue = '(') and (Character = '*') then begin
          ParseState := psBlockComment1;
        end else
        if ((TokenValue = '>') and (Character = '=')) or
          ((TokenValue = '<') and (Character = '=')) or
          ((TokenValue = '<') and (Character = '>')) or
          ((TokenValue = ':') and (Character = '=')) then begin
          // Two-character operator: the second character belongs to it.
          TokenValue := TokenValue + Character;
          TokenType := ttSymbol;
          ParseState := psNone;
          Break;
        end else begin
          // Single-character symbol: the current character starts the
          // next token.
          TokenType := ttSymbol;
          ParseState := psNone;
          PushBack;
          Break;
        end;
      end;
      psBlockComment1: begin
        if Character = '*' then begin
          ParseState := psBlockComment1End;
        end;
      end;
      psBlockComment1End: begin
        if Character = ')' then begin
          ParseState := psNone;
        end else begin
          // Not the end of the comment. The character is re-examined in the
          // comment state so that a run of stars before ')' still closes it.
          ParseState := psBlockComment1;
          PushBack;
        end;
      end;
      psBlockComment2: begin
        if Character = '}' then
          ParseState := psNone;
      end;
      psString: begin
        if Character = '''' then begin
          ParseState := psStringEnd;
        end else TokenValue := TokenValue + Character;
      end;
      psStringEnd: begin
        if Character = '''' then begin
          // A doubled apostrophe stands for one literal apostrophe.
          TokenValue := TokenValue + Character;
          ParseState := psString;
        end else begin
          TokenType := ttString;
          ParseState := psNone;
          PushBack;
          Break;
        end;
      end;
      psIdentifier: begin
        if Character in (Letters + Digits) then begin
          TokenValue := TokenValue + Character;
        end else begin
          TokenType := ttIdentifier;
          ParseState := psNone;
          PushBack;
          Break;
        end;
      end;
    end;
  until False;
end;
189 |
|
---|
190 | end.
|
---|