source: branches/xpascal/Tokenizer.pas

Last change on this file was 233, checked in by chronos, 17 months ago
  • Added: Support for procedures.
  • Added: Project pascal file can be opened from main menu. Last file name is remembered.
  • Modified: Improved XML output of source structure.
File size: 8.0 KB
Line 
1unit Tokenizer;
2
3interface
4
5uses
6 Classes, SysUtils;
7
8type
9 { TTokenizerPos }
10
11 TTokenizerPos = record
12 Index: Integer;
13 Pos: TPoint;
14 procedure Increment;
15 procedure IncrementLine;
16 end;
17
18 TTokenKind = (tkUnknown, tkIdentifier, tkSpecialSymbol, tkNumber, tkString,
19 tkKeyword);
20
21 TToken = record
22 Kind: TTokenKind;
23 Text: string;
24 function Create(Kind: TTokenKind; Text: string): TToken;
25 end;
26
27 TErrorEvent = procedure (Pos: TPoint; Text: string) of object;
28
29 TTokenizerState = (tsNone, tsIdentifier, tsString, tsStringEnd, tsNumber,
30 tsSpecialSymbol, tsLineComment);
31
32 { TTokenizer }
33
34 TTokenizer = class
35 private
36 FOnError: TErrorEvent;
37 State: TTokenizerState;
38 public
39 Pos: TTokenizerPos;
40 Source: string;
41 function IsAlpha(C: Char): Boolean;
42 function IsNumeric(C: Char): Boolean;
43 function IsAlphaNumeric(C: Char): Boolean;
44 function IsWhiteSpace(C: Char): Boolean;
45 function IsSpecialSymbol(C: Char): Boolean;
46 function IsSpecialSymbol2(Text: string): Boolean;
47 function IsIdentifier(Text: string): Boolean;
48 function IsOperator(Text: string): Boolean;
49 function IsKeyword(Text: string): Boolean;
50 procedure Init;
51 function GetNext: TToken;
52 function CheckNext(Text: string; Kind: TTokenKind): Boolean;
53 function CheckNextAndRead(Text: string; Kind: TTokenKind): Boolean;
54 function CheckNextKind(Kind: TTokenKind): Boolean;
55 function CheckNextKindAndRead(Kind: TTokenKind): Boolean;
56 procedure Expect(Text: string; Kind: TTokenKind);
57 procedure Error(Text: string);
58 property OnError: TErrorEvent read FOnError write FOnError;
59 end;
60
61
62implementation
63
64resourcestring
65 SUnknownToken = 'Unknown token %s';
66 SUnsupportedTokenizerState = 'Unsupported tokenizer state.';
67 SExpectedButFound = 'Expected %s but %s found.';
68
69{ TToken }
70
71function TToken.Create(Kind: TTokenKind; Text: string): TToken;
72begin
73 Result.Kind := Kind;
74 Result.Text := Text;
75end;
76
77{ TTokenizerPos }
78
79procedure TTokenizerPos.Increment;
80begin
81 Inc(Index);
82 Inc(Pos.X);
83end;
84
85procedure TTokenizerPos.IncrementLine;
86begin
87 Inc(Pos.Y);
88 Pos.X := 1;
89 Inc(Index);
90end;
91
92{ TTokenizer }
93
94function TTokenizer.IsAlpha(C: Char): Boolean;
95begin
96 Result := (C in ['A'..'Z']) or (C in ['a'..'z'])
97end;
98
99function TTokenizer.IsNumeric(C: Char): Boolean;
100begin
101 Result := C in ['0'..'9'];
102end;
103
104function TTokenizer.IsAlphaNumeric(C: Char): Boolean;
105begin
106 Result := IsAlpha(C) or IsNumeric(C);
107end;
108
109function TTokenizer.IsWhiteSpace(C: Char): Boolean;
110begin
111 Result := (C = ' ') or (C = #9);
112end;
113
114function TTokenizer.IsSpecialSymbol(C: Char): Boolean;
115begin
116 Result := (C = ';') or (C = '.') or (C = '(') or (C = ')') or (C = '=') or
117 (C = ':') or (C = '+') or (C = '-') or (C = ',') or (C = '/') or
118 (C = '<') or (C = '>') or (C = '*');
119end;
120
121function TTokenizer.IsSpecialSymbol2(Text: string): Boolean;
122begin
123 Result := (Text = ':=') or (Text = '//') or (Text = '<>') or (Text = '<=') or
124 (Text = '>=');
125end;
126
127function TTokenizer.IsIdentifier(Text: string): Boolean;
128var
129 I: Integer;
130begin
131 Result := True;
132 for I := 1 to Length(Text) do begin
133 if (I = 1) then begin
134 if not IsAlpha(Text[I]) then begin
135 Result := False;
136 Break;
137 end;
138 end else begin
139 if not IsAlphaNumeric(Text[I]) then begin
140 Result := False;
141 Break;
142 end;
143 end;
144 end;
145end;
146
147function TTokenizer.IsOperator(Text: string): Boolean;
148begin
149 Result := (Text = '+') or (Text = '-') or (Text = '=') or (Text = '<>') or
150 (Text = '*') or (Text = '/') or (Text = 'div') or (Text = '<=') or
151 (Text = '>=') or (Text = 'mod') or (Text = 'shl') or (Text = 'shr') or
152 (Text = 'and') or (Text = 'or') or (Text = 'xor') or (Text = 'not') or
153 (Text = '>') or (Text = '<');
154end;
155
156function TTokenizer.IsKeyword(Text: string): Boolean;
157begin
158 Result := (Text = 'begin') or (Text = 'end') or (Text = 'program') or
159 (Text = 'var') or (Text = 'const') or (Text = 'if') or (Text = 'then') or
160 (Text = 'else') or (Text = 'while') or (Text = 'do') or (Text = 'for') or
161 (Text = 'to') or (Text = 'repeat') or (Text = 'until') or (Text = 'break') or
162 (Text = 'continue') or (Text = 'function') or (Text = 'procedure');
163end;
164
165procedure TTokenizer.Init;
166begin
167 Pos.Index := 1;
168 Pos.Pos := Point(1, 1);
169end;
170
171function TTokenizer.GetNext: TToken;
172var
173 C: Char;
174begin
175 State := tsNone;
176 Result.Text := '';
177 Result.Kind := tkUnknown;
178 while Pos.Index < Length(Source) do begin
179 C := Source[Pos.Index];
180 if State = tsNone then begin
181 if IsAlpha(C) then begin
182 State := tsIdentifier;
183 Result.Kind := tkIdentifier;
184 Result.Text := C;
185 Pos.Increment;
186 end else
187 if IsWhiteSpace(C) then begin
188 Pos.Increment;
189 end else
190 if C = #10 then begin
191 Pos.IncrementLine;
192 end else
193 if IsSpecialSymbol(C) then begin
194 Result.Kind := tkSpecialSymbol;
195 Result.Text := C;
196 Pos.Increment;
197 State := tsSpecialSymbol;
198 end else
199 if C = '''' then begin
200 Pos.Increment;
201 Result.Kind := tkString;
202 State := tsString;
203 end else
204 if IsNumeric(C) then begin
205 State := tsNumber;
206 Result.Kind := tkNumber;
207 Result.Text := C;
208 Pos.Increment;
209 end else begin
210 Error(Format(SUnknownToken, [C]));
211 Break;
212 end;
213 end else
214 if State = tsIdentifier then begin
215 if IsAlphanumeric(C) then begin
216 Result.Text := Result.Text + C;
217 Pos.Increment;
218 end else begin
219 if IsKeyword(Result.Text) then
220 Result.Kind := tkKeyword;
221 Break;
222 end;
223 end else
224 if State = tsNumber then begin
225 if IsNumeric(C) then begin
226 Result.Text := Result.Text + C;
227 Pos.Increment;
228 end else begin
229 Break;
230 end;
231 end else
232 if State = tsString then begin
233 if C = '''' then begin
234 State := tsStringEnd;
235 Pos.Increment;
236 end else begin
237 Result.Text := Result.Text + C;
238 Pos.Increment;
239 end;
240 end else
241 if State = tsStringEnd then begin
242 if C = '''' then begin
243 State := tsString;
244 Result.Text := Result.Text + C;
245 Pos.Increment;
246 end else begin
247 Break;
248 end;
249 end else
250 if State = tsLineComment then begin
251 if C = #10 then begin
252 State := tsNone;
253 end else Pos.Increment;
254 end else
255 if State = tsSpecialSymbol then begin
256 if IsSpecialSymbol2(Result.Text + C) then begin
257 Result.Text := Result.Text + C;
258 Pos.Increment;
259 if Result.Text = '//' then begin
260 Result.Text := '';
261 State := tsLineComment;
262 end else Break;
263 end else begin
264 Break;
265 end;
266 end else
267 raise Exception.Create(SUnsupportedTokenizerState);
268 end;
269end;
270
271function TTokenizer.CheckNext(Text: string; Kind: TTokenKind): Boolean;
272var
273 LastPos: TTokenizerPos;
274 Token: TToken;
275begin
276 LastPos := Pos;
277 Token := GetNext;
278 Result := (Token.Text = Text) and (Token.Kind = Kind);
279 Pos := LastPos;
280end;
281
282function TTokenizer.CheckNextAndRead(Text: string; Kind: TTokenKind): Boolean;
283var
284 LastPos: TTokenizerPos;
285 Token: TToken;
286begin
287 LastPos := Pos;
288 Token := GetNext;
289 Result := (Token.Text = Text) and (Token.Kind = Kind);
290 if not Result then Pos := LastPos;
291end;
292
293function TTokenizer.CheckNextKind(Kind: TTokenKind): Boolean;
294var
295 LastPos: TTokenizerPos;
296 Token: TToken;
297begin
298 LastPos := Pos;
299 Token := GetNext;
300 Result := Token.Kind = Kind;
301 Pos := LastPos;
302end;
303
304function TTokenizer.CheckNextKindAndRead(Kind: TTokenKind): Boolean;
305var
306 LastPos: TTokenizerPos;
307 Token: TToken;
308begin
309 LastPos := Pos;
310 Token := GetNext;
311 Result := Token.Kind = Kind;
312 if not Result then Pos := LastPos;
313end;
314
315procedure TTokenizer.Expect(Text: string; Kind: TTokenKind);
316var
317 Token: TToken;
318begin
319 Token := GetNext;
320 if (Token.Text <> Text) or (Token.Kind <> Kind) then
321 Error(Format(SExpectedButFound, [Text, Token.Text]));
322end;
323
324procedure TTokenizer.Error(Text: string);
325begin
326 if Assigned(OnError) then
327 OnError(Pos.Pos, Text);
328end;
329
330end.
331
Note: See TracBrowser for help on using the repository browser.