source: trunk/Compiler/Analyze/UParser.pas@ 2

Last change on this file since 2 was 2, checked in by george, 11 years ago
File size: 11.7 KB
Line 
1unit UParser;
2
3{$MODE Delphi}
4
5interface
6
7uses
8 SysUtils, Variants, Classes, Contnrs,
9 Dialogs, USourceCode, FileUtil;
10
11type
12 TErrorMessageEvent = procedure(Text: string; Position: TPoint; FileName: string) of object;
13 TDebugLogEvent = procedure(Text: string) of object;
14
15 TParserState = (psNone, psIdentifier, psConstantNumber, psConstantString,
16 psOperator, psEndOfFile, psLineComment, psBlockComment1, psBlockComment2,
17 psUnknown, psWhiteSpace, psConstantStringEnd, psBlockComment1First,
18 psCompilerDirective, psNoneShift, psConstantHexNumber);
19
20 TTokenType = (ttNone, ttIdentifier, ttConstantNumber, ttConstantString,
21 ttOperator, ttEndOfFile, ttLineComment, ttBlockComment1, ttBlockComment2,
22 ttUnknown, ttWhiteSpace, ttCompilerDirective);
23
24 TToken = class
25 Token: string;
26 CodePosition: TPoint;
27 TokenType: TTokenType;
28 end;
29
30 { TBaseParser }
31
32 TBaseParser = class
33 private
34 FFileName: string;
35 FOnDebugLog: TDebugLogEvent;
36 FOnErrorMessage: TErrorMessageEvent;
37 FNextToken: string;
38 FNextTokenType: TTokenType;
39 FParserState: TParserState;
40 PreviousChar: Char;
41 CurrentChar: Char;
42 TokenCodePosition: TPoint;
43 procedure GetNextToken;
44 public
45 ProgramCode: TProgram;
46 CodeStreamPosition: Integer;
47 CodePosition: TPoint;
48 SourceCodeText: TStringList;
49 Tokens: TObjectList; // TObjectList<TToken>
50 TokenIndex: Integer;
51 constructor Create;
52 destructor Destroy; override;
53 function IsAlphanumeric(Character: char): boolean;
54 function IsNumeric(Character: char): boolean;
55 function IsHex(Character: char): boolean;
56 function IsWhiteSpace(Character: char): boolean;
57 function IsAlphabetic(Character: char): boolean;
58 function IsIdentificator(Text: string): boolean;
59 function IsKeyword(Text: string): boolean;
60 function IsString(Text: string): Boolean;
61 function IsOperator(Text: string): boolean;
62 function ReadToken: string;
63 function NextToken: string;
64 function NextTokenType: TTokenType;
65 procedure Expect(Code: string);
66 procedure ErrorMessage(const Text: string; const Arguments: array of const;
67 TokenOffset: Integer = -1);
68 property OnErrorMessage: TErrorMessageEvent read FOnErrorMessage write FOnErrorMessage;
69 property OnDebugLog: TDebugLogEvent read FOnDebugLog write FOnDebugLog;
70 procedure Process;
71 procedure Log(Text: string);
72 property FileName: string read FFileName write FFileName;
73 end;
74
75resourcestring
76 SExpectedButFound = 'Expected "%s" but "%s" found.';
77
78implementation
79
80{ TBaseParser }
81
82procedure TBaseParser.ErrorMessage(const Text: string; const Arguments: array of const;
83 TokenOffset: Integer = -1);
84begin
85 if Assigned(FOnErrorMessage) then
86 if (TokenIndex + TokenOffset) < Tokens.Count then begin
87 FOnErrorMessage(Format(Text, Arguments),
88 TToken(Tokens[TokenIndex + TokenOffset]).CodePosition, FileName);
89 end;
90end;
91
92procedure TBaseParser.Expect(Code: string);
93begin
94 Log('Expect: ' + Code);
95 if NextToken <> Code then begin
96 ErrorMessage(SExpectedButFound, [Code, NextToken], 0);
97
98 // Recovery: try to find nearest same code
99 while (NextToken <> Code) and (NextTokenType <> ttEndOfFile) do
100 ReadToken;
101 end;
102 ReadToken;
103end;
104
105function TBaseParser.IsAlphabetic(Character: char): boolean;
106begin
107 Result := (Character in ['a'..'z']) or (Character in ['A'..'Z']);
108end;
109
110constructor TBaseParser.Create;
111begin
112 Tokens := TObjectList.Create;
113end;
114
115destructor TBaseParser.Destroy;
116begin
117 Tokens.Free;
118 inherited Destroy;
119end;
120
121function TBaseParser.IsAlphanumeric(Character: char): boolean;
122begin
123 Result := IsAlphabetic(Character) or IsNumeric(Character);
124end;
125
126function TBaseParser.IsNumeric(Character: char): boolean;
127begin
128 Result := Character in ['0'..'9'];
129end;
130
131function TBaseParser.IsHex(Character: char): boolean;
132begin
133 Result := IsNumeric(Character) or (Character in ['A'..'F']);
134end;
135
136function TBaseParser.IsKeyword(Text: string): boolean;
137var
138 I: integer;
139begin
140 Result := False;
141 for I := 0 to High(Keywords) do
142 if Keywords[I] = Text then
143 Result := True;
144end;
145
146function TBaseParser.IsString(Text: string): Boolean;
147begin
148 raise Exception.Create('Not implemented');
149end;
150
151function TBaseParser.IsOperator(Text: string): boolean;
152var
153 I: integer;
154begin
155 Result := False;
156 for I := 0 to High(Operators) do
157 if Operators[I] = Text then
158 Result := True;
159end;
160
161function TBaseParser.IsIdentificator(Text: string): boolean;
162var
163 I: integer;
164begin
165 Result := True;
166 if Length(Text) = 0 then
167 Result := False;
168 if IsKeyWord(Text) then
169 Result := False;
170 if Length(Text) > 0 then
171 if not (Text[1] in ['a'..'z', 'A'..'Z', '%', '_']) then
172 Result := False;
173 for I := 2 to Length(Text) do
174 if not (Text[i] in ['a'..'z', 'A'..'Z', '0'..'9', '_']) then
175 Result := False;
176end;
177
178function TBaseParser.IsWhiteSpace(Character: char): boolean;
179begin
180 Result := (Character = ' ') or (Character = #13) or (Character = #10);
181end;
182
183procedure TBaseParser.Process;
184var
185 NewToken: TToken;
186begin
187 CodePosition := Point(0, 1);
188 CurrentChar := #0;
189 PreviousChar := #0;
190 FNextToken := '';
191 FNextTokenType := ttNone;
192 CodeStreamPosition := 1;
193 Tokens.Clear;
194 TokenIndex := 0;
195 while CodeStreamPosition < Length(SourceCodeText.Text) do begin
196 NewToken := TToken.Create;
197 GetNextToken;
198 NewToken.CodePosition := TokenCodePosition;
199 NewToken.TokenType := FNextTokenType;
200 NewToken.Token := FNextToken;
201 Tokens.Add(NewToken);
202 end;
203end;
204
205procedure TBaseParser.Log(Text: string);
206begin
207 if Assigned(FOnDebugLog) then
208 FOnDebugLog(Text);
209end;
210
211procedure TBaseParser.GetNextToken;
212var
213 I: integer;
214 II: integer;
215 J: integer;
216const
217 SpecChar: set of char = [';', '.', ',', ':', '(', ')', '[', ']',
218 '+', '-', '/', '*', '^', '=', '<', '>', '@'];
219 DoubleSpecChar: array[0..6] of string = (':=', '..', '<=', '>=', '<>',
220 '+=', '-=');
221begin
222 FNextToken := '';
223 FNextTokenType := ttNone;
224 FParserState := psNone;
225 with SourceCodeText do
226 while True do begin
227 if CodeStreamPosition < Length(Text) then begin
228 CurrentChar := Text[CodeStreamPosition];
229 end else begin
230 FNextToken := '';
231 FParserState := psEndOfFile;
232 FNextTokenType := ttEndOfFile;
233 Break;
234 end;
235
236 if (FParserState = psNone) or (FParserState = psNoneShift) then begin
237 TokenCodePosition := CodePosition;
238 if IsWhiteSpace(CurrentChar) then
239 FParserState := psWhiteSpace
240 else
241 if CurrentChar = '{' then begin
242 FParserState := psBlockComment1First;
243 end else
244 if CurrentChar = '''' then begin
245 FParserState := psConstantString;
246 end else
247 if CurrentChar = '$' then begin
248 FParserState := psConstantHexNumber;
249 end else
250 if CurrentChar in SpecChar then begin
251 FParserState := psOperator;
252 FNextToken := FNextToken + CurrentChar;
253 end else
254 if IsAlphabetic(CurrentChar) then begin
255 FParserState := psIdentifier;
256 FNextToken := FNextToken + CurrentChar;
257 end else
258 if IsNumeric(CurrentChar) then begin
259 FPArserSTate := psConstantNumber;
260 FNextToken := FNextToken + CurrentChar;
261 end else FParserState := psUnknown;
262 end else
263 if FParserState = psLineComment then begin
264 if (CurrentChar = #13) or (CurrentChar = #10) then
265 FParserState := psNoneShift;
266 end else
267 if FParserState = psBlockComment1First then begin
268 if CurrentChar = '$' then FParserState := psCompilerDirective
269 else FParserSTate := psBlockComment1;
270 end else
271 if FParserState = psBlockComment1 then begin
272 if (CurrentChar = '}') then begin
273 FParserState := psNoneShift;
274 end;
275 end else
276 if FParserState = psCompilerDirective then begin
277 if (CurrentChar = '}') then begin
278 FParserState := psNoneShift;
279 FNextTokenType := ttCompilerDirective;
280 FNextToken := '';
281 //Break;
282 end else FNextToken := FNextToken + CurrentChar;
283 end else
284 if FParserState = psBlockComment2 then begin
285 if (PreviousChar = '*') and (CurrentChar = ')') then
286 FParserState := psNoneShift;
287 end else
288 if FParserState = psConstantString then
289 begin
290 if (CurrentChar = '''') then begin
291 FParserState := psConstantStringEnd;
292 end else FNextToken := FNextToken + CurrentChar;
293 end else
294 if FParserState = psConstantStringEnd then
295 begin
296 if (CurrentChar = '''') then begin
297 FParserState := psConstantString;
298 end else FParserState := psNone;
299 FNextTokenType := ttConstantString;
300 Break;
301 end else
302 if FParserState = psConstantHexNumber then
303 begin
304 if not IsHex(CurrentChar) then begin
305 FParserState := psNone;
306 FNextTokenType := ttConstantNumber;
307 Break;
308 end else FNextToken := FNextToken + CurrentChar;
309 end else
310 if FParserState = psConstantNumber then
311 begin
312 if not IsNumeric(CurrentChar) then begin
313 FParserState := psNone;
314 FNextTokenType := ttConstantNumber;
315 Break;
316 end else FNextToken := FNextToken + CurrentChar;
317 end else
318 if FParserState = psOperator then
319 begin
320 if (CurrentChar = '*') and (PreviousChar = '(') then
321 begin
322 FNextToken := '';
323 FParserState := psBlockComment2;
324 end else
325 if (CurrentChar = '/') and (PreviousChar = '/') then
326 begin
327 FNextToken := '';
328 FParserState := psLineComment;
329 end else
330 if not (CurrentChar in SpecChar) then begin
331 FNextTokenType := ttOperator;
332 Break;
333 end
334 else begin
335 J := 0;
336 while (J < Length(DoubleSpecChar)) and
337 ((PreviousChar + CurrentChar) <> DoubleSpecChar[J]) do
338 Inc(J);
339 if J < Length(DoubleSpecChar) then
340 FNextToken := FNextToken + CurrentChar
341 else begin
342 FNextTokenType := ttOperator;
343 Break;
344 end;
345 end;
346 end else
347 if FParserState = psIdentifier then
348 begin
349 if (not IsAlphanumeric(CurrentChar)) and (CurrentChar <> '_') then begin
350 FNextTokenType := ttIdentifier;
351 Break;
352 end else FNextToken := FNextToken + CurrentChar;
353 end
354 else
355 if FParserState = psWhiteSpace then begin
356 FParserState := psNone;
357 end;
358
359 if FParserState <> psNone then begin
360 // Update cursor position
361 Inc(CodePosition.X);
362 if (CurrentChar = LineEnding) then begin
363 CodePosition.X := 0;
364 Inc(CodePosition.Y);
365 end;
366
367 Inc(CodeStreamPosition);
368 PreviousChar := CurrentChar;
369 end;
370 end;
371end;
372
373function TBaseParser.ReadToken: string;
374begin
375 if TokenIndex < Tokens.Count then begin
376 Result := TToken(Tokens[TokenIndex]).Token;
377 Inc(TokenIndex);
378 end else Result := '';
379 Log('ReadCode: ' + Result);
380end;
381
382function TBaseParser.NextToken: string;
383begin
384 if TokenIndex < Tokens.Count then begin
385 Result := TToken(Tokens[TokenIndex]).Token;
386 end else Result := '';
387 Log('NextToken: ' + Result);
388end;
389
390function TBaseParser.NextTokenType: TTokenType;
391begin
392 if TokenIndex < Tokens.Count then begin
393 Result := TToken(Tokens[TokenIndex]).TokenType;
394 end else Result := ttEndOfFile;
395end;
396
397end.
398
Note: See TracBrowser for help on using the repository browser.