source: trunk/Compiler/Analyzer.pas

Last change on this file was 75, checked in by chronos, 6 months ago
  • Modified: Removed U prefix from unit names.
  • Modified: Updated Common package.
File size: 12.4 KB
Line 
1unit Analyzer;
2
3interface
4
5uses
6 SysUtils, Variants, Classes, Dialogs, SourceCodePascal, FileUtil,
7 Generics.Collections;
8
9type
10 TErrorMessageEvent = procedure(Text: string; Position: TPoint; FileName: string) of object;
11 TDebugLogEvent = procedure(Text: string) of object;
12
13 TParserState = (psNone, psIdentifier, psConstantNumber, psConstantString,
14 psOperator, psEndOfFile, psLineComment, psBlockComment1, psBlockComment2,
15 psUnknown, psWhiteSpace, psConstantStringEnd, psBlockComment1First,
16 psCompilerDirective, psNoneShift, psConstantHexNumber);
17
18 TTokenType = (ttNone, ttIdentifier, ttConstantNumber, ttConstantString,
19 ttOperator, ttEndOfFile, ttLineComment, ttBlockComment1, ttBlockComment2,
20 ttUnknown, ttWhiteSpace, ttCompilerDirective);
21
22 TToken = class
23 Token: string;
24 CodePosition: TPoint;
25 TokenType: TTokenType;
26 end;
27
28 TGetSourceEvent = function (Name: string; var SourceCode: string): Boolean of object;
29
30 { TAnalyzer }
31
32 TAnalyzer = class
33 private
34 FFileName: string;
35 FOnDebugLog: TDebugLogEvent;
36 FOnErrorMessage: TErrorMessageEvent;
37 FNextToken: string;
38 FNextTokenType: TTokenType;
39 FOnGetSource: TGetSourceEvent;
40 FParserState: TParserState;
41 PreviousChar: Char;
42 CurrentChar: Char;
43 TokenCodePosition: TPoint;
44 LineEndingChar: Char;
45 procedure GetNextToken;
46 public
47 SysName: string;
48 Name: string;
49 ProgramCode: TProgram;
50 CodeStreamPosition: Integer;
51 CodePosition: TPoint;
52 SourceCode2: string;
53 Tokens: TObjectList<TToken>;
54 TokenIndex: Integer;
55 constructor Create;
56 destructor Destroy; override;
57 function IsAlphanumeric(Character: char): boolean;
58 function IsNumeric(Character: char): boolean;
59 function IsHex(Character: char): boolean;
60 function IsWhiteSpace(Character: char): boolean;
61 function IsAlphabetic(Character: char): boolean;
62 function IsIdentificator(Text: string): boolean;
63 function IsKeyword(Text: string): boolean;
64 function IsString(Text: string): Boolean;
65 function IsOperator(Text: string): boolean;
66 function ReadToken: string;
67 function NextToken: string;
68 function NextTokenType: TTokenType;
69 procedure Expect(Code: string);
70 procedure ErrorMessage(const Text: string; const Arguments: array of const;
71 TokenOffset: Integer = -1);
72 property OnErrorMessage: TErrorMessageEvent read FOnErrorMessage write FOnErrorMessage;
73 property OnDebugLog: TDebugLogEvent read FOnDebugLog write FOnDebugLog;
74 procedure Process;
75 procedure Log(Text: string);
76 function ParseModule(ProgramCode: TProgram): TSourceModule; virtual; abstract;
77 property FileName: string read FFileName write FFileName;
78 property OnGetSource: TGetSourceEvent read FOnGetSource
79 write FOnGetSource;
80 end;
81
82 { TAnalyzers }
83
84 TAnalyzers = class(TObjectList<TAnalyzer>)
85 function SearchBySysName(Name: string): TAnalyzer;
86 procedure LoadToStrings(Strings: TStrings);
87 end;
88
89resourcestring
90 SExpectedButFound = 'Expected "%s" but "%s" found.';
91
92
93implementation
94
95{ TAnalyzers }
96
97function TAnalyzers.SearchBySysName(Name: string): TAnalyzer;
98var
99 I: Integer;
100begin
101 I := 0;
102 while (I < Count) and (TAnalyzer(Items[I]).SysName <> Name) do Inc(I);
103 if I < Count then Result := TAnalyzer(Items[I])
104 else Result := nil;
105end;
106
107procedure TAnalyzers.LoadToStrings(Strings: TStrings);
108var
109 I: Integer;
110begin
111 try
112 Strings.BeginUpdate;
113 Strings.Clear;
114 for I := 0 to Count - 1 do
115 Strings.AddObject(TAnalyzer(Items[I]).Name, Items[I]);
116 finally
117 Strings.EndUpdate;
118 end;
119end;
120
121{ TAnalyzer }
122
123procedure TAnalyzer.ErrorMessage(const Text: string; const Arguments: array of const;
124 TokenOffset: Integer = -1);
125begin
126 if Assigned(FOnErrorMessage) then
127 if (TokenIndex + TokenOffset) < Tokens.Count then begin
128 FOnErrorMessage(Format(Text, Arguments),
129 TToken(Tokens[TokenIndex + TokenOffset]).CodePosition, FileName);
130 Log('Error: ' + Format(Text, Arguments));
131 end;
132end;
133
134procedure TAnalyzer.Expect(Code: string);
135begin
136 Log('Expect: ' + Code);
137 if NextToken <> Code then begin
138 ErrorMessage(SExpectedButFound, [Code, NextToken], 0);
139
140 // Recovery: try to find nearest same code
141 while (NextToken <> Code) and (NextTokenType <> ttEndOfFile) do
142 ReadToken;
143 end;
144 ReadToken;
145end;
146
147function TAnalyzer.IsAlphabetic(Character: char): boolean;
148begin
149 Result := (Character in ['a'..'z']) or (Character in ['A'..'Z']);
150end;
151
152constructor TAnalyzer.Create;
153begin
154 Tokens := TObjectList<TToken>.Create;
155 {$IFDEF windows}
156 LineEndingChar := LineEnding[1];
157 {$ELSE}
158 LineEndingChar := LineEnding;
159 {$ENDIF}
160end;
161
162destructor TAnalyzer.Destroy;
163begin
164 FreeAndNil(Tokens);
165 inherited;
166end;
167
168function TAnalyzer.IsAlphanumeric(Character: char): boolean;
169begin
170 Result := IsAlphabetic(Character) or IsNumeric(Character);
171end;
172
173function TAnalyzer.IsNumeric(Character: char): boolean;
174begin
175 Result := Character in ['0'..'9'];
176end;
177
178function TAnalyzer.IsHex(Character: char): boolean;
179begin
180 Result := IsNumeric(Character) or (Character in ['A'..'F']);
181end;
182
183function TAnalyzer.IsKeyword(Text: string): boolean;
184var
185 I: integer;
186begin
187 Result := False;
188 for I := 0 to High(Keywords) do
189 if Keywords[I] = Text then
190 Result := True;
191end;
192
193function TAnalyzer.IsString(Text: string): Boolean;
194begin
195 raise Exception.Create('Not implemented');
196end;
197
198function TAnalyzer.IsOperator(Text: string): boolean;
199var
200 I: integer;
201begin
202 Result := False;
203 for I := 0 to High(Operators) do
204 if Operators[I] = Text then
205 Result := True;
206end;
207
208function TAnalyzer.IsIdentificator(Text: string): boolean;
209var
210 I: integer;
211begin
212 Result := True;
213 if Length(Text) = 0 then
214 Result := False;
215 if IsKeyWord(Text) then
216 Result := False;
217 if Length(Text) > 0 then
218 if not (Text[1] in ['a'..'z', 'A'..'Z', '%', '_']) then
219 Result := False;
220 for I := 2 to Length(Text) do
221 if not (Text[i] in ['a'..'z', 'A'..'Z', '0'..'9', '_']) then
222 Result := False;
223end;
224
225function TAnalyzer.IsWhiteSpace(Character: char): boolean;
226begin
227 Result := (Character = ' ') or (Character = #13) or (Character = #10);
228end;
229
230procedure TAnalyzer.Process;
231var
232 NewToken: TToken;
233begin
234 CodePosition := Point(0, 1);
235 CurrentChar := #0;
236 PreviousChar := #0;
237 FNextToken := '';
238 FNextTokenType := ttNone;
239 CodeStreamPosition := 1;
240 Tokens.Clear;
241 TokenIndex := 0;
242 while CodeStreamPosition < Length(SourceCode2) do begin
243 NewToken := TToken.Create;
244 GetNextToken;
245 NewToken.CodePosition := TokenCodePosition;
246 NewToken.TokenType := FNextTokenType;
247 NewToken.Token := FNextToken;
248 Tokens.Add(NewToken);
249 end;
250end;
251
252procedure TAnalyzer.Log(Text: string);
253begin
254 if Assigned(FOnDebugLog) then
255 FOnDebugLog(Text);
256end;
257
258procedure TAnalyzer.GetNextToken;
259var
260 I: integer;
261 II: integer;
262 J: integer;
263const
264 SpecChar: set of char = [';', '.', ',', ':', '(', ')', '[', ']',
265 '+', '-', '/', '*', '^', '=', '<', '>', '@'];
266 DoubleSpecChar: array[0..6] of string = (':=', '..', '<=', '>=', '<>',
267 '+=', '-=');
268begin
269 FNextToken := '';
270 FNextTokenType := ttNone;
271 FParserState := psNone;
272
273 while True do begin
274 if CodeStreamPosition < Length(SourceCode2) then begin
275 CurrentChar := SourceCode2[CodeStreamPosition];
276 end else begin
277 FNextToken := '';
278 FParserState := psEndOfFile;
279 FNextTokenType := ttEndOfFile;
280 Break;
281 end;
282
283 if (FParserState = psNone) or (FParserState = psNoneShift) then begin
284 TokenCodePosition := CodePosition;
285 if IsWhiteSpace(CurrentChar) then
286 FParserState := psWhiteSpace
287 else
288 if CurrentChar = '{' then begin
289 FParserState := psBlockComment1First;
290 end else
291 if CurrentChar = '''' then begin
292 FParserState := psConstantString;
293 end else
294 if CurrentChar = '$' then begin
295 FParserState := psConstantHexNumber;
296 end else
297 if CurrentChar in SpecChar then begin
298 FParserState := psOperator;
299 FNextToken := FNextToken + CurrentChar;
300 end else
301 if IsAlphabetic(CurrentChar) then begin
302 FParserState := psIdentifier;
303 FNextToken := FNextToken + CurrentChar;
304 end else
305 if IsNumeric(CurrentChar) then begin
306 FPArserSTate := psConstantNumber;
307 FNextToken := FNextToken + CurrentChar;
308 end else FParserState := psUnknown;
309 end else
310 if FParserState = psLineComment then begin
311 if (CurrentChar = #13) or (CurrentChar = #10) then
312 FParserState := psNoneShift;
313 end else
314 if FParserState = psBlockComment1First then begin
315 if CurrentChar = '$' then FParserState := psCompilerDirective
316 else FParserSTate := psBlockComment1;
317 end else
318 if FParserState = psBlockComment1 then begin
319 if (CurrentChar = '}') then begin
320 FParserState := psNoneShift;
321 end;
322 end else
323 if FParserState = psCompilerDirective then begin
324 if (CurrentChar = '}') then begin
325 FParserState := psNoneShift;
326 FNextTokenType := ttCompilerDirective;
327 FNextToken := '';
328 //Break;
329 end else FNextToken := FNextToken + CurrentChar;
330 end else
331 if FParserState = psBlockComment2 then begin
332 if (PreviousChar = '*') and (CurrentChar = ')') then
333 FParserState := psNoneShift;
334 end else
335 if FParserState = psConstantString then
336 begin
337 if (CurrentChar = '''') then begin
338 FParserState := psConstantStringEnd;
339 end else FNextToken := FNextToken + CurrentChar;
340 end else
341 if FParserState = psConstantStringEnd then
342 begin
343 if (CurrentChar = '''') then begin
344 FParserState := psConstantString;
345 end else FParserState := psNone;
346 FNextTokenType := ttConstantString;
347 Break;
348 end else
349 if FParserState = psConstantHexNumber then
350 begin
351 if not IsHex(CurrentChar) then begin
352 FParserState := psNone;
353 FNextTokenType := ttConstantNumber;
354 Break;
355 end else FNextToken := FNextToken + CurrentChar;
356 end else
357 if FParserState = psConstantNumber then
358 begin
359 if not IsNumeric(CurrentChar) then begin
360 FParserState := psNone;
361 FNextTokenType := ttConstantNumber;
362 Break;
363 end else FNextToken := FNextToken + CurrentChar;
364 end else
365 if FParserState = psOperator then
366 begin
367 if (CurrentChar = '*') and (PreviousChar = '(') then
368 begin
369 FNextToken := '';
370 FParserState := psBlockComment2;
371 end else
372 if (CurrentChar = '/') and (PreviousChar = '/') then
373 begin
374 FNextToken := '';
375 FParserState := psLineComment;
376 end else
377 if not (CurrentChar in SpecChar) then begin
378 FNextTokenType := ttOperator;
379 Break;
380 end
381 else begin
382 J := 0;
383 while (J < Length(DoubleSpecChar)) and
384 ((PreviousChar + CurrentChar) <> DoubleSpecChar[J]) do
385 Inc(J);
386 if J < Length(DoubleSpecChar) then
387 FNextToken := FNextToken + CurrentChar
388 else begin
389 FNextTokenType := ttOperator;
390 Break;
391 end;
392 end;
393 end else
394 if FParserState = psIdentifier then
395 begin
396 if (not IsAlphanumeric(CurrentChar)) and (CurrentChar <> '_') then begin
397 FNextTokenType := ttIdentifier;
398 Break;
399 end else FNextToken := FNextToken + CurrentChar;
400 end
401 else
402 if FParserState = psWhiteSpace then begin
403 FParserState := psNone;
404 end;
405
406 if FParserState <> psNone then begin
407 // Update cursor position
408 Inc(CodePosition.X);
409 if (CurrentChar = LineEndingChar) then begin
410 CodePosition.X := 0;
411 Inc(CodePosition.Y);
412 end;
413
414 Inc(CodeStreamPosition);
415 PreviousChar := CurrentChar;
416 end;
417 end;
418end;
419
420function TAnalyzer.ReadToken: string;
421begin
422 if TokenIndex < Tokens.Count then begin
423 Result := TToken(Tokens[TokenIndex]).Token;
424 Inc(TokenIndex);
425 end else Result := '';
426 Log('ReadCode: ' + Result);
427end;
428
429function TAnalyzer.NextToken: string;
430begin
431 if TokenIndex < Tokens.Count then begin
432 Result := TToken(Tokens[TokenIndex]).Token;
433 end else Result := '';
434 Log('NextToken: ' + Result);
435end;
436
437function TAnalyzer.NextTokenType: TTokenType;
438begin
439 if TokenIndex < Tokens.Count then begin
440 Result := TToken(Tokens[TokenIndex]).TokenType;
441 end else Result := ttEndOfFile;
442end;
443
444end.
445
Note: See TracBrowser for help on using the repository browser.