1 | unit Analyzer;
|
---|
2 |
|
---|
3 | interface
|
---|
4 |
|
---|
5 | uses
|
---|
6 | SysUtils, Variants, Classes, Dialogs, SourceCodePascal, FileUtil,
|
---|
7 | Generics.Collections;
|
---|
8 |
|
---|
type
  { Handler invoked to report an error. TAnalyzer.ErrorMessage passes the
    formatted message, the position of the related token and the analyzer's
    FileName. }
  TErrorMessageEvent = procedure(Text: string; Position: TPoint;
    FileName: string) of object;

  { Handler receiving one diagnostic text produced by TAnalyzer.Log. }
  TDebugLogEvent = procedure(Text: string) of object;
12 |
|
---|
13 | TParserState = (psNone, psIdentifier, psConstantNumber, psConstantString,
|
---|
14 | psOperator, psEndOfFile, psLineComment, psBlockComment1, psBlockComment2,
|
---|
15 | psUnknown, psWhiteSpace, psConstantStringEnd, psBlockComment1First,
|
---|
16 | psCompilerDirective, psNoneShift, psConstantHexNumber);
|
---|
17 |
|
---|
18 | TTokenType = (ttNone, ttIdentifier, ttConstantNumber, ttConstantString,
|
---|
19 | ttOperator, ttEndOfFile, ttLineComment, ttBlockComment1, ttBlockComment2,
|
---|
20 | ttUnknown, ttWhiteSpace, ttCompilerDirective);
|
---|
21 |
|
---|
22 | TToken = class
|
---|
23 | Token: string;
|
---|
24 | CodePosition: TPoint;
|
---|
25 | TokenType: TTokenType;
|
---|
26 | end;
|
---|
27 |
|
---|
28 | TGetSourceEvent = function (Name: string; var SourceCode: string): Boolean of object;
|
---|
29 |
|
---|
30 | { TAnalyzer }
|
---|
31 |
|
---|
32 | TAnalyzer = class
|
---|
33 | private
|
---|
34 | FFileName: string;
|
---|
35 | FOnDebugLog: TDebugLogEvent;
|
---|
36 | FOnErrorMessage: TErrorMessageEvent;
|
---|
37 | FNextToken: string;
|
---|
38 | FNextTokenType: TTokenType;
|
---|
39 | FOnGetSource: TGetSourceEvent;
|
---|
40 | FParserState: TParserState;
|
---|
41 | PreviousChar: Char;
|
---|
42 | CurrentChar: Char;
|
---|
43 | TokenCodePosition: TPoint;
|
---|
44 | LineEndingChar: Char;
|
---|
45 | procedure GetNextToken;
|
---|
46 | public
|
---|
47 | SysName: string;
|
---|
48 | Name: string;
|
---|
49 | ProgramCode: TProgram;
|
---|
50 | CodeStreamPosition: Integer;
|
---|
51 | CodePosition: TPoint;
|
---|
52 | SourceCode2: string;
|
---|
53 | Tokens: TObjectList<TToken>;
|
---|
54 | TokenIndex: Integer;
|
---|
55 | constructor Create;
|
---|
56 | destructor Destroy; override;
|
---|
57 | function IsAlphanumeric(Character: char): boolean;
|
---|
58 | function IsNumeric(Character: char): boolean;
|
---|
59 | function IsHex(Character: char): boolean;
|
---|
60 | function IsWhiteSpace(Character: char): boolean;
|
---|
61 | function IsAlphabetic(Character: char): boolean;
|
---|
62 | function IsIdentificator(Text: string): boolean;
|
---|
63 | function IsKeyword(Text: string): boolean;
|
---|
64 | function IsString(Text: string): Boolean;
|
---|
65 | function IsOperator(Text: string): boolean;
|
---|
66 | function ReadToken: string;
|
---|
67 | function NextToken: string;
|
---|
68 | function NextTokenType: TTokenType;
|
---|
69 | procedure Expect(Code: string);
|
---|
70 | procedure ErrorMessage(const Text: string; const Arguments: array of const;
|
---|
71 | TokenOffset: Integer = -1);
|
---|
72 | property OnErrorMessage: TErrorMessageEvent read FOnErrorMessage write FOnErrorMessage;
|
---|
73 | property OnDebugLog: TDebugLogEvent read FOnDebugLog write FOnDebugLog;
|
---|
74 | procedure Process;
|
---|
75 | procedure Log(Text: string);
|
---|
76 | function ParseModule(ProgramCode: TProgram): TSourceModule; virtual; abstract;
|
---|
77 | property FileName: string read FFileName write FFileName;
|
---|
78 | property OnGetSource: TGetSourceEvent read FOnGetSource
|
---|
79 | write FOnGetSource;
|
---|
80 | end;
|
---|
81 |
|
---|
82 | { TAnalyzers }
|
---|
83 |
|
---|
84 | TAnalyzers = class(TObjectList<TAnalyzer>)
|
---|
85 | function SearchBySysName(Name: string): TAnalyzer;
|
---|
86 | procedure LoadToStrings(Strings: TStrings);
|
---|
87 | end;
|
---|
88 |
|
---|
resourcestring
  // Format template used by TAnalyzer.Expect: expected token, found token.
  SExpectedButFound = 'Expected "%s" but "%s" found.';
91 |
|
---|
92 |
|
---|
93 | implementation
|
---|
94 |
|
---|
95 | { TAnalyzers }
|
---|
96 |
|
---|
{ Returns the first analyzer in the list whose SysName equals Name
  (case-sensitive comparison), or nil when there is none. }
function TAnalyzers.SearchBySysName(Name: string): TAnalyzer;
var
  Index: Integer;
begin
  Result := nil;
  for Index := 0 to Count - 1 do
    if Items[Index].SysName = Name then begin
      Result := Items[Index];
      Break;
    end;
end;
106 |
|
---|
{ Replaces the content of Strings with one entry per analyzer: the analyzer's
  Name as the text and the analyzer instance as the associated object. }
procedure TAnalyzers.LoadToStrings(Strings: TStrings);
var
  I: Integer;
begin
  // BeginUpdate stays outside the try block so that EndUpdate is only
  // executed when the matching BeginUpdate actually succeeded.
  Strings.BeginUpdate;
  try
    Strings.Clear;
    for I := 0 to Count - 1 do
      Strings.AddObject(Items[I].Name, Items[I]);
  finally
    Strings.EndUpdate;
  end;
end;
120 |
|
---|
121 | { TAnalyzer }
|
---|
122 |
|
---|
{ Formats Text with Arguments and reports it through OnErrorMessage together
  with a source position and FileName; the same message is also logged with
  an 'Error: ' prefix. Nothing happens when no OnErrorMessage handler is set.

  TokenOffset is added to TokenIndex to select the token whose position is
  reported (default -1 = the token read last). An index outside the token
  list is clamped to the nearest existing token, so errors raised before the
  first or after the last token are still reported; with an empty token list
  the scanner's CodePosition is used. }
procedure TAnalyzer.ErrorMessage(const Text: string; const Arguments: array of const;
  TokenOffset: Integer = -1);
var
  FormattedText: string;
  Idx: Integer;
  Position: TPoint;
begin
  if not Assigned(FOnErrorMessage) then
    Exit;

  // Format once and reuse for both the event and the log.
  FormattedText := Format(Text, Arguments);

  if Tokens.Count > 0 then begin
    Idx := TokenIndex + TokenOffset;
    if Idx < 0 then
      Idx := 0
    else if Idx >= Tokens.Count then
      Idx := Tokens.Count - 1;
    Position := Tokens[Idx].CodePosition;
  end else
    Position := CodePosition;

  FOnErrorMessage(FormattedText, Position, FileName);
  Log('Error: ' + FormattedText);
end;
133 |
|
---|
{ Consumes the token Code. When the current token differs, an
  SExpectedButFound error is reported for the current token and tokens are
  skipped until Code or the end of file is reached; the token found there is
  then consumed as well. }
procedure TAnalyzer.Expect(Code: string);
begin
  Log('Expect: ' + Code);

  // Expected token is present: just consume it.
  if NextToken = Code then begin
    ReadToken;
    Exit;
  end;

  ErrorMessage(SExpectedButFound, [Code, NextToken], 0);

  // Recovery: try to find nearest same code
  while not ((NextToken = Code) or (NextTokenType = ttEndOfFile)) do
    ReadToken;

  ReadToken;
end;
146 |
|
---|
{ True for the ASCII letters 'a'..'z' and 'A'..'Z'. }
function TAnalyzer.IsAlphabetic(Character: char): boolean;
begin
  Result := Character in ['a'..'z', 'A'..'Z'];
end;
151 |
|
---|
{ Creates the analyzer with an empty token list and selects the character
  that GetNextToken uses to advance the line counter. }
constructor TAnalyzer.Create;
begin
  // On Windows only the first character of LineEnding is taken; elsewhere
  // LineEnding is assigned directly.
  {$IFDEF windows}
  LineEndingChar := LineEnding[1];
  {$ELSE}
  LineEndingChar := LineEnding;
  {$ENDIF}
  Tokens := TObjectList<TToken>.Create;
end;
161 |
|
---|
{ Releases the token list and then runs the inherited destructor. }
destructor TAnalyzer.Destroy;
begin
  FreeAndNil(Tokens);
  inherited Destroy;
end;
167 |
|
---|
{ True when Character is an ASCII letter (IsAlphabetic) or a decimal digit
  (IsNumeric). }
function TAnalyzer.IsAlphanumeric(Character: char): boolean;
begin
  if IsAlphabetic(Character) then
    Result := True
  else
    Result := IsNumeric(Character);
end;
172 |
|
---|
{ True for the decimal digits '0'..'9'. }
function TAnalyzer.IsNumeric(Character: char): boolean;
begin
  Result := (Character >= '0') and (Character <= '9');
end;
177 |
|
---|
{ True when Character is a hexadecimal digit: '0'..'9', 'A'..'F' or
  'a'..'f'. Both letter cases are accepted because hexadecimal literals are
  not case-sensitive (e.g. $ff and $FF denote the same value). }
function TAnalyzer.IsHex(Character: char): boolean;
begin
  Result := IsNumeric(Character) or (Character in ['A'..'F', 'a'..'f']);
end;
182 |
|
---|
{ True when Text equals one of the entries of the Keywords table
  (exact, case-sensitive comparison). }
function TAnalyzer.IsKeyword(Text: string): boolean;
var
  Index: integer;
begin
  for Index := 0 to High(Keywords) do
    if Keywords[Index] = Text then begin
      Result := True;
      Exit;
    end;
  Result := False;
end;
192 |
|
---|
{ Placeholder: always raises Exception with the message 'Not implemented'. }
function TAnalyzer.IsString(Text: string): Boolean;
begin
  raise Exception.Create('Not implemented');
end;
197 |
|
---|
{ True when Text equals one of the entries of the Operators table
  (exact, case-sensitive comparison). }
function TAnalyzer.IsOperator(Text: string): boolean;
var
  Index: integer;
begin
  for Index := 0 to High(Operators) do
    if Operators[Index] = Text then begin
      Result := True;
      Exit;
    end;
  Result := False;
end;
207 |
|
---|
{ True when Text is a valid identifier: it is not empty, it is not a keyword,
  its first character is an ASCII letter, '%' or '_', and every following
  character is an ASCII letter, a decimal digit or '_'. }
function TAnalyzer.IsIdentificator(Text: string): boolean;
var
  Index: integer;
begin
  Result := False;

  if Length(Text) = 0 then
    Exit;
  if IsKeyWord(Text) then
    Exit;
  if not (Text[1] in ['a'..'z', 'A'..'Z', '%', '_']) then
    Exit;
  for Index := 2 to Length(Text) do
    if not (Text[Index] in ['a'..'z', 'A'..'Z', '0'..'9', '_']) then
      Exit;

  Result := True;
end;
224 |
|
---|
{ True when Character separates tokens: space, horizontal tab (#9),
  line feed (#10) or carriage return (#13). }
function TAnalyzer.IsWhiteSpace(Character: char): boolean;
begin
  Result := Character in [' ', #9, #10, #13];
end;
229 |
|
---|
{ Tokenizes SourceCode2 from the beginning. The scanner state is reset, the
  token list is cleared and tokens are appended until GetNextToken reports
  the end of the source; the list therefore always ends with exactly one
  ttEndOfFile token. TokenIndex is reset to 0. }
procedure TAnalyzer.Process;
var
  NewToken: TToken;
begin
  CodePosition := Point(0, 1);
  CurrentChar := #0;
  PreviousChar := #0;
  FNextToken := '';
  FNextTokenType := ttNone;
  CodeStreamPosition := 1;
  Tokens.Clear;
  TokenIndex := 0;
  repeat
    // Scan first, allocate afterwards: nothing to release if scanning raises.
    GetNextToken;
    NewToken := TToken.Create;
    NewToken.CodePosition := TokenCodePosition;
    NewToken.TokenType := FNextTokenType;
    NewToken.Token := FNextToken;
    Tokens.Add(NewToken);
  until FNextTokenType = ttEndOfFile;
end;

{ Passes Text to the OnDebugLog handler when one is assigned. }
procedure TAnalyzer.Log(Text: string);
begin
  if Assigned(FOnDebugLog) then
    FOnDebugLog(Text);
end;

{ Scans the next token of SourceCode2 starting at CodeStreamPosition and
  stores its text in FNextToken, its type in FNextTokenType and its start
  position in TokenCodePosition.

  Whitespace, comments ('//', '{ }', '(* *)') and compiler directives
  ('{$ }') are skipped. Every call made while CodeStreamPosition is inside
  the source consumes at least one character. When the end of the source is
  reached in the middle of an identifier, number, operator, terminated string
  or unknown character, that token is returned; otherwise an empty
  ttEndOfFile token is returned. }
procedure TAnalyzer.GetNextToken;
const
  SpecChar: set of char = [';', '.', ',', ':', '(', ')', '[', ']',
    '+', '-', '/', '*', '^', '=', '<', '>', '@'];
  DoubleSpecChar: array[0..6] of string = (':=', '..', '<=', '>=', '<>',
    '+=', '-=');
var
  J: integer;
begin
  FNextToken := '';
  FNextTokenType := ttNone;
  FParserState := psNone;

  while True do begin
    // Strings are 1-based: the character at Length(SourceCode2) is valid.
    if CodeStreamPosition > Length(SourceCode2) then begin
      // End of input: flush the token that was being collected, if any.
      case FParserState of
        psIdentifier:
          begin
            FNextTokenType := ttIdentifier;
          end;
        psConstantNumber, psConstantHexNumber:
          begin
            FNextTokenType := ttConstantNumber;
          end;
        psOperator:
          begin
            FNextTokenType := ttOperator;
          end;
        psConstantStringEnd:
          begin
            FNextTokenType := ttConstantString;
          end;
        psUnknown:
          begin
            FNextTokenType := ttUnknown;
          end;
      else
        begin
          // Nothing pending (or an unterminated comment/string/directive).
          if FParserState in [psNone, psNoneShift, psWhiteSpace] then
            TokenCodePosition := CodePosition;
          FNextToken := '';
          FNextTokenType := ttEndOfFile;
        end;
      end;
      FParserState := psEndOfFile;
      Break;
    end;

    CurrentChar := SourceCode2[CodeStreamPosition];

    case FParserState of
      psNone, psNoneShift:
        begin
          // Classify the first character of a new token.
          TokenCodePosition := CodePosition;
          if IsWhiteSpace(CurrentChar) then
            FParserState := psWhiteSpace
          else if CurrentChar = '{' then
            FParserState := psBlockComment1First
          else if CurrentChar = '''' then
            FParserState := psConstantString
          else if CurrentChar = '$' then
            FParserState := psConstantHexNumber
          else if CurrentChar in SpecChar then begin
            FParserState := psOperator;
            FNextToken := FNextToken + CurrentChar;
          end else if IsAlphabetic(CurrentChar) or (CurrentChar = '_') then begin
            // '_' may start an identifier, as it may continue one.
            FParserState := psIdentifier;
            FNextToken := FNextToken + CurrentChar;
          end else if IsNumeric(CurrentChar) then begin
            FParserState := psConstantNumber;
            FNextToken := FNextToken + CurrentChar;
          end else begin
            // Unrecognized character: returned as a one-character token.
            FParserState := psUnknown;
            FNextToken := FNextToken + CurrentChar;
          end;
        end;
      psLineComment:
        begin
          if (CurrentChar = #13) or (CurrentChar = #10) then
            FParserState := psNoneShift;
        end;
      psBlockComment1First:
        begin
          if CurrentChar = '$' then
            FParserState := psCompilerDirective
          else if CurrentChar = '}' then
            // '{}' is an empty comment.
            FParserState := psNoneShift
          else
            FParserState := psBlockComment1;
        end;
      psBlockComment1:
        begin
          if CurrentChar = '}' then
            FParserState := psNoneShift;
        end;
      psCompilerDirective:
        begin
          if CurrentChar = '}' then begin
            // The directive text is dropped; scanning continues with the
            // next token in the same call.
            FParserState := psNoneShift;
            FNextTokenType := ttCompilerDirective;
            FNextToken := '';
          end else
            FNextToken := FNextToken + CurrentChar;
        end;
      psBlockComment2:
        begin
          if (PreviousChar = '*') and (CurrentChar = ')') then
            FParserState := psNoneShift;
        end;
      psConstantString:
        begin
          if CurrentChar = '''' then
            FParserState := psConstantStringEnd
          else
            FNextToken := FNextToken + CurrentChar;
        end;
      psConstantStringEnd:
        begin
          if CurrentChar = '''' then begin
            // Doubled quote: a literal quote inside the same string.
            FNextToken := FNextToken + '''';
            FParserState := psConstantString;
          end else begin
            FNextTokenType := ttConstantString;
            Break;
          end;
        end;
      psConstantHexNumber:
        begin
          if IsHex(CurrentChar) then
            FNextToken := FNextToken + CurrentChar
          else begin
            FNextTokenType := ttConstantNumber;
            Break;
          end;
        end;
      psConstantNumber:
        begin
          if IsNumeric(CurrentChar) then
            FNextToken := FNextToken + CurrentChar
          else begin
            FNextTokenType := ttConstantNumber;
            Break;
          end;
        end;
      psOperator:
        begin
          if (CurrentChar = '*') and (PreviousChar = '(') then begin
            // '(*' opens a block comment.
            FNextToken := '';
            FParserState := psBlockComment2;
          end else
          if (CurrentChar = '/') and (PreviousChar = '/') then begin
            // '//' opens a line comment.
            FNextToken := '';
            FParserState := psLineComment;
          end else
          if not (CurrentChar in SpecChar) then begin
            FNextTokenType := ttOperator;
            Break;
          end else begin
            // Extend the operator only when the last two characters form a
            // known two-character operator.
            J := 0;
            while (J < Length(DoubleSpecChar)) and
              ((PreviousChar + CurrentChar) <> DoubleSpecChar[J]) do
              Inc(J);
            if J < Length(DoubleSpecChar) then
              FNextToken := FNextToken + CurrentChar
            else begin
              FNextTokenType := ttOperator;
              Break;
            end;
          end;
        end;
      psIdentifier:
        begin
          if (not IsAlphanumeric(CurrentChar)) and (CurrentChar <> '_') then begin
            FNextTokenType := ttIdentifier;
            Break;
          end else
            FNextToken := FNextToken + CurrentChar;
        end;
      psUnknown:
        begin
          // The unknown character was consumed in the previous step.
          FNextTokenType := ttUnknown;
          Break;
        end;
      psWhiteSpace:
        begin
          // Whitespace already consumed; classify the current character in
          // the next iteration without advancing.
          FParserState := psNone;
        end;
    end;

    if FParserState <> psNone then begin
      // Update cursor position
      Inc(CodePosition.X);
      if CurrentChar = LineEndingChar then begin
        CodePosition.X := 0;
        Inc(CodePosition.Y);
      end;

      Inc(CodeStreamPosition);
      PreviousChar := CurrentChar;
    end;
  end;
end;
419 |
|
---|
{ Returns the text of the token at TokenIndex and advances TokenIndex; returns
  an empty string without advancing when all tokens were read. The result is
  logged with the 'ReadCode: ' prefix. }
function TAnalyzer.ReadToken: string;
begin
  Result := '';
  if TokenIndex < Tokens.Count then begin
    Result := Tokens[TokenIndex].Token;
    Inc(TokenIndex);
  end;
  Log('ReadCode: ' + Result);
end;
428 |
|
---|
{ Returns the text of the token at TokenIndex without consuming it, or an
  empty string when all tokens were read. The result is logged with the
  'NextToken: ' prefix. }
function TAnalyzer.NextToken: string;
begin
  Result := '';
  if TokenIndex < Tokens.Count then
    Result := Tokens[TokenIndex].Token;
  Log('NextToken: ' + Result);
end;
436 |
|
---|
{ Returns the type of the token at TokenIndex without consuming it, or
  ttEndOfFile when all tokens were read. }
function TAnalyzer.NextTokenType: TTokenType;
begin
  Result := ttEndOfFile;
  if TokenIndex < Tokens.Count then
    Result := Tokens[TokenIndex].TokenType;
end;
443 |
|
---|
444 | end.
|
---|
445 |
|
---|