| 1 | unit Analyzer;
|
|---|
| 2 |
|
|---|
| 3 | interface
|
|---|
| 4 |
|
|---|
| 5 | uses
|
|---|
| 6 | SysUtils, Variants, Classes, Dialogs, SourceCodePascal, FileUtil,
|
|---|
| 7 | Generics.Collections;
|
|---|
| 8 |
|
|---|
| 9 | type
|
|---|
| 10 | TErrorMessageEvent = procedure(Text: string; Position: TPoint; FileName: string) of object;
|
|---|
| 11 | TDebugLogEvent = procedure(Text: string) of object;
|
|---|
| 12 |
|
|---|
| 13 | TParserState = (psNone, psIdentifier, psConstantNumber, psConstantString,
|
|---|
| 14 | psOperator, psEndOfFile, psLineComment, psBlockComment1, psBlockComment2,
|
|---|
| 15 | psUnknown, psWhiteSpace, psConstantStringEnd, psBlockComment1First,
|
|---|
| 16 | psCompilerDirective, psNoneShift, psConstantHexNumber);
|
|---|
| 17 |
|
|---|
| 18 | TTokenType = (ttNone, ttIdentifier, ttConstantNumber, ttConstantString,
|
|---|
| 19 | ttOperator, ttEndOfFile, ttLineComment, ttBlockComment1, ttBlockComment2,
|
|---|
| 20 | ttUnknown, ttWhiteSpace, ttCompilerDirective);
|
|---|
| 21 |
|
|---|
| 22 | TToken = class
|
|---|
| 23 | Token: string;
|
|---|
| 24 | CodePosition: TPoint;
|
|---|
| 25 | TokenType: TTokenType;
|
|---|
| 26 | end;
|
|---|
| 27 |
|
|---|
| 28 | TGetSourceEvent = function (Name: string; var SourceCode: string): Boolean of object;
|
|---|
| 29 |
|
|---|
| 30 | { TAnalyzer }
|
|---|
| 31 |
|
|---|
| 32 | TAnalyzer = class
|
|---|
| 33 | private
|
|---|
| 34 | FFileName: string;
|
|---|
| 35 | FOnDebugLog: TDebugLogEvent;
|
|---|
| 36 | FOnErrorMessage: TErrorMessageEvent;
|
|---|
| 37 | FNextToken: string;
|
|---|
| 38 | FNextTokenType: TTokenType;
|
|---|
| 39 | FOnGetSource: TGetSourceEvent;
|
|---|
| 40 | FParserState: TParserState;
|
|---|
| 41 | PreviousChar: Char;
|
|---|
| 42 | CurrentChar: Char;
|
|---|
| 43 | TokenCodePosition: TPoint;
|
|---|
| 44 | LineEndingChar: Char;
|
|---|
| 45 | procedure GetNextToken;
|
|---|
| 46 | public
|
|---|
| 47 | SysName: string;
|
|---|
| 48 | Name: string;
|
|---|
| 49 | ProgramCode: TProgram;
|
|---|
| 50 | CodeStreamPosition: Integer;
|
|---|
| 51 | CodePosition: TPoint;
|
|---|
| 52 | SourceCode2: string;
|
|---|
| 53 | Tokens: TObjectList<TToken>;
|
|---|
| 54 | TokenIndex: Integer;
|
|---|
| 55 | constructor Create;
|
|---|
| 56 | destructor Destroy; override;
|
|---|
| 57 | function IsAlphanumeric(Character: char): boolean;
|
|---|
| 58 | function IsNumeric(Character: char): boolean;
|
|---|
| 59 | function IsHex(Character: char): boolean;
|
|---|
| 60 | function IsWhiteSpace(Character: char): boolean;
|
|---|
| 61 | function IsAlphabetic(Character: char): boolean;
|
|---|
| 62 | function IsIdentificator(Text: string): boolean;
|
|---|
| 63 | function IsKeyword(Text: string): boolean;
|
|---|
| 64 | function IsString(Text: string): Boolean;
|
|---|
| 65 | function IsOperator(Text: string): boolean;
|
|---|
| 66 | function ReadToken: string;
|
|---|
| 67 | function NextToken: string;
|
|---|
| 68 | function NextTokenType: TTokenType;
|
|---|
| 69 | procedure Expect(Code: string);
|
|---|
| 70 | procedure ErrorMessage(const Text: string; const Arguments: array of const;
|
|---|
| 71 | TokenOffset: Integer = -1);
|
|---|
| 72 | property OnErrorMessage: TErrorMessageEvent read FOnErrorMessage write FOnErrorMessage;
|
|---|
| 73 | property OnDebugLog: TDebugLogEvent read FOnDebugLog write FOnDebugLog;
|
|---|
| 74 | procedure Process;
|
|---|
| 75 | procedure Log(Text: string);
|
|---|
| 76 | function ParseModule(ProgramCode: TProgram): TSourceModule; virtual; abstract;
|
|---|
| 77 | property FileName: string read FFileName write FFileName;
|
|---|
| 78 | property OnGetSource: TGetSourceEvent read FOnGetSource
|
|---|
| 79 | write FOnGetSource;
|
|---|
| 80 | end;
|
|---|
| 81 |
|
|---|
| 82 | { TAnalyzers }
|
|---|
| 83 |
|
|---|
| 84 | TAnalyzers = class(TObjectList<TAnalyzer>)
|
|---|
| 85 | function SearchBySysName(Name: string): TAnalyzer;
|
|---|
| 86 | procedure LoadToStrings(Strings: TStrings);
|
|---|
| 87 | end;
|
|---|
| 88 |
|
|---|
| 89 | resourcestring
|
|---|
| 90 | SExpectedButFound = 'Expected "%s" but "%s" found.';
|
|---|
| 91 |
|
|---|
| 92 |
|
|---|
| 93 | implementation
|
|---|
| 94 |
|
|---|
| 95 | { TAnalyzers }
|
|---|
| 96 |
|
|---|
| 97 | function TAnalyzers.SearchBySysName(Name: string): TAnalyzer;
|
|---|
| 98 | var
|
|---|
| 99 | I: Integer;
|
|---|
| 100 | begin
|
|---|
| 101 | I := 0;
|
|---|
| 102 | while (I < Count) and (TAnalyzer(Items[I]).SysName <> Name) do Inc(I);
|
|---|
| 103 | if I < Count then Result := TAnalyzer(Items[I])
|
|---|
| 104 | else Result := nil;
|
|---|
| 105 | end;
|
|---|
| 106 |
|
|---|
| 107 | procedure TAnalyzers.LoadToStrings(Strings: TStrings);
|
|---|
| 108 | var
|
|---|
| 109 | I: Integer;
|
|---|
| 110 | begin
|
|---|
| 111 | try
|
|---|
| 112 | Strings.BeginUpdate;
|
|---|
| 113 | Strings.Clear;
|
|---|
| 114 | for I := 0 to Count - 1 do
|
|---|
| 115 | Strings.AddObject(TAnalyzer(Items[I]).Name, Items[I]);
|
|---|
| 116 | finally
|
|---|
| 117 | Strings.EndUpdate;
|
|---|
| 118 | end;
|
|---|
| 119 | end;
|
|---|
| 120 |
|
|---|
| 121 | { TAnalyzer }
|
|---|
| 122 |
|
|---|
| 123 | procedure TAnalyzer.ErrorMessage(const Text: string; const Arguments: array of const;
|
|---|
| 124 | TokenOffset: Integer = -1);
|
|---|
| 125 | begin
|
|---|
| 126 | if Assigned(FOnErrorMessage) then
|
|---|
| 127 | if (TokenIndex + TokenOffset) < Tokens.Count then begin
|
|---|
| 128 | FOnErrorMessage(Format(Text, Arguments),
|
|---|
| 129 | TToken(Tokens[TokenIndex + TokenOffset]).CodePosition, FileName);
|
|---|
| 130 | Log('Error: ' + Format(Text, Arguments));
|
|---|
| 131 | end;
|
|---|
| 132 | end;
|
|---|
| 133 |
|
|---|
| 134 | procedure TAnalyzer.Expect(Code: string);
|
|---|
| 135 | begin
|
|---|
| 136 | Log('Expect: ' + Code);
|
|---|
| 137 | if NextToken <> Code then begin
|
|---|
| 138 | ErrorMessage(SExpectedButFound, [Code, NextToken], 0);
|
|---|
| 139 |
|
|---|
| 140 | // Recovery: try to find nearest same code
|
|---|
| 141 | while (NextToken <> Code) and (NextTokenType <> ttEndOfFile) do
|
|---|
| 142 | ReadToken;
|
|---|
| 143 | end;
|
|---|
| 144 | ReadToken;
|
|---|
| 145 | end;
|
|---|
| 146 |
|
|---|
| 147 | function TAnalyzer.IsAlphabetic(Character: char): boolean;
|
|---|
| 148 | begin
|
|---|
| 149 | Result := (Character in ['a'..'z']) or (Character in ['A'..'Z']);
|
|---|
| 150 | end;
|
|---|
| 151 |
|
|---|
| 152 | constructor TAnalyzer.Create;
|
|---|
| 153 | begin
|
|---|
| 154 | Tokens := TObjectList<TToken>.Create;
|
|---|
| 155 | {$IFDEF windows}
|
|---|
| 156 | LineEndingChar := LineEnding[1];
|
|---|
| 157 | {$ELSE}
|
|---|
| 158 | LineEndingChar := LineEnding;
|
|---|
| 159 | {$ENDIF}
|
|---|
| 160 | end;
|
|---|
| 161 |
|
|---|
| 162 | destructor TAnalyzer.Destroy;
|
|---|
| 163 | begin
|
|---|
| 164 | FreeAndNil(Tokens);
|
|---|
| 165 | inherited;
|
|---|
| 166 | end;
|
|---|
| 167 |
|
|---|
| 168 | function TAnalyzer.IsAlphanumeric(Character: char): boolean;
|
|---|
| 169 | begin
|
|---|
| 170 | Result := IsAlphabetic(Character) or IsNumeric(Character);
|
|---|
| 171 | end;
|
|---|
| 172 |
|
|---|
| 173 | function TAnalyzer.IsNumeric(Character: char): boolean;
|
|---|
| 174 | begin
|
|---|
| 175 | Result := Character in ['0'..'9'];
|
|---|
| 176 | end;
|
|---|
| 177 |
|
|---|
| 178 | function TAnalyzer.IsHex(Character: char): boolean;
|
|---|
| 179 | begin
|
|---|
| 180 | Result := IsNumeric(Character) or (Character in ['A'..'F']);
|
|---|
| 181 | end;
|
|---|
| 182 |
|
|---|
| 183 | function TAnalyzer.IsKeyword(Text: string): boolean;
|
|---|
| 184 | var
|
|---|
| 185 | I: integer;
|
|---|
| 186 | begin
|
|---|
| 187 | Result := False;
|
|---|
| 188 | for I := 0 to High(Keywords) do
|
|---|
| 189 | if Keywords[I] = Text then
|
|---|
| 190 | Result := True;
|
|---|
| 191 | end;
|
|---|
| 192 |
|
|---|
| 193 | function TAnalyzer.IsString(Text: string): Boolean;
|
|---|
| 194 | begin
|
|---|
| 195 | raise Exception.Create('Not implemented');
|
|---|
| 196 | end;
|
|---|
| 197 |
|
|---|
| 198 | function TAnalyzer.IsOperator(Text: string): boolean;
|
|---|
| 199 | var
|
|---|
| 200 | I: integer;
|
|---|
| 201 | begin
|
|---|
| 202 | Result := False;
|
|---|
| 203 | for I := 0 to High(Operators) do
|
|---|
| 204 | if Operators[I] = Text then
|
|---|
| 205 | Result := True;
|
|---|
| 206 | end;
|
|---|
| 207 |
|
|---|
| 208 | function TAnalyzer.IsIdentificator(Text: string): boolean;
|
|---|
| 209 | var
|
|---|
| 210 | I: integer;
|
|---|
| 211 | begin
|
|---|
| 212 | Result := True;
|
|---|
| 213 | if Length(Text) = 0 then
|
|---|
| 214 | Result := False;
|
|---|
| 215 | if IsKeyWord(Text) then
|
|---|
| 216 | Result := False;
|
|---|
| 217 | if Length(Text) > 0 then
|
|---|
| 218 | if not (Text[1] in ['a'..'z', 'A'..'Z', '%', '_']) then
|
|---|
| 219 | Result := False;
|
|---|
| 220 | for I := 2 to Length(Text) do
|
|---|
| 221 | if not (Text[i] in ['a'..'z', 'A'..'Z', '0'..'9', '_']) then
|
|---|
| 222 | Result := False;
|
|---|
| 223 | end;
|
|---|
| 224 |
|
|---|
| 225 | function TAnalyzer.IsWhiteSpace(Character: char): boolean;
|
|---|
| 226 | begin
|
|---|
| 227 | Result := (Character = ' ') or (Character = #13) or (Character = #10);
|
|---|
| 228 | end;
|
|---|
| 229 |
|
|---|
| 230 | procedure TAnalyzer.Process;
|
|---|
| 231 | var
|
|---|
| 232 | NewToken: TToken;
|
|---|
| 233 | begin
|
|---|
| 234 | CodePosition := Point(0, 1);
|
|---|
| 235 | CurrentChar := #0;
|
|---|
| 236 | PreviousChar := #0;
|
|---|
| 237 | FNextToken := '';
|
|---|
| 238 | FNextTokenType := ttNone;
|
|---|
| 239 | CodeStreamPosition := 1;
|
|---|
| 240 | Tokens.Clear;
|
|---|
| 241 | TokenIndex := 0;
|
|---|
| 242 | while CodeStreamPosition < Length(SourceCode2) do begin
|
|---|
| 243 | NewToken := TToken.Create;
|
|---|
| 244 | GetNextToken;
|
|---|
| 245 | NewToken.CodePosition := TokenCodePosition;
|
|---|
| 246 | NewToken.TokenType := FNextTokenType;
|
|---|
| 247 | NewToken.Token := FNextToken;
|
|---|
| 248 | Tokens.Add(NewToken);
|
|---|
| 249 | end;
|
|---|
| 250 | end;
|
|---|
| 251 |
|
|---|
| 252 | procedure TAnalyzer.Log(Text: string);
|
|---|
| 253 | begin
|
|---|
| 254 | if Assigned(FOnDebugLog) then
|
|---|
| 255 | FOnDebugLog(Text);
|
|---|
| 256 | end;
|
|---|
| 257 |
|
|---|
| 258 | procedure TAnalyzer.GetNextToken;
|
|---|
| 259 | var
|
|---|
| 260 | I: integer;
|
|---|
| 261 | II: integer;
|
|---|
| 262 | J: integer;
|
|---|
| 263 | const
|
|---|
| 264 | SpecChar: set of char = [';', '.', ',', ':', '(', ')', '[', ']',
|
|---|
| 265 | '+', '-', '/', '*', '^', '=', '<', '>', '@'];
|
|---|
| 266 | DoubleSpecChar: array[0..6] of string = (':=', '..', '<=', '>=', '<>',
|
|---|
| 267 | '+=', '-=');
|
|---|
| 268 | begin
|
|---|
| 269 | FNextToken := '';
|
|---|
| 270 | FNextTokenType := ttNone;
|
|---|
| 271 | FParserState := psNone;
|
|---|
| 272 |
|
|---|
| 273 | while True do begin
|
|---|
| 274 | if CodeStreamPosition < Length(SourceCode2) then begin
|
|---|
| 275 | CurrentChar := SourceCode2[CodeStreamPosition];
|
|---|
| 276 | end else begin
|
|---|
| 277 | FNextToken := '';
|
|---|
| 278 | FParserState := psEndOfFile;
|
|---|
| 279 | FNextTokenType := ttEndOfFile;
|
|---|
| 280 | Break;
|
|---|
| 281 | end;
|
|---|
| 282 |
|
|---|
| 283 | if (FParserState = psNone) or (FParserState = psNoneShift) then begin
|
|---|
| 284 | TokenCodePosition := CodePosition;
|
|---|
| 285 | if IsWhiteSpace(CurrentChar) then
|
|---|
| 286 | FParserState := psWhiteSpace
|
|---|
| 287 | else
|
|---|
| 288 | if CurrentChar = '{' then begin
|
|---|
| 289 | FParserState := psBlockComment1First;
|
|---|
| 290 | end else
|
|---|
| 291 | if CurrentChar = '''' then begin
|
|---|
| 292 | FParserState := psConstantString;
|
|---|
| 293 | end else
|
|---|
| 294 | if CurrentChar = '$' then begin
|
|---|
| 295 | FParserState := psConstantHexNumber;
|
|---|
| 296 | end else
|
|---|
| 297 | if CurrentChar in SpecChar then begin
|
|---|
| 298 | FParserState := psOperator;
|
|---|
| 299 | FNextToken := FNextToken + CurrentChar;
|
|---|
| 300 | end else
|
|---|
| 301 | if IsAlphabetic(CurrentChar) then begin
|
|---|
| 302 | FParserState := psIdentifier;
|
|---|
| 303 | FNextToken := FNextToken + CurrentChar;
|
|---|
| 304 | end else
|
|---|
| 305 | if IsNumeric(CurrentChar) then begin
|
|---|
| 306 | FPArserSTate := psConstantNumber;
|
|---|
| 307 | FNextToken := FNextToken + CurrentChar;
|
|---|
| 308 | end else FParserState := psUnknown;
|
|---|
| 309 | end else
|
|---|
| 310 | if FParserState = psLineComment then begin
|
|---|
| 311 | if (CurrentChar = #13) or (CurrentChar = #10) then
|
|---|
| 312 | FParserState := psNoneShift;
|
|---|
| 313 | end else
|
|---|
| 314 | if FParserState = psBlockComment1First then begin
|
|---|
| 315 | if CurrentChar = '$' then FParserState := psCompilerDirective
|
|---|
| 316 | else FParserSTate := psBlockComment1;
|
|---|
| 317 | end else
|
|---|
| 318 | if FParserState = psBlockComment1 then begin
|
|---|
| 319 | if (CurrentChar = '}') then begin
|
|---|
| 320 | FParserState := psNoneShift;
|
|---|
| 321 | end;
|
|---|
| 322 | end else
|
|---|
| 323 | if FParserState = psCompilerDirective then begin
|
|---|
| 324 | if (CurrentChar = '}') then begin
|
|---|
| 325 | FParserState := psNoneShift;
|
|---|
| 326 | FNextTokenType := ttCompilerDirective;
|
|---|
| 327 | FNextToken := '';
|
|---|
| 328 | //Break;
|
|---|
| 329 | end else FNextToken := FNextToken + CurrentChar;
|
|---|
| 330 | end else
|
|---|
| 331 | if FParserState = psBlockComment2 then begin
|
|---|
| 332 | if (PreviousChar = '*') and (CurrentChar = ')') then
|
|---|
| 333 | FParserState := psNoneShift;
|
|---|
| 334 | end else
|
|---|
| 335 | if FParserState = psConstantString then
|
|---|
| 336 | begin
|
|---|
| 337 | if (CurrentChar = '''') then begin
|
|---|
| 338 | FParserState := psConstantStringEnd;
|
|---|
| 339 | end else FNextToken := FNextToken + CurrentChar;
|
|---|
| 340 | end else
|
|---|
| 341 | if FParserState = psConstantStringEnd then
|
|---|
| 342 | begin
|
|---|
| 343 | if (CurrentChar = '''') then begin
|
|---|
| 344 | FParserState := psConstantString;
|
|---|
| 345 | end else FParserState := psNone;
|
|---|
| 346 | FNextTokenType := ttConstantString;
|
|---|
| 347 | Break;
|
|---|
| 348 | end else
|
|---|
| 349 | if FParserState = psConstantHexNumber then
|
|---|
| 350 | begin
|
|---|
| 351 | if not IsHex(CurrentChar) then begin
|
|---|
| 352 | FParserState := psNone;
|
|---|
| 353 | FNextTokenType := ttConstantNumber;
|
|---|
| 354 | Break;
|
|---|
| 355 | end else FNextToken := FNextToken + CurrentChar;
|
|---|
| 356 | end else
|
|---|
| 357 | if FParserState = psConstantNumber then
|
|---|
| 358 | begin
|
|---|
| 359 | if not IsNumeric(CurrentChar) then begin
|
|---|
| 360 | FParserState := psNone;
|
|---|
| 361 | FNextTokenType := ttConstantNumber;
|
|---|
| 362 | Break;
|
|---|
| 363 | end else FNextToken := FNextToken + CurrentChar;
|
|---|
| 364 | end else
|
|---|
| 365 | if FParserState = psOperator then
|
|---|
| 366 | begin
|
|---|
| 367 | if (CurrentChar = '*') and (PreviousChar = '(') then
|
|---|
| 368 | begin
|
|---|
| 369 | FNextToken := '';
|
|---|
| 370 | FParserState := psBlockComment2;
|
|---|
| 371 | end else
|
|---|
| 372 | if (CurrentChar = '/') and (PreviousChar = '/') then
|
|---|
| 373 | begin
|
|---|
| 374 | FNextToken := '';
|
|---|
| 375 | FParserState := psLineComment;
|
|---|
| 376 | end else
|
|---|
| 377 | if not (CurrentChar in SpecChar) then begin
|
|---|
| 378 | FNextTokenType := ttOperator;
|
|---|
| 379 | Break;
|
|---|
| 380 | end
|
|---|
| 381 | else begin
|
|---|
| 382 | J := 0;
|
|---|
| 383 | while (J < Length(DoubleSpecChar)) and
|
|---|
| 384 | ((PreviousChar + CurrentChar) <> DoubleSpecChar[J]) do
|
|---|
| 385 | Inc(J);
|
|---|
| 386 | if J < Length(DoubleSpecChar) then
|
|---|
| 387 | FNextToken := FNextToken + CurrentChar
|
|---|
| 388 | else begin
|
|---|
| 389 | FNextTokenType := ttOperator;
|
|---|
| 390 | Break;
|
|---|
| 391 | end;
|
|---|
| 392 | end;
|
|---|
| 393 | end else
|
|---|
| 394 | if FParserState = psIdentifier then
|
|---|
| 395 | begin
|
|---|
| 396 | if (not IsAlphanumeric(CurrentChar)) and (CurrentChar <> '_') then begin
|
|---|
| 397 | FNextTokenType := ttIdentifier;
|
|---|
| 398 | Break;
|
|---|
| 399 | end else FNextToken := FNextToken + CurrentChar;
|
|---|
| 400 | end
|
|---|
| 401 | else
|
|---|
| 402 | if FParserState = psWhiteSpace then begin
|
|---|
| 403 | FParserState := psNone;
|
|---|
| 404 | end;
|
|---|
| 405 |
|
|---|
| 406 | if FParserState <> psNone then begin
|
|---|
| 407 | // Update cursor position
|
|---|
| 408 | Inc(CodePosition.X);
|
|---|
| 409 | if (CurrentChar = LineEndingChar) then begin
|
|---|
| 410 | CodePosition.X := 0;
|
|---|
| 411 | Inc(CodePosition.Y);
|
|---|
| 412 | end;
|
|---|
| 413 |
|
|---|
| 414 | Inc(CodeStreamPosition);
|
|---|
| 415 | PreviousChar := CurrentChar;
|
|---|
| 416 | end;
|
|---|
| 417 | end;
|
|---|
| 418 | end;
|
|---|
| 419 |
|
|---|
| 420 | function TAnalyzer.ReadToken: string;
|
|---|
| 421 | begin
|
|---|
| 422 | if TokenIndex < Tokens.Count then begin
|
|---|
| 423 | Result := TToken(Tokens[TokenIndex]).Token;
|
|---|
| 424 | Inc(TokenIndex);
|
|---|
| 425 | end else Result := '';
|
|---|
| 426 | Log('ReadCode: ' + Result);
|
|---|
| 427 | end;
|
|---|
| 428 |
|
|---|
| 429 | function TAnalyzer.NextToken: string;
|
|---|
| 430 | begin
|
|---|
| 431 | if TokenIndex < Tokens.Count then begin
|
|---|
| 432 | Result := TToken(Tokens[TokenIndex]).Token;
|
|---|
| 433 | end else Result := '';
|
|---|
| 434 | Log('NextToken: ' + Result);
|
|---|
| 435 | end;
|
|---|
| 436 |
|
|---|
| 437 | function TAnalyzer.NextTokenType: TTokenType;
|
|---|
| 438 | begin
|
|---|
| 439 | if TokenIndex < Tokens.Count then begin
|
|---|
| 440 | Result := TToken(Tokens[TokenIndex]).TokenType;
|
|---|
| 441 | end else Result := ttEndOfFile;
|
|---|
| 442 | end;
|
|---|
| 443 |
|
|---|
| 444 | end.
|
|---|
| 445 |
|
|---|