source: branches/Void/UTokenizer.pas

Last change on this file was 19, checked in by george, 15 years ago
  • Upraveno: Vylepšeny třídy pro analýzu gramatiky.
File size: 5.4 KB
Line 
1unit UTokenizer;
2
3{$mode Delphi}{$H+}
4
5interface
6
7uses
8 Classes, SysUtils;
9
type

  { Category of the token most recently produced by TVoidParser.
    The order of the values is significant for their ordinals; do not reorder. }
  TTokenType = (
    ttIdentifier,   // letter followed by letters/digits
    ttWhiteSpace,   // run of blanks / line breaks
    ttString,       // text enclosed in single quotes
    ttNumber,       // run of decimal digits
    ttBrackets,     // declared for callers; see ParseNextToken for whether it is emitted
    ttSymbol        // one- or two-character operator / punctuation
  );

  { Internal states of the character-driven state machine in
    TVoidParser.ParseNextToken. }
  TTokenParseState = (
    psNone,              // between tokens, next character decides the token kind
    psString,            // inside a quoted string
    psStringEnd,         // just saw a quote inside a string (end or doubled quote)
    psNumber,            // inside a digit run
    psIdentifier,        // inside an identifier
    psRowComment,        // inside a comment that runs to the end of the line
    psSpecialSymbol,     // first symbol character read, second one pending
    psBlockComment1,     // inside a "(* ... *)" comment
    psBlockComment2,     // inside a "{ ... }" comment
    psBlockComment1End,  // saw "*" inside a "(* ... *)" comment
    psWhiteSpace         // inside a whitespace run
  );

  { TVoidParser }

  { Pull-style tokenizer: Open attaches a stream and reads the first token,
    every ParseNextToken call advances to the following one. The current
    token is exposed through the public fields below. }
  TVoidParser = class
  private
    ParseState: TTokenParseState;   // state carried over between calls
  public
    TokenValue: string;             // text of the current token
    TokenType: TTokenType;          // kind of the current token
    Position: TPoint;               // X = column, Y = line of the read cursor
    TokenStartPosition: TPoint;     // Position recorded while in psNone
    Source: TStream;                // input stream; not owned by the parser
    procedure Open(ASource: TStream);
    procedure ParseNextToken;
  end;
33
34implementation
35
36{ TVoidParser }
37
{ Attaches the parser to ASource, resets the cursor to column 0 of line 1,
  puts the state machine into its idle state and immediately reads the
  first token so that TokenValue/TokenType are valid on return.
  ASource is only referenced; the parser does not free it. }
procedure TVoidParser.Open(ASource: TStream);
begin
  Source := ASource;
  ParseState := psNone;
  Position.X := 0;
  Position.Y := 1;
  ParseNextToken;
end;
45
{ Advances to the next token of Source.

  Characters are read one byte at a time and fed to a small state machine
  (ParseState). When a token is complete, TokenValue and TokenType describe
  it and the procedure returns. Comments ("// ...", "(* ... *)", "{ ... }")
  are consumed without producing a token.

  Changes compared with the previous revision:
    - Position is restored from a snapshot when a character is pushed back,
      so a line feed that terminates a token is no longer counted twice.
    - The stream is read through TStream.ReadByte directly instead of
      through an unchecked TMemoryStream cast.
    - '<', '>' and '=' start a symbol token, which makes the existing
      '>=', '<=' and '<>' handling reachable.
    - At end of stream the pending token is finalised (type set, state
      reset). Once nothing is pending, an empty ttWhiteSpace token is
      returned instead of repeating the previous token. }
procedure TVoidParser.ParseNextToken;
var
  Character: Char;
  PreviousPosition: TPoint;

  { Pushes the character just read back onto the stream and undoes the
    cursor movement it caused, whatever kind of character it was. }
  procedure UnreadCharacter;
  begin
    Source.Position := Source.Position - 1;
    Position := PreviousPosition;
  end;

begin
  repeat
    if Source.Position >= Source.Size then begin
      // End of input: close whatever token is still open.
      case ParseState of
        psString, psStringEnd:
          TokenType := ttString;      // also covers an unterminated string
        psSpecialSymbol:
          TokenType := ttSymbol;
        psNumber, psIdentifier:
          ;                           // TokenType was set when the token began
      else
        begin
          // Idle, whitespace or comment: nothing left to report.
          TokenValue := '';
          TokenType := ttWhiteSpace;
        end;
      end;
      ParseState := psNone;
      Break;
    end;

    Character := Char(Source.ReadByte);

    // Remember the cursor so a push-back can restore it exactly.
    PreviousPosition := Position;
    if Character = #13 then Position.X := 0
    else if Character = #10 then Inc(Position.Y)
    else Inc(Position.X);

    case ParseState of
      psNone: begin
        TokenStartPosition := Position;
        if Character in [#13, #10, ' '] then begin
          TokenType := ttWhiteSpace;
          ParseState := psWhiteSpace;
        end else
        if Character = '''' then begin
          TokenValue := '';
          ParseState := psString;
        end else
        if Character in ['A'..'Z', 'a'..'z'] then begin
          TokenValue := Character;
          TokenType := ttIdentifier;
          ParseState := psIdentifier;
        end else
        if Character in ['0'..'9'] then begin
          TokenType := ttNumber;
          TokenValue := Character;
          ParseState := psNumber;
        end else
        if Character = '{' then begin
          ParseState := psBlockComment2;
        end else
        if Character in ['/', '(', ')', '+', '-', '*', ':', ';', ',',
          '<', '>', '='] then begin
          // The following character decides between a comment opener,
          // a two-character operator and a plain one-character symbol.
          TokenValue := Character;
          ParseState := psSpecialSymbol;
        end;
        // Any other character is skipped.
      end;
      psNumber: begin
        if Character in ['0'..'9'] then
          TokenValue := TokenValue + Character
        else begin
          ParseState := psNone;
          UnreadCharacter;
          Break;
        end;
      end;
      psRowComment: begin
        if Character in [#13, #10] then begin
          // Leave the line break for psNone so it starts a whitespace token.
          ParseState := psNone;
          UnreadCharacter;
        end;
      end;
      psWhiteSpace: begin
        if Character > ' ' then begin
          ParseState := psNone;
          TokenValue := '';
          TokenType := ttWhiteSpace;
          UnreadCharacter;
          Break;
        end;
      end;
      psSpecialSymbol: begin
        if (TokenValue = '/') and (Character = '/') then begin
          ParseState := psRowComment;
        end else
        if (TokenValue = '(') and (Character = '*') then begin
          ParseState := psBlockComment1;
        end else
        if ((TokenValue = '>') and (Character = '=')) or
          ((TokenValue = '<') and (Character = '=')) or
          ((TokenValue = '<') and (Character = '>')) or
          ((TokenValue = ':') and (Character = '=')) then begin
          TokenValue := TokenValue + Character;
          TokenType := ttSymbol;
          ParseState := psNone;
          Break;
        end else begin
          TokenType := ttSymbol;
          ParseState := psNone;
          UnreadCharacter;
          Break;
        end;
      end;
      psBlockComment1: begin
        if Character = '*' then
          ParseState := psBlockComment1End;
      end;
      psBlockComment1End: begin
        if Character = ')' then begin
          ParseState := psNone;
        end else begin
          // Not the closing ")": re-examine this character as comment text
          // (it may itself be the "*" of the real terminator).
          ParseState := psBlockComment1;
          UnreadCharacter;
        end;
      end;
      psBlockComment2: begin
        if Character = '}' then
          ParseState := psNone;
      end;
      psString: begin
        if Character = '''' then
          ParseState := psStringEnd
        else
          TokenValue := TokenValue + Character;
      end;
      psStringEnd: begin
        if Character = '''' then begin
          // Doubled quote inside a string stands for one literal quote.
          TokenValue := TokenValue + Character;
          ParseState := psString;
        end else begin
          TokenType := ttString;
          ParseState := psNone;
          UnreadCharacter;
          Break;
        end;
      end;
      psIdentifier: begin
        if Character in ['A'..'Z', 'a'..'z', '0'..'9'] then begin
          TokenValue := TokenValue + Character;
        end else begin
          TokenType := ttIdentifier;
          ParseState := psNone;
          UnreadCharacter;
          Break;
        end;
      end;
    end;
  until False;
end;
189
190end.
Note: See TracBrowser for help on using the repository browser.