8
8
*/
9
9
10
10
#include " scanner.hpp"
11
- #include < cctype>
12
- #include < stdexcept>
11
+ #include < cctype> // For character classification functions
12
+ #include < stdexcept> // For exception handling (if needed)
13
13
14
14
namespace TINY
15
15
{
16
- /* *
17
- * @namespace SCANNER
18
- * @brief Contains all components related to the lexical analysis (scanning) of TINY language.
19
- */
20
16
namespace SCANNER
21
17
{
22
18
19
+ // Constructor: Initializes the Scanner with the input source code
23
20
Scanner::Scanner (const std::string &input) : input(input) {}
24
21
22
+ // Extracts the next token from the input source code
25
23
Token Scanner::getNextToken ()
26
24
{
27
- skipWhitespace ();
25
+ // Skip any whitespace and comments before processing the next token
26
+ bool unclosedComment = skipWhitespaceAndComments ();
28
27
28
+ // If an unclosed comment was detected, return an UNKNOWN token with an error message
29
+ if (unclosedComment)
30
+ {
31
+ return Token (TokenType::UNKNOWN, " Unclosed comment" , line, column);
32
+ }
33
+
34
+ // Check if we've reached the end of the input
29
35
if (pos >= input.size ())
30
36
{
37
+ // Return an UNKNOWN token to indicate end of input (or define an EOF token if desired)
31
38
return Token (TokenType::UNKNOWN, " " , line, column);
32
39
}
33
40
41
+ // Get the next character from the input
34
42
char current = get ();
35
43
36
- // Single character tokens
44
+ // Handle single- character tokens using a switch statement
37
45
switch (current)
38
46
{
39
47
case ' +' :
@@ -51,11 +59,13 @@ namespace TINY
51
59
case ' ;' :
52
60
return Token (TokenType::SEMICOLON, " ;" , line, column);
53
61
case ' :' :
62
+ // Check if the next character is '=' to form the ':=' token
54
63
if (peek () == ' =' )
55
64
{
56
65
get (); // Consume '='
57
66
return Token (TokenType::ASSIGN, " :=" , line, column);
58
67
}
68
+ // If not, return an UNKNOWN token for ':'
59
69
return Token (TokenType::UNKNOWN, " :" , line, column);
60
70
case ' <' :
61
71
return Token (TokenType::LESSTHAN, " <" , line, column);
@@ -66,15 +76,15 @@ namespace TINY
66
76
// Identifiers and keywords
67
77
if (std::isalpha (current))
68
78
{
69
- std::string identifier (1 , current);
79
+ std::string identifier (1 , current); // Start building the identifier
70
80
71
- // Accept only alphabetic characters for identifiers
81
+ // Continue consuming alphabetic characters
72
82
while (std::isalpha (peek ()))
73
83
{
74
84
identifier += get ();
75
85
}
76
86
77
- // Check for keywords
87
+ // Check if the identifier matches any reserved keywords
78
88
if (identifier == " if" )
79
89
return Token (TokenType::IF, identifier, line, column);
80
90
if (identifier == " then" )
@@ -90,61 +100,142 @@ namespace TINY
90
100
if (identifier == " write" )
91
101
return Token (TokenType::WRITE, identifier, line, column);
92
102
103
+ // If not a keyword, it's an identifier
93
104
return Token (TokenType::IDENTIFIER, identifier, line, column);
94
105
}
95
106
96
- // Numbers
107
+ // Numbers (integer literals)
97
108
if (std::isdigit (current))
98
109
{
99
- std::string number (1 , current);
110
+ std::string number (1 , current); // Start building the number literal
111
+
112
+ // Continue consuming digit characters
100
113
while (std::isdigit (peek ()))
101
114
{
102
115
number += get ();
103
116
}
117
+
118
+ // Return a NUMBER token
104
119
return Token (TokenType::NUMBER, number, line, column);
105
120
}
106
121
107
- // Unknown token
122
+ // If the character doesn't match any known token patterns, return an UNKNOWN token
108
123
return Token (TokenType::UNKNOWN, std::string (1 , current), line, column);
109
124
}
110
125
111
- bool Scanner::hasMoreTokens () const
126
+ // Checks if there are more tokens to be extracted
127
+ bool Scanner::hasMoreTokens ()
128
+ {
129
+ // Save the current state to avoid modifying the scanner's actual state
130
+ size_t tempPos = pos;
131
+ int tempLine = line;
132
+ int tempColumn = column;
133
+
134
+ // Temporarily skip whitespace and comments
135
+ bool unclosedComment = skipWhitespaceAndComments ();
136
+
137
+ // Determine if there are more tokens
138
+ bool hasMore = (pos < input.size ()) && !unclosedComment;
139
+
140
+ // Restore the scanner's state
141
+ pos = tempPos;
142
+ line = tempLine;
143
+ column = tempColumn;
144
+
145
+ return hasMore;
146
+ }
147
+
148
+ // Skips over whitespace and comments in the input
149
+ bool Scanner::skipWhitespaceAndComments ()
112
150
{
113
- return pos < input.size ();
151
+ while (true )
152
+ {
153
+ skipWhitespace ();
154
+
155
+ bool unclosedComment = skipComments ();
156
+ if (unclosedComment)
157
+ {
158
+ // Unclosed comment detected; return true to indicate error
159
+ return true ;
160
+ }
161
+
162
+ // If no more whitespace or comments, break out of the loop
163
+ if (!std::isspace (peek ()) && peek () != ' {' )
164
+ {
165
+ break ;
166
+ }
167
+ }
168
+ // No unclosed comment detected
169
+ return false ;
114
170
}
115
171
172
+ // Skips over whitespace characters in the input
173
+ void Scanner::skipWhitespace ()
174
+ {
175
+ // Consume all consecutive whitespace characters
176
+ while (pos < input.size () && std::isspace (peek ()))
177
+ {
178
+ get (); // Consume the whitespace character
179
+ }
180
+ }
181
+
182
+ // Skips over comments in the input source code
183
+ bool Scanner::skipComments ()
184
+ {
185
+ if (pos < input.size () && peek () == ' {' )
186
+ {
187
+ get (); // Consume '{'
188
+ while (pos < input.size () && peek () != ' }' )
189
+ {
190
+ get (); // Consume characters inside the comment
191
+ }
192
+ if (pos < input.size ())
193
+ {
194
+ get (); // Consume '}'
195
+ skipWhitespace (); // Skip any whitespace after the comment
196
+ return false ; // Comment was successfully skipped
197
+ }
198
+ else
199
+ {
200
+ // EOF reached before closing '}'
201
+ // Unclosed comment detected
202
+ return true ;
203
+ }
204
+ }
205
+ return false ; // No comment to skip
206
+ }
207
+
208
+ // Peeks at the next character in the input without advancing the position
116
209
char Scanner::peek () const
117
210
{
211
+ // Return the next character if within bounds, or '\0' if at the end
118
212
return pos < input.size () ? input[pos] : ' \0 ' ;
119
213
}
120
214
215
+ // Gets the next character in the input and advances the position
121
216
char Scanner::get ()
122
217
{
218
+ // Check if at the end of input
123
219
if (pos >= input.size ())
124
220
{
125
221
return ' \0 ' ;
126
222
}
127
223
128
- char currentChar = input[pos++];
224
+ char currentChar = input[pos++]; // Get the current character and advance position
225
+
226
+ // Update line and column numbers for error reporting and tracking
129
227
if (currentChar == ' \n ' )
130
228
{
131
- line++;
132
- column = 1 ;
229
+ line++; // Move to the next line
230
+ column = 1 ; // Reset column number
133
231
}
134
232
else
135
233
{
136
- column++;
234
+ column++; // Move to the next column
137
235
}
138
236
139
237
return currentChar;
140
238
}
141
239
142
- void Scanner::skipWhitespace ()
143
- {
144
- while (std::isspace (peek ()))
145
- {
146
- get ();
147
- }
148
- }
149
240
} // namespace SCANNER
150
241
} // namespace TINY
0 commit comments