Skip to content

Commit f09659c

Browse files
committed
update the project
1 parent 0539474 commit f09659c

File tree

6 files changed

+312
-32
lines changed

6 files changed

+312
-32
lines changed

scanner/build/scanner_test

42.6 KB
Binary file not shown.

scanner/build/scanner_test_test

-643 KB
Binary file not shown.

scanner/build/token_test_test

-611 KB
Binary file not shown.

scanner/include/scanner.hpp

Lines changed: 28 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,7 @@ namespace TINY
6868
*
6969
* @return True if there are more tokens, false otherwise.
7070
*/
71-
bool hasMoreTokens() const;
71+
bool hasMoreTokens();
7272

7373
private:
7474
std::string input; /**< The source code to be tokenized. */
@@ -82,7 +82,7 @@ namespace TINY
8282
* This method allows inspecting the next character in the source code
8383
* without consuming it.
8484
*
85-
* @return The next character in the input.
85+
* @return The next character in the input, or '\0' if end of input.
8686
*/
8787
char peek() const;
8888

@@ -92,7 +92,7 @@ namespace TINY
9292
* This method reads the next character from the source code and moves
9393
* the cursor forward.
9494
*
95-
* @return The next character in the input.
95+
* @return The next character in the input, or '\0' if end of input.
9696
*/
9797
char get();
9898

@@ -103,6 +103,31 @@ namespace TINY
103103
* encountered in the source code.
104104
*/
105105
void skipWhitespace();
106+
107+
/**
108+
* @brief Skips over comments in the input source code.
109+
*
110+
* This method checks if the current position is at the start of a comment (indicated by a '{' character).
111+
* If it is, the method skips over all characters until it finds the corresponding closing '}'.
112+
* After successfully skipping a comment, it also skips any whitespace characters that follow the comment.
113+
* If the end of the input is reached before finding a closing '}', the method returns `true` to indicate
114+
* that an unclosed comment was detected.
115+
*
116+
* @return True if an unclosed comment was detected, false otherwise.
117+
*/
118+
bool skipComments();
119+
120+
/**
121+
* @brief Skips over whitespace and comments in the input.
122+
*
123+
* This method continuously skips whitespace and comments until it reaches
124+
* a character that is neither whitespace nor part of a comment.
125+
* If an unclosed comment is detected (i.e., the end of input is reached before a closing '}' is found),
126+
* the method returns `true` to indicate the error.
127+
*
128+
* @return True if an unclosed comment was detected, false otherwise.
129+
*/
130+
bool skipWhitespaceAndComments();
106131
};
107132
} // namespace SCANNER
108133
} // namespace TINY

scanner/src/scanner.cpp

Lines changed: 118 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -8,32 +8,40 @@
88
*/
99

1010
#include "scanner.hpp"
11-
#include <cctype>
12-
#include <stdexcept>
11+
#include <cctype> // For character classification functions
12+
#include <stdexcept> // For exception handling (if needed)
1313

1414
namespace TINY
1515
{
16-
/**
17-
* @namespace SCANNER
18-
* @brief Contains all components related to the lexical analysis (scanning) of TINY language.
19-
*/
2016
namespace SCANNER
2117
{
2218

19+
// Constructor: Initializes the Scanner with the input source code
2320
Scanner::Scanner(const std::string &input) : input(input) {}
2421

22+
// Extracts the next token from the input source code
2523
Token Scanner::getNextToken()
2624
{
27-
skipWhitespace();
25+
// Skip any whitespace and comments before processing the next token
26+
bool unclosedComment = skipWhitespaceAndComments();
2827

28+
// If an unclosed comment was detected, return an UNKNOWN token with an error message
29+
if (unclosedComment)
30+
{
31+
return Token(TokenType::UNKNOWN, "Unclosed comment", line, column);
32+
}
33+
34+
// Check if we've reached the end of the input
2935
if (pos >= input.size())
3036
{
37+
// Return an UNKNOWN token to indicate end of input (or define an EOF token if desired)
3138
return Token(TokenType::UNKNOWN, "", line, column);
3239
}
3340

41+
// Get the next character from the input
3442
char current = get();
3543

36-
// Single character tokens
44+
// Handle single-character tokens using a switch statement
3745
switch (current)
3846
{
3947
case '+':
@@ -51,11 +59,13 @@ namespace TINY
5159
case ';':
5260
return Token(TokenType::SEMICOLON, ";", line, column);
5361
case ':':
62+
// Check if the next character is '=' to form the ':=' token
5463
if (peek() == '=')
5564
{
5665
get(); // Consume '='
5766
return Token(TokenType::ASSIGN, ":=", line, column);
5867
}
68+
// If not, return an UNKNOWN token for ':'
5969
return Token(TokenType::UNKNOWN, ":", line, column);
6070
case '<':
6171
return Token(TokenType::LESSTHAN, "<", line, column);
@@ -66,15 +76,15 @@ namespace TINY
6676
// Identifiers and keywords
6777
if (std::isalpha(current))
6878
{
69-
std::string identifier(1, current);
79+
std::string identifier(1, current); // Start building the identifier
7080

71-
// Accept only alphabetic characters for identifiers
81+
// Continue consuming alphabetic characters
7282
while (std::isalpha(peek()))
7383
{
7484
identifier += get();
7585
}
7686

77-
// Check for keywords
87+
// Check if the identifier matches any reserved keywords
7888
if (identifier == "if")
7989
return Token(TokenType::IF, identifier, line, column);
8090
if (identifier == "then")
@@ -90,61 +100,142 @@ namespace TINY
90100
if (identifier == "write")
91101
return Token(TokenType::WRITE, identifier, line, column);
92102

103+
// If not a keyword, it's an identifier
93104
return Token(TokenType::IDENTIFIER, identifier, line, column);
94105
}
95106

96-
// Numbers
107+
// Numbers (integer literals)
97108
if (std::isdigit(current))
98109
{
99-
std::string number(1, current);
110+
std::string number(1, current); // Start building the number literal
111+
112+
// Continue consuming digit characters
100113
while (std::isdigit(peek()))
101114
{
102115
number += get();
103116
}
117+
118+
// Return a NUMBER token
104119
return Token(TokenType::NUMBER, number, line, column);
105120
}
106121

107-
// Unknown token
122+
// If the character doesn't match any known token patterns, return an UNKNOWN token
108123
return Token(TokenType::UNKNOWN, std::string(1, current), line, column);
109124
}
110125

111-
bool Scanner::hasMoreTokens() const
126+
// Checks if there are more tokens to be extracted
127+
bool Scanner::hasMoreTokens()
128+
{
129+
// Save the current state to avoid modifying the scanner's actual state
130+
size_t tempPos = pos;
131+
int tempLine = line;
132+
int tempColumn = column;
133+
134+
// Temporarily skip whitespace and comments
135+
bool unclosedComment = skipWhitespaceAndComments();
136+
137+
// Determine if there are more tokens
138+
bool hasMore = (pos < input.size()) && !unclosedComment;
139+
140+
// Restore the scanner's state
141+
pos = tempPos;
142+
line = tempLine;
143+
column = tempColumn;
144+
145+
return hasMore;
146+
}
147+
148+
// Skips over whitespace and comments in the input
149+
bool Scanner::skipWhitespaceAndComments()
112150
{
113-
return pos < input.size();
151+
while (true)
152+
{
153+
skipWhitespace();
154+
155+
bool unclosedComment = skipComments();
156+
if (unclosedComment)
157+
{
158+
// Unclosed comment detected; return true to indicate error
159+
return true;
160+
}
161+
162+
// If no more whitespace or comments, break out of the loop
163+
if (!std::isspace(peek()) && peek() != '{')
164+
{
165+
break;
166+
}
167+
}
168+
// No unclosed comment detected
169+
return false;
114170
}
115171

172+
// Skips over whitespace characters in the input
173+
void Scanner::skipWhitespace()
174+
{
175+
// Consume all consecutive whitespace characters
176+
while (pos < input.size() && std::isspace(peek()))
177+
{
178+
get(); // Consume the whitespace character
179+
}
180+
}
181+
182+
// Skips over comments in the input source code
183+
bool Scanner::skipComments()
184+
{
185+
if (pos < input.size() && peek() == '{')
186+
{
187+
get(); // Consume '{'
188+
while (pos < input.size() && peek() != '}')
189+
{
190+
get(); // Consume characters inside the comment
191+
}
192+
if (pos < input.size())
193+
{
194+
get(); // Consume '}'
195+
skipWhitespace(); // Skip any whitespace after the comment
196+
return false; // Comment was successfully skipped
197+
}
198+
else
199+
{
200+
// EOF reached before closing '}'
201+
// Unclosed comment detected
202+
return true;
203+
}
204+
}
205+
return false; // No comment to skip
206+
}
207+
208+
// Peeks at the next character in the input without advancing the position
116209
char Scanner::peek() const
117210
{
211+
// Return the next character if within bounds, or '\0' if at the end
118212
return pos < input.size() ? input[pos] : '\0';
119213
}
120214

215+
// Gets the next character in the input and advances the position
121216
char Scanner::get()
122217
{
218+
// Check if at the end of input
123219
if (pos >= input.size())
124220
{
125221
return '\0';
126222
}
127223

128-
char currentChar = input[pos++];
224+
char currentChar = input[pos++]; // Get the current character and advance position
225+
226+
// Update line and column numbers for error reporting and tracking
129227
if (currentChar == '\n')
130228
{
131-
line++;
132-
column = 1;
229+
line++; // Move to the next line
230+
column = 1; // Reset column number
133231
}
134232
else
135233
{
136-
column++;
234+
column++; // Move to the next column
137235
}
138236

139237
return currentChar;
140238
}
141239

142-
void Scanner::skipWhitespace()
143-
{
144-
while (std::isspace(peek()))
145-
{
146-
get();
147-
}
148-
}
149240
} // namespace SCANNER
150241
} // namespace TINY

0 commit comments

Comments
 (0)