Skip to content

Commit 201f6a7

Browse files
author
Nicolas Laurent
committed
further clarifications
1 parent 6f6bf21 commit 201f6a7

File tree

2 files changed

+36
-17
lines changed

2 files changed

+36
-17
lines changed

src/main/java/org/truffleruby/parser/lexer/HeredocTerm.java

Lines changed: 13 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -104,9 +104,9 @@ public int parseString(RubyLexer lexer) {
104104
return error(lexer, nd_lit);
105105
}
106106

107-
// Found end marker for this heredoc, on the very first line
107+
// Found end marker for this heredoc, at the start of a line
108108
if (lexer.was_bol() && lexer.whole_match_p(this.nd_lit, indent)) {
109-
lexer.heredoc_restore(this);
109+
lexer.heredoc_restore(this); // will also skip over the end marker
110110
lexer.setStrTerm(null);
111111
lexer.setState(EXPR_END);
112112
return RubyParser.tSTRING_END;
@@ -173,11 +173,15 @@ public int parseString(RubyLexer lexer) {
173173
RopeBuilder tok = new RopeBuilder();
174174
tok.setEncoding(lexer.getEncoding());
175175

176-
// TODO why is this needed at the start?
177176
if (c == '#') {
178177
// interpolated variable or block begin
178+
// This returns tSTRING_DVAR (if it finds $, @ or @@), tSTRING_DBEG (if it finds '{'), or 0 (none of
179+
// these things were found).
179180
int token = lexer.peekVariableName(RubyParser.tSTRING_DVAR, RubyParser.tSTRING_DBEG);
180181
if (token != 0) {
182+
// Emit the token - note that the parser will unset RubyLexer#lex_strTerm while the variable or
183+
// block is being parsed and restore it when it is done, allowing the rest of the heredoc to be
184+
// processed.
181185
return token;
182186
}
183187
tok.append('#');
@@ -187,10 +191,12 @@ public int parseString(RubyLexer lexer) {
187191
do {
188192
lexer.pushback(c);
189193

190-
Encoding enc[] = new Encoding[1];
194+
Encoding[] enc = new Encoding[1];
191195
enc[0] = lexer.getEncoding();
192196

193-
// parse the line into the buffer, as a regular string (with expansion)
197+
// Parse the next string segment into the buffer, as a regular string (with expansion).
198+
// The segment might terminate because of a newline, line continuation (\\) or because of
199+
// an interpolation (#{...}, #@foo, #$foo, etc).
194200
if ((c = new StringTerm(flags, '\0', '\n', lexer.ruby_sourceline)
195201
.parseStringIntoBuffer(lexer, tok, enc)) == EOF) {
196202
if (lexer.eofp) {
@@ -205,7 +211,7 @@ public int parseString(RubyLexer lexer) {
205211
return RubyParser.tSTRING_CONTENT;
206212
}
207213

208-
// TODO is this a newline?
214+
// append the terminating newline
209215
tok.append(lexer.nextc());
210216

211217
if (lexer.getHeredocIndent() > 0) {
@@ -219,6 +225,7 @@ public int parseString(RubyLexer lexer) {
219225
if ((c = lexer.nextc()) == EOF) {
220226
return error(lexer, nd_lit);
221227
}
228+
// NOTE: The end marker is not processed here, but in the next call to HeredocTerm#parseString
222229
} while (!lexer.whole_match_p(nd_lit, indent));
223230
str = tok;
224231
}

src/main/java/org/truffleruby/parser/lexer/RubyLexer.java

Lines changed: 23 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -3455,7 +3455,7 @@ public void setHeredocLineIndent(int heredoc_line_indent) {
34553455
}
34563456

34573457
/** Sets {@link #heredoc_indent}. Usually used to reset the indent to 0 in the parser after we've finished parsing a
3458-
* heredoc ({@link RubyParser#tSTRING_END} or {@link RubyParser#tSTRING_DEND} has been seen). */
3458+
* heredoc ({@link RubyParser#tSTRING_END} has been seen). */
34593459
public void setHeredocIndent(int heredoc_indent) {
34603460
this.heredoc_indent = heredoc_indent;
34613461
}
@@ -3555,33 +3555,45 @@ public void tokaddmbc(int codepoint, RopeBuilder buffer) {
35553555
buffer.append(bytes);
35563556
}
35573557

3558-
/** Updates {@link #heredoc_line_indent} and {@link #heredoc_indent} based on the character {@code c} read on the
3559-
* current line. If the character is whitespace, increments {@link #heredoc_line_indent} with its width and return
3560-
* true. If not, sets {@link #heredoc_indent} to {@link #heredoc_line_indent} if line_indent is lesser, sets
3561-
* {@link #heredoc_line_indent} to -1, and returns false.
3558+
/** Updates {@link #heredoc_line_indent} and {@link #heredoc_indent} based on the current value of these two
3559+
* variables and of the character {@code c} read on the current line.
3560+
*
3561+
* <p>
3562+
* This always returns false if {@link #heredoc_line_indent} is -1, and the only effect is to reset
3563+
* {@link #heredoc_line_indent} to 0 if the character is a newline.
3564+
*
3565+
* <p>
3566+
* Otherwise, if the character is a space or a tab, increments {@link #heredoc_line_indent} with its width and
3567+
* returns true. In every other case, false is returned. Refer to the source code for more details.
3568+
*
3569+
* <p>
3570+
* If the character is a newline, returns false without further action.
3571+
*
35623572
* <p>
3563-
* Further invocations of this after {@link #heredoc_line_indent} has been set to 1 will return false. If {@code c}
3564-
* is a newline in this case, will reset {@link #heredoc_line_indent} to 0. */
3573+
* Otherwise, this is the first non-whitespace character, and {@link #heredoc_indent} is set to
3574+
* {@link #heredoc_line_indent} if the latter is smaller. {@link #heredoc_line_indent} is set to -1. */
35653575
public boolean update_heredoc_indent(int c) {
35663576
if (heredoc_line_indent == -1) {
35673577
if (c == '\n') {
35683578
heredoc_line_indent = 0;
35693579
}
3580+
return false;
35703581
} else if (c == ' ') {
35713582
heredoc_line_indent++;
35723583
return true;
35733584
} else if (c == '\t') {
35743585
int w = (heredoc_line_indent / TAB_WIDTH) + 1;
35753586
heredoc_line_indent = w * TAB_WIDTH;
35763587
return true;
3577-
} else if (c != '\n') {
3588+
} else if (c == '\n') {
3589+
return false;
3590+
} else {
35783591
if (heredoc_indent > heredoc_line_indent) {
35793592
heredoc_indent = heredoc_line_indent;
35803593
}
35813594
heredoc_line_indent = -1;
3595+
return false;
35823596
}
3583-
3584-
return false;
35853597
}
35863598

35873599
public void validateFormalIdentifier(Rope identifier) {
@@ -3633,7 +3645,7 @@ public boolean was_bol() {
36333645
}
36343646

36353647
/** Indicates whether the current line matches the given marker, after stripping away leading whitespace if
3636-
* {@code indent} is true. */
3648+
* {@code indent} is true. Does not advance the input position ({@link #lex_p}). */
36373649
boolean whole_match_p(Rope eos, boolean indent) {
36383650
int len = eos.byteLength();
36393651
int p = lex_pbeg;

0 commit comments

Comments
 (0)