53
53
* ',bar)\n' = lastLine
54
54
* </pre>
55
55
*/
56
- public class HeredocTerm extends StrTerm {
57
- // Marker delimiting heredoc boundary
56
+ public final class HeredocTerm extends StrTerm {
57
+ /** End marker delimiting heredoc boundary. */
58
58
private final Rope nd_lit ;
59
59
60
- // Expand variables, Indentation of final marker
60
+ /** Indicates whether string interpolation (expansion) should be performed, and the indentation of the end marker. */
61
61
private final int flags ;
62
62
63
- protected final int nth ;
63
+ /** End position of the end marker on the line where it is declared. */
64
+ final int nth ;
64
65
65
- protected final int line ;
66
+ /** Line index of the line where the end marker is declared (1-based). */
67
+ final int line ;
66
68
67
- // Portion of line right after beginning marker
68
- protected final Rope lastLine ;
69
+ /** Portion of the line where the end marker is declared, from right after the marker until the end of the line. */
70
+ final Rope lastLine ;
69
71
70
72
public HeredocTerm (Rope marker , int func , int nth , int line , Rope lastLine ) {
71
73
this .nd_lit = marker ;
@@ -85,44 +87,45 @@ protected int error(RubyLexer lexer, Rope eos) {
85
87
return -1 ;
86
88
}
87
89
88
- protected int restore (RubyLexer lexer ) {
90
+ private int restore (RubyLexer lexer ) {
89
91
lexer .heredoc_restore (this );
90
- lexer . setStrTerm ( new StringTerm ( flags | STR_FUNC_TERM , 0 , 0 , line )); // weird way to terminate heredoc.
91
-
92
+ // this will cause the next call to RubyLexer#yylex() to emit the RubyParser.tSTRING_END token
93
+ lexer . setStrTerm ( new StringTerm ( flags | STR_FUNC_TERM , 0 , 0 , line ));
92
94
return EOF ;
93
95
}
94
96
95
97
@ Override
96
98
public int parseString (RubyLexer lexer ) {
97
99
RopeBuilder str = null ;
98
- Rope eos = nd_lit ;
99
100
boolean indent = (flags & STR_FUNC_INDENT ) != 0 ;
100
101
int c = lexer .nextc ();
101
102
102
103
if (c == EOF ) {
103
- return error (lexer , eos );
104
+ return error (lexer , nd_lit );
104
105
}
105
106
106
- // Found end marker for this heredoc
107
- if (lexer .was_bol () && lexer .whole_match_p (nd_lit , indent )) {
108
- lexer .heredoc_restore (this );
107
+ // Found end marker for this heredoc, at the start of a line
108
+ if (lexer .was_bol () && lexer .whole_match_p (this . nd_lit , indent )) {
109
+ lexer .heredoc_restore (this ); // will also skip over the end marker
109
110
lexer .setStrTerm (null );
110
111
lexer .setState (EXPR_END );
111
112
return RubyParser .tSTRING_END ;
112
113
}
113
114
114
115
if ((flags & STR_FUNC_EXPAND ) == 0 ) {
115
- do {
116
- Rope lbuf = lexer .lex_lastline ;
117
- int p = 0 ;
116
+ // heredocs without string interpolation
117
+
118
+ do { // iterate on lines, while end marker not found
119
+ final Rope lbuf = lexer .lexb ;
118
120
int pend = lexer .lex_pend ;
119
- if (pend > p ) {
121
+
122
+ // Remove trailing newline, it will be appended later in normalized form (single \n).
123
+ if (pend > 0 ) {
120
124
switch (lexer .p (pend - 1 )) {
121
125
case '\n' :
122
126
pend --;
123
- if (pend == p || lexer .p (pend - 1 ) == '\r' ) {
124
- pend ++;
125
- break ;
127
+ if (pend > 0 && lexer .p (pend - 1 ) == '\r' ) {
128
+ pend --;
126
129
}
127
130
break ;
128
131
case '\r' :
@@ -131,77 +134,99 @@ public int parseString(RubyLexer lexer) {
131
134
}
132
135
}
133
136
137
+ // if we are dealing with a squiggly heredoc
134
138
if (lexer .getHeredocIndent () > 0 ) {
135
- for (int i = 0 ; p + i < pend && lexer .update_heredoc_indent (lexer .p (p + i )); i ++) {
139
+ // update the indent for the current line
140
+ for (int i = 0 ; i < pend && lexer .update_heredoc_indent (lexer .p (i )); i ++) {
136
141
}
142
+ // reset heredoc_line_indent to 0 (was -1 after we matched the first non-whitespace character)
137
143
lexer .setHeredocLineIndent (0 );
138
144
}
139
145
140
146
if (str != null ) {
141
- str .append (lbuf .getBytes (), p , pend - p );
147
+ str .append (lbuf .getBytes (), 0 , pend );
142
148
} else {
143
- final RopeBuilder builder = RopeBuilder .createRopeBuilder (lbuf .getBytes (), p , pend - p );
149
+ // lazy initialization of string builder
150
+ final RopeBuilder builder = RopeBuilder .createRopeBuilder (lbuf .getBytes (), 0 , pend );
144
151
builder .setEncoding (lbuf .getEncoding ());
145
152
str = builder ;
146
153
}
147
154
148
- if (pend < lexer .lex_pend ) {
149
- str .append ('\n' );
150
- }
155
+ // append the newline that we removed earlier
156
+ str .append ('\n' );
151
157
lexer .lex_goto_eol ();
152
158
153
159
if (lexer .getHeredocIndent () > 0 ) {
160
+ // for squiggly (indented) heredocs, generate one string content token per line
161
+ // this will be dedented in the parser through lexer.heredoc_dedent
154
162
lexer .setValue (lexer .createStr (str , 0 ));
155
163
return RubyParser .tSTRING_CONTENT ;
156
164
}
157
165
// MRI null checks str in this case but it is unconditionally non-null?
158
166
if (lexer .nextc () == -1 ) {
159
- return error (lexer , eos );
167
+ return error (lexer , nd_lit );
160
168
}
161
- } while (!lexer .whole_match_p (eos , indent ));
169
+ } while (!lexer .whole_match_p (nd_lit , indent ));
162
170
} else {
171
+ // heredoc with string interpolation
172
+
163
173
RopeBuilder tok = new RopeBuilder ();
164
174
tok .setEncoding (lexer .getEncoding ());
175
+
165
176
if (c == '#' ) {
177
+ // interpolated variable or block begin
178
+ // This returns tSTRING_DVAR (if it finds $, @ or @@), tSTRING_DBEG (if it finds '{'), or 0 (none of
179
+ // these things were found).
166
180
int token = lexer .peekVariableName (RubyParser .tSTRING_DVAR , RubyParser .tSTRING_DBEG );
167
-
168
181
if (token != 0 ) {
182
+ // Emit the token - note that the parser will unset RubyLexer#lex_strTerm while the variable or
183
+ // block is being parse and restore it when it is done, allowing the rest of the heredoc to be
184
+ // processed.
169
185
return token ;
170
186
}
171
-
172
187
tok .append ('#' );
173
188
}
174
189
175
190
// MRI has extra pointer which makes our code look a little bit more strange in comparison
176
191
do {
177
192
lexer .pushback (c );
178
193
179
- Encoding enc [] = new Encoding [1 ];
194
+ Encoding [] enc = new Encoding [1 ];
180
195
enc [0 ] = lexer .getEncoding ();
181
196
197
+ // Parse the next string segment into the buffer, as a regular string (with expansion).
198
+ // The segment might terminate because of a newline, line continuation (\\) or because of
199
+ // an interpolation (#{...}, #@foo, #$foo, etc).
182
200
if ((c = new StringTerm (flags , '\0' , '\n' , lexer .ruby_sourceline )
183
201
.parseStringIntoBuffer (lexer , tok , enc )) == EOF ) {
184
202
if (lexer .eofp ) {
185
- return error (lexer , eos );
203
+ return error (lexer , nd_lit );
186
204
}
187
205
return restore (lexer );
188
206
}
207
+
189
208
if (c != '\n' ) {
209
+ // emit the string segment
190
210
lexer .setValue (lexer .createStr (tok , 0 ));
191
211
return RubyParser .tSTRING_CONTENT ;
192
212
}
213
+
214
+ // append the terminating newline
193
215
tok .append (lexer .nextc ());
194
216
195
217
if (lexer .getHeredocIndent () > 0 ) {
218
+ // for squiggly (indented) heredocs, generate one string content token per line
219
+ // this will be dedented in the parser through lexer.heredoc_dedent
196
220
lexer .lex_goto_eol ();
197
221
lexer .setValue (lexer .createStr (tok , 0 ));
198
222
return RubyParser .tSTRING_CONTENT ;
199
223
}
200
224
201
225
if ((c = lexer .nextc ()) == EOF ) {
202
- return error (lexer , eos );
226
+ return error (lexer , nd_lit );
203
227
}
204
- } while (!lexer .whole_match_p (eos , indent ));
228
+ // NOTE: The end marker is not processed here, but in the next call to HeredocTerm#parseString
229
+ } while (!lexer .whole_match_p (nd_lit , indent ));
205
230
str = tok ;
206
231
}
207
232
0 commit comments