Skip to content

Commit 9a84a12

Browse files
committed
[GR-45043] RubySource improvements
PullRequest: truffleruby/4066
2 parents c14c9fe + 8cd8369 commit 9a84a12

File tree

18 files changed

+225
-264
lines changed

18 files changed

+225
-264
lines changed

spec/truffle/interop/polyglot/polyglot_spec.rb

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -42,10 +42,8 @@
4242
Polyglot.eval("ruby", "# encoding: us-ascii\n__ENCODING__.name").should == "US-ASCII"
4343
end
4444

45-
it "will not allow code in Ruby to have a magic comment to change the encoding to something not a subset of UTF-8" do
46-
-> {
47-
Polyglot.eval("ruby", "# encoding: big5\n__ENCODING__.name")
48-
}.should raise_error(ArgumentError, /big5 cannot be used as an encoding for a Polyglot API Source/)
45+
it "will allow code in Ruby to have a magic comment to change the encoding to something not a subset of UTF-8" do
46+
Polyglot.eval("ruby", "# encoding: big5\n__ENCODING__.name").should == "Big5"
4947
end
5048
end
5149

@@ -87,10 +85,8 @@
8785
Polyglot.eval_file("ruby", fixture(__FILE__, "usascii_magic.rb")).should == "US-ASCII"
8886
end
8987

90-
it "will not allow code in Ruby to have a magic comment to change the encoding" do
91-
-> {
92-
Polyglot.eval_file("ruby", fixture(__FILE__, "big5_magic.rb"))
93-
}.should raise_error(ArgumentError, /big5 cannot be used as an encoding for a Polyglot API Source/)
88+
it "will allow code in Ruby to have a magic comment to change the encoding" do
89+
Polyglot.eval_file("ruby", fixture(__FILE__, "big5_magic.rb")).should == "Big5"
9490
end
9591
end
9692

src/main/java/org/truffleruby/RubyFileTypeDetector.java

Lines changed: 32 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -16,9 +16,8 @@
1616
import java.util.Locale;
1717
import java.util.regex.Pattern;
1818

19-
import org.jcodings.Encoding;
20-
import org.truffleruby.core.encoding.EncodingManager;
2119
import org.truffleruby.core.encoding.Encodings;
20+
import org.truffleruby.core.encoding.RubyEncoding;
2221
import org.truffleruby.core.encoding.TStringUtils;
2322
import org.truffleruby.core.string.TStringWithEncoding;
2423
import org.truffleruby.parser.lexer.RubyLexer;
@@ -53,46 +52,56 @@ public String findMimeType(TruffleFile file) throws IOException {
5352
}
5453
}
5554

56-
try (BufferedReader fileContent = file.newBufferedReader(StandardCharsets.UTF_8)) {
55+
try (BufferedReader fileContent = file.newBufferedReader(StandardCharsets.ISO_8859_1)) {
5756
final String firstLine = fileContent.readLine();
5857
if (firstLine != null && SHEBANG_REGEXP.matcher(firstLine).matches()) {
5958
return RubyLanguage.getMimeType(false);
6059
}
6160
} catch (IOException | SecurityException e) {
62-
// Reading random files as UTF-8 could cause all sorts of errors
61+
// Reading random files could cause all sorts of errors
6362
}
6463
return null;
6564
}
6665

6766
@Override
68-
public Charset findEncoding(TruffleFile file) throws IOException {
69-
try (BufferedReader fileContent = file.newBufferedReader(StandardCharsets.UTF_8)) {
70-
final String firstLine = fileContent.readLine();
67+
public Charset findEncoding(TruffleFile file) {
68+
// We use ISO-8859-1 because every byte is valid in that encoding and
69+
// we only care about US-ASCII characters for magic encoding comments.
70+
try (BufferedReader fileContent = file.newBufferedReader(StandardCharsets.ISO_8859_1)) {
71+
var encoding = findEncoding(fileContent);
72+
if (encoding != null) {
73+
return encoding.jcoding.getCharset();
74+
}
75+
} catch (IOException | SecurityException e) {
76+
// Reading random files could cause all sorts of errors
77+
}
78+
return null; // no magic encoding comment
79+
}
80+
81+
public static RubyEncoding findEncoding(BufferedReader reader) {
82+
try {
83+
final String firstLine = reader.readLine();
7184
if (firstLine != null) {
72-
String encodingCommentLine;
85+
final String encodingCommentLine;
7386
if (SHEBANG_REGEXP.matcher(firstLine).matches()) {
74-
encodingCommentLine = fileContent.readLine();
87+
encodingCommentLine = reader.readLine();
7588
} else {
7689
encodingCommentLine = firstLine;
7790
}
91+
7892
if (encodingCommentLine != null) {
79-
var encodingComment = new TStringWithEncoding(TStringUtils.utf8TString(encodingCommentLine),
80-
Encodings.UTF_8);
81-
Charset[] encodingHolder = new Charset[1];
82-
RubyLexer.parseMagicComment(encodingComment, (name, value) -> {
83-
if (RubyLexer.isMagicEncodingComment(name)) {
84-
Encoding encoding = EncodingManager.getEncoding(value);
85-
if (encoding != null) {
86-
encodingHolder[0] = encoding.getCharset();
87-
}
88-
}
89-
});
90-
return encodingHolder[0];
93+
var encodingComment = new TStringWithEncoding(
94+
TStringUtils.fromJavaString(encodingCommentLine, Encodings.BINARY), Encodings.BINARY);
95+
var encoding = RubyLexer.parseMagicEncodingComment(encodingComment);
96+
if (encoding != null) {
97+
return encoding;
98+
}
9199
}
92100
}
93-
} catch (IOException | SecurityException e) {
94-
// Reading random files as UTF-8 could cause all sorts of errors
101+
} catch (IOException e) {
102+
// Use the default encoding if reading failed somehow
95103
}
104+
96105
return null;
97106
}
98107
}

src/main/java/org/truffleruby/RubyLanguage.java

Lines changed: 3 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -577,21 +577,17 @@ protected RootCallTarget parse(ParsingRequest request) {
577577

578578
final ParsingParameters parsingParameters = parsingRequestParams.get();
579579
if (parsingParameters != null) { // from #require or core library
580-
assert parsingParameters.getSource().equals(source);
581-
final RubySource rubySource = new RubySource(
582-
source,
583-
parsingParameters.getPath(),
584-
parsingParameters.getTStringWithEnc());
580+
assert parsingParameters.rubySource.getSource().equals(source);
585581
final ParserContext parserContext = MIME_TYPE_MAIN_SCRIPT.equals(source.getMimeType())
586582
? ParserContext.TOP_LEVEL_FIRST
587583
: ParserContext.TOP_LEVEL;
588584
final LexicalScope lexicalScope = contextIfSingleContext.map(RubyContext::getRootLexicalScope).orElse(null);
589585
return getCurrentContext().getCodeLoader().parse(
590-
rubySource,
586+
parsingParameters.rubySource,
591587
parserContext,
592588
null,
593589
lexicalScope,
594-
parsingParameters.getCurrentNode());
590+
parsingParameters.currentNode);
595591
}
596592

597593
RootNode root;

src/main/java/org/truffleruby/core/CoreLibrary.java

Lines changed: 7 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,6 @@
2424

2525
import com.oracle.truffle.api.RootCallTarget;
2626
import com.oracle.truffle.api.exception.AbstractTruffleException;
27-
import org.graalvm.collections.Pair;
2827
import org.jcodings.transcode.EConvFlags;
2928
import org.truffleruby.RubyContext;
3029
import org.truffleruby.RubyLanguage;
@@ -45,7 +44,6 @@
4544
import org.truffleruby.core.module.RubyModule;
4645
import org.truffleruby.core.numeric.BigIntegerOps;
4746
import org.truffleruby.core.numeric.RubyBignum;
48-
import org.truffleruby.core.string.TStringWithEncoding;
4947
import org.truffleruby.debug.BindingLocalVariablesObject;
5048
import org.truffleruby.debug.GlobalVariablesObject;
5149
import org.truffleruby.debug.TopScopeObject;
@@ -64,6 +62,7 @@
6462
import org.truffleruby.language.objects.ForeignClassNode;
6563
import org.truffleruby.language.objects.SingletonClassNode;
6664
import org.truffleruby.parser.ParserContext;
65+
import org.truffleruby.parser.RubySource;
6766
import org.truffleruby.parser.TranslatorDriver;
6867
import org.truffleruby.parser.ast.RootParseNode;
6968
import org.truffleruby.platform.NativeConfiguration;
@@ -761,10 +760,9 @@ public void loadRubyCoreLibraryAndPostBoot() {
761760
state = State.LOADED;
762761
}
763762

764-
var sourceTStringPair = loadCoreFileSource(language.coreLoadPath + file);
765-
final Source source = sourceTStringPair.getLeft();
766-
final RootCallTarget callTarget = context.getCodeLoader().parseTopLevelWithCache(sourceTStringPair,
767-
node);
763+
var rubySource = loadCoreFileSource(language.coreLoadPath + file);
764+
final Source source = rubySource.getSource();
765+
final RootCallTarget callTarget = context.getCodeLoader().parseTopLevelWithCache(rubySource, node);
768766

769767
final CodeLoader.DeferredCall deferredCall = context.getCodeLoader().prepareExecute(
770768
callTarget,
@@ -787,13 +785,13 @@ public void loadRubyCoreLibraryAndPostBoot() {
787785
}
788786
}
789787

790-
public Pair<Source, TStringWithEncoding> loadCoreFileSource(String path) throws IOException {
788+
public RubySource loadCoreFileSource(String path) throws IOException {
791789
if (path.startsWith(RubyLanguage.RESOURCE_SCHEME)) {
792790
if (TruffleOptions.AOT || ParserCache.INSTANCE != null) {
793791
final RootParseNode rootParseNode = ParserCache.INSTANCE.get(path);
794-
return Pair.create(rootParseNode.getSource(), null);
792+
return new RubySource(rootParseNode.getSource(), path);
795793
} else {
796-
return Pair.create(ResourceLoader.loadResource(path, language.options.CORE_AS_INTERNAL), null);
794+
return new RubySource(ResourceLoader.loadResource(path, language.options.CORE_AS_INTERNAL), path);
797795
}
798796
} else {
799797
final FileLoader fileLoader = new FileLoader(context, language);

src/main/java/org/truffleruby/core/kernel/TruffleKernelNodes.java

Lines changed: 4 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -18,9 +18,7 @@
1818
import com.oracle.truffle.api.frame.Frame;
1919
import com.oracle.truffle.api.nodes.Node;
2020
import com.oracle.truffle.api.profiles.InlinedConditionProfile;
21-
import com.oracle.truffle.api.source.Source;
2221
import com.oracle.truffle.api.strings.TruffleString;
23-
import org.graalvm.collections.Pair;
2422
import org.truffleruby.Layouts;
2523
import org.truffleruby.annotations.CoreMethod;
2624
import org.truffleruby.builtins.CoreMethodArrayArgumentsNode;
@@ -32,7 +30,6 @@
3230
import org.truffleruby.core.kernel.TruffleKernelNodesFactory.GetSpecialVariableStorageNodeGen;
3331
import org.truffleruby.core.module.RubyModule;
3432
import org.truffleruby.core.proc.RubyProc;
35-
import org.truffleruby.core.string.TStringWithEncoding;
3633
import org.truffleruby.core.symbol.RubySymbol;
3734
import org.truffleruby.language.LexicalScope;
3835
import org.truffleruby.language.Nil;
@@ -89,13 +86,12 @@ boolean load(Object file, Nil wrapModule,
8986
@Cached @Shared RubyStringLibrary strings,
9087
@Cached @Shared IndirectCallNode callNode) {
9188
final String feature = RubyGuards.getJavaString(file);
92-
final Pair<Source, TStringWithEncoding> sourceTStringPair = getSourceTStringPair(feature);
89+
final RubySource rubySource = getRubySource(feature);
9390

9491
final DeclarationContext declarationContext = DeclarationContext.topLevel(getContext());
9592
final LexicalScope lexicalScope = getContext().getRootLexicalScope();
9693
final Object self = getContext().getCoreLibrary().mainObject;
97-
final RootCallTarget callTarget = getContext().getCodeLoader().parseTopLevelWithCache(sourceTStringPair,
98-
this);
94+
final RootCallTarget callTarget = getContext().getCodeLoader().parseTopLevelWithCache(rubySource, this);
9995

10096
final CodeLoader.DeferredCall deferredCall = getContext().getCodeLoader().prepareExecute(
10197
callTarget,
@@ -116,7 +112,7 @@ boolean load(Object file, RubyModule wrapModule,
116112
@Cached @Shared RubyStringLibrary strings,
117113
@Cached @Shared IndirectCallNode callNode) {
118114
final String feature = RubyGuards.getJavaString(file);
119-
final Pair<Source, TStringWithEncoding> sourceTStringPair = getSourceTStringPair(feature);
115+
final RubySource rubySource = getRubySource(feature);
120116

121117
final DeclarationContext declarationContext = DeclarationContext.topLevel(wrapModule);
122118
final LexicalScope lexicalScope = new LexicalScope(getContext().getRootLexicalScope(), wrapModule);
@@ -127,10 +123,6 @@ boolean load(Object file, RubyModule wrapModule,
127123
DispatchNode.getUncached().call(self, "extend", wrapModule);
128124

129125
// callTarget
130-
final RubySource rubySource = new RubySource(
131-
sourceTStringPair.getLeft(),
132-
feature,
133-
sourceTStringPair.getRight());
134126
final RootCallTarget callTarget = getContext()
135127
.getCodeLoader()
136128
.parse(rubySource, ParserContext.TOP_LEVEL, null, lexicalScope, this);
@@ -148,7 +140,7 @@ boolean load(Object file, RubyModule wrapModule,
148140
return true;
149141
}
150142

151-
private Pair<Source, TStringWithEncoding> getSourceTStringPair(String feature) {
143+
private RubySource getRubySource(String feature) {
152144
try {
153145
final FileLoader fileLoader = new FileLoader(getContext(), getLanguage());
154146
return fileLoader.loadFile(feature);

src/main/java/org/truffleruby/language/TruffleBootNodes.java

Lines changed: 3 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,6 @@
1818
import com.oracle.truffle.api.RootCallTarget;
1919
import com.oracle.truffle.api.nodes.NodeUtil;
2020
import com.oracle.truffle.api.strings.TruffleString;
21-
import org.graalvm.collections.Pair;
2221
import org.graalvm.options.OptionDescriptor;
2322
import org.truffleruby.RubyContext;
2423
import org.truffleruby.RubyLanguage;
@@ -122,21 +121,17 @@ int main(int argc, long argv, String kind, String toExecute) {
122121

123122
// Need to set $0 before loading required libraries
124123
// Also, a non-existing main script file errors out before loading required libraries
125-
final RubySource source = loadMainSourceSettingDollarZero(kind, toExecute.intern()); //intern() to improve footprint
124+
final RubySource rubySource = loadMainSourceSettingDollarZero(kind, toExecute.intern()); //intern() to improve footprint
126125

127126
// Load libraries required from the command line (-r LIBRARY)
128127
for (String requiredLibrary : getContext().getOptions().REQUIRED_LIBRARIES) {
129128
requireNode.call(coreLibrary().mainObject, "require", utf8(requiredLibrary));
130129
}
131130

132131
if (getContext().getOptions().SYNTAX_CHECK) {
133-
checkSyntax.call(coreLibrary().truffleBootModule, "check_syntax", source);
132+
checkSyntax.call(coreLibrary().truffleBootModule, "check_syntax", rubySource);
134133
} else {
135-
var tstringWithEncoding = source.hasTruffleString() ? source.getTStringWithEncoding() : null;
136-
var sourceTStringPair = Pair.create(source.getSource(), tstringWithEncoding);
137-
final RootCallTarget callTarget = getContext()
138-
.getCodeLoader()
139-
.parseTopLevelWithCache(sourceTStringPair, null);
134+
var callTarget = getContext().getCodeLoader().parseTopLevelWithCache(rubySource, null);
140135

141136
final CodeLoader.DeferredCall deferredCall = getContext().getCodeLoader().prepareExecute(
142137
callTarget,

src/main/java/org/truffleruby/language/loader/CodeLoader.java

Lines changed: 4 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -10,11 +10,9 @@
1010
package org.truffleruby.language.loader;
1111

1212
import com.oracle.truffle.api.source.Source;
13-
import org.graalvm.collections.Pair;
1413
import org.truffleruby.RubyContext;
1514
import org.truffleruby.RubyLanguage;
1615
import org.truffleruby.core.module.RubyModule;
17-
import org.truffleruby.core.string.TStringWithEncoding;
1816
import org.truffleruby.language.LexicalScope;
1917
import org.truffleruby.language.Nil;
2018
import org.truffleruby.language.RubyNode;
@@ -53,22 +51,19 @@ public CodeLoader(RubyLanguage language, RubyContext context) {
5351
}
5452

5553
@TruffleBoundary
56-
public RootCallTarget parseTopLevelWithCache(Pair<Source, TStringWithEncoding> sourceTStringPair,
57-
Node currentNode) {
58-
final Source source = sourceTStringPair.getLeft();
59-
final TStringWithEncoding tstringWithEncoding = sourceTStringPair.getRight();
60-
54+
public RootCallTarget parseTopLevelWithCache(RubySource rubySource, Node currentNode) {
55+
final Source source = rubySource.getSource();
6156
final String path = RubyLanguage.getPath(source);
57+
6258
if (language.singleContext && !alreadyLoadedInContext.add(language.getPathRelativeToHome(path))) {
6359
/* Duplicate load of the same file in the same context, we cannot use the cache because it would re-assign
6460
* the live modules of static LexicalScopes and we cannot/do not want to invalidate static LexicalScopes, so
6561
* there the static lexical scope and its module are constants and need no checks in single context (e.g.,
6662
* in LookupConstantWithLexicalScopeNode). */
67-
final RubySource rubySource = new RubySource(source, path, tstringWithEncoding);
6863
return parse(rubySource, ParserContext.TOP_LEVEL, null, context.getRootLexicalScope(), currentNode);
6964
}
7065

71-
language.parsingRequestParams.set(new ParsingParameters(currentNode, tstringWithEncoding, source));
66+
language.parsingRequestParams.set(new ParsingParameters(currentNode, rubySource));
7267
try {
7368
return (RootCallTarget) context.getEnv().parseInternal(source);
7469
} finally {

src/main/java/org/truffleruby/language/loader/EvalLoader.java

Lines changed: 1 addition & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,6 @@
1212
import com.oracle.truffle.api.nodes.Node;
1313
import com.oracle.truffle.api.strings.AbstractTruffleString;
1414
import org.truffleruby.RubyContext;
15-
import org.truffleruby.core.encoding.EncodingManager;
16-
import org.truffleruby.core.encoding.Encodings;
1715
import org.truffleruby.core.encoding.RubyEncoding;
1816
import org.truffleruby.core.string.CannotConvertBinaryRubyStringToJavaString;
1917
import org.truffleruby.core.string.TStringWithEncoding;
@@ -32,7 +30,7 @@ public static RubySource createEvalSource(RubyContext context, AbstractTruffleSt
3230
RubyEncoding encoding, String method, String file, int line, Node currentNode) {
3331
var code = new TStringWithEncoding(codeTString.asTruffleStringUncached(encoding.tencoding), encoding);
3432

35-
var sourceTString = createEvalTString(code);
33+
var sourceTString = RubyLexer.createSourceTStringBasedOnMagicEncodingComment(code, code.encoding);
3634
var sourceEncoding = sourceTString.encoding;
3735

3836
if (!sourceEncoding.isAsciiCompatible) {
@@ -65,20 +63,4 @@ public static RubySource createEvalSource(RubyContext context, AbstractTruffleSt
6563
return rubySource;
6664
}
6765

68-
private static TStringWithEncoding createEvalTString(TStringWithEncoding source) {
69-
final RubyEncoding[] encoding = { source.getEncoding() };
70-
71-
RubyLexer.parseMagicComment(source, (name, value) -> {
72-
if (RubyLexer.isMagicEncodingComment(name)) {
73-
encoding[0] = Encodings.getBuiltInEncoding(EncodingManager.getEncoding(value));
74-
}
75-
});
76-
77-
if (source.getEncoding() != encoding[0]) {
78-
source = source.forceEncoding(encoding[0]);
79-
}
80-
81-
return source;
82-
}
83-
8466
}

0 commit comments

Comments
 (0)