Skip to content

Commit 0434ba6

Browse files
committed
[GR-14621] Adopt the new TruffleFileTypeDetector and use it for encoding detection.
1 parent fab94ff commit 0434ba6

File tree

7 files changed

+245
-137
lines changed

7 files changed

+245
-137
lines changed

mx.truffleruby/suite.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
"name": "tools",
1010
"subdir": True,
1111
# version must always be equal to the version of the "sulong" import below
12-
"version": "f7f14f5d4d9ac7c2537932810082fc9f54cb635e",
12+
"version": "41667df9feab4601c919fccd48943e97832b0c8e",
1313
"urls": [
1414
{"url": "https://github.com/oracle/graal.git", "kind": "git"},
1515
{"url": "https://curio.ssw.jku.at/nexus/content/repositories/snapshots", "kind": "binary"},
@@ -19,7 +19,7 @@
1919
"name": "sulong",
2020
"subdir": True,
2121
# version must always be equal to the version of the "tools" import above
22-
"version": "f7f14f5d4d9ac7c2537932810082fc9f54cb635e",
22+
"version": "41667df9feab4601c919fccd48943e97832b0c8e",
2323
"urls": [
2424
{"url": "https://github.com/oracle/graal.git", "kind": "git"},
2525
{"url": "https://curio.ssw.jku.at/nexus/content/repositories/snapshots", "kind": "binary"},
Lines changed: 106 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,106 @@
1+
/*
2+
* Copyright (c) 2016, 2019 Oracle and/or its affiliates. All rights reserved. This
3+
* code is released under a tri EPL/GPL/LGPL license. You can use it,
4+
* redistribute it and/or modify it under the terms of the:
5+
*
6+
* Eclipse Public License version 1.0, or
7+
* GNU General Public License version 2, or
8+
* GNU Lesser General Public License version 2.1.
9+
*/
10+
package org.truffleruby;
11+
12+
import java.io.BufferedReader;
13+
import java.io.IOException;
14+
import java.nio.charset.Charset;
15+
import java.nio.charset.StandardCharsets;
16+
import java.util.Locale;
17+
import java.util.function.BiConsumer;
18+
import java.util.regex.Pattern;
19+
import com.oracle.truffle.api.TruffleFile;
20+
import org.jcodings.Encoding;
21+
import org.truffleruby.core.encoding.EncodingManager;
22+
import org.truffleruby.core.rope.Rope;
23+
import org.truffleruby.core.string.StringOperations;
24+
import org.truffleruby.parser.lexer.RubyLexer;
25+
26+
public class RubyFileTypeDetector implements TruffleFile.FileTypeDetector {
27+
28+
private static final String MIME_TYPE = "application/x-ruby";
29+
30+
private static final String[] KNOWN_RUBY_FILES = new String[]{ "Gemfile", "Rakefile", "Mavenfile" };
31+
private static final String[] KNOWN_RUBY_SUFFIXES = new String[]{ ".rb", ".rake", ".gemspec" };
32+
private static final Pattern SHEBANG_REGEXP = Pattern.compile("^#! ?/usr/bin/(env +ruby|ruby).*");
33+
34+
@Override
35+
public String findMimeType(TruffleFile file) throws IOException {
36+
return findMimeAndEncodingImpl(file, null);
37+
}
38+
39+
@Override
40+
public Charset findEncoding(TruffleFile file) throws IOException {
41+
Charset[] encodingHolder = new Charset[1];
42+
findMimeAndEncodingImpl(file, encodingHolder);
43+
return encodingHolder[0];
44+
}
45+
46+
private String findMimeAndEncodingImpl(TruffleFile file, Charset[] encodingHolder) {
47+
final String fileName = file.getName();
48+
49+
if (fileName == null) {
50+
return null;
51+
}
52+
53+
final String lowerCaseFileName = fileName.toLowerCase(Locale.ROOT);
54+
String mimeType = null;
55+
56+
for (String candidate : KNOWN_RUBY_SUFFIXES) {
57+
if (lowerCaseFileName.endsWith(candidate)) {
58+
mimeType = MIME_TYPE;
59+
break;
60+
}
61+
}
62+
63+
if (mimeType == null) {
64+
for (String candidate : KNOWN_RUBY_FILES) {
65+
if (fileName.equals(candidate)) {
66+
mimeType = MIME_TYPE;
67+
break;
68+
}
69+
}
70+
}
71+
72+
if (mimeType == null || encodingHolder != null) {
73+
try (BufferedReader fileContent = file.newBufferedReader(StandardCharsets.UTF_8)) {
74+
final String firstLine = fileContent.readLine();
75+
if (firstLine != null) {
76+
String encodingCommentLine;
77+
if (SHEBANG_REGEXP.matcher(firstLine).matches()) {
78+
mimeType = mimeType == null ? MIME_TYPE : mimeType;
79+
encodingCommentLine = encodingHolder == null ? null : fileContent.readLine();
80+
} else {
81+
encodingCommentLine = encodingHolder == null ? null : firstLine;
82+
}
83+
if (encodingCommentLine != null) {
84+
Rope encodingCommentRope = StringOperations.encodeRope(encodingCommentLine, EncodingManager.getEncoding("UTF-8"));
85+
RubyLexer.parseMagicComment(encodingCommentRope, new BiConsumer<String, Rope>() {
86+
@Override
87+
public void accept(String name, Rope value) {
88+
if (RubyLexer.isMagicEncodingComment(name)) {
89+
Encoding encoding = EncodingManager.getEncoding(value);
90+
if (encoding != null) {
91+
encodingHolder[0] = encoding.getCharset();
92+
}
93+
}
94+
}
95+
});
96+
}
97+
}
98+
} catch (IOException | SecurityException e) {
99+
// Reading random files as UTF-8 could cause all sorts of errors
100+
}
101+
}
102+
103+
return mimeType;
104+
}
105+
106+
}

src/main/java/org/truffleruby/RubyLanguage.java

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2015, 2018 Oracle and/or its affiliates. All rights reserved. This
2+
* Copyright (c) 2015, 2019 Oracle and/or its affiliates. All rights reserved. This
33
* code is released under a tri EPL/GPL/LGPL license. You can use it,
44
* redistribute it and/or modify it under the terms of the:
55
*
@@ -41,7 +41,8 @@
4141
version = BuildInformationImpl.RUBY_VERSION,
4242
characterMimeTypes = TruffleRuby.MIME_TYPE,
4343
defaultMimeType = TruffleRuby.MIME_TYPE,
44-
dependentLanguages = TruffleRuby.LLVM_ID)
44+
dependentLanguages = TruffleRuby.LLVM_ID,
45+
fileTypeDetectors = RubyFileTypeDetector.class)
4546
@ProvidedTags({
4647
CoverageManager.LineTag.class,
4748
TraceManager.CallTag.class,

src/services/java/META-INF/services/java.nio.file.spi.FileTypeDetector

Lines changed: 0 additions & 1 deletion
This file was deleted.

src/services/java/org/truffleruby/services/RubyFileTypeDetector.java

Lines changed: 0 additions & 64 deletions
This file was deleted.
Lines changed: 134 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,134 @@
1+
/*
2+
* Copyright (c) 2018, 2019 Oracle and/or its affiliates. All rights reserved. This
3+
* code is released under a tri EPL/GPL/LGPL license. You can use it,
4+
* redistribute it and/or modify it under the terms of the:
5+
*
6+
* Eclipse Public License version 1.0, or
7+
* GNU General Public License version 2, or
8+
* GNU Lesser General Public License version 2.1.
9+
*/
10+
package org.truffleruby;
11+
12+
import com.oracle.truffle.api.TruffleFile;
13+
import com.oracle.truffle.api.TruffleLanguage;
14+
import org.junit.Test;
15+
import org.truffleruby.RubyTest;
16+
import org.truffleruby.shared.TruffleRuby;
17+
18+
import java.io.IOException;
19+
import java.nio.charset.Charset;
20+
import java.nio.charset.StandardCharsets;
21+
import java.nio.file.Files;
22+
import java.nio.file.Path;
23+
import java.nio.file.StandardOpenOption;
24+
import java.util.ArrayList;
25+
import java.util.List;
26+
import java.util.function.Consumer;
27+
import org.graalvm.polyglot.Source;
28+
29+
import static org.junit.Assert.assertEquals;
30+
import static org.junit.Assert.assertNotEquals;
31+
import org.truffleruby.language.RubyRootNode;
32+
33+
public class RubyFileTypeDetectorTest extends RubyTest {
34+
35+
@Test
36+
public void testDirect() {
37+
final RubyFileTypeDetector fileTypeDetector = new RubyFileTypeDetector();
38+
testWithAST("", new Consumer<RubyRootNode>() {
39+
@Override
40+
public void accept(RubyRootNode rootNode) {
41+
TruffleLanguage.Env env = rootNode.getContext().getEnv();
42+
try {
43+
for (TestCase testCase : getTestCases()) {
44+
TruffleFile file = env.getTruffleFile(testCase.path.toString());
45+
if (testCase.hasRubyMimeType) {
46+
assertEquals(TruffleRuby.MIME_TYPE, fileTypeDetector.findMimeType(file));
47+
} else {
48+
assertNotEquals(TruffleRuby.MIME_TYPE, fileTypeDetector.findMimeType(file));
49+
}
50+
}
51+
} catch (IOException ioe) {
52+
throw new RuntimeException(ioe);
53+
}
54+
}
55+
});
56+
}
57+
58+
@Test
59+
public void testIndirect() throws IOException {
60+
for (TestCase testCase : getTestCases()) {
61+
if (testCase.hasRubyMimeType) {
62+
assertEquals(TruffleRuby.MIME_TYPE, Source.findMimeType(testCase.path.toFile()));
63+
} else {
64+
assertNotEquals(TruffleRuby.MIME_TYPE, Source.findMimeType(testCase.path.toFile()));
65+
}
66+
}
67+
}
68+
69+
@Test
70+
public void testEncoding() {
71+
final RubyFileTypeDetector fileTypeDetector = new RubyFileTypeDetector();
72+
testWithAST("", new Consumer<RubyRootNode>() {
73+
@Override
74+
public void accept(RubyRootNode rootNode) {
75+
TruffleLanguage.Env env = rootNode.getContext().getEnv();
76+
try {
77+
for (TestCase testCase : getTestCases()) {
78+
TruffleFile file = env.getTruffleFile(testCase.path.toString());
79+
if (testCase.hasRubyMimeType) {
80+
assertEquals(TruffleRuby.MIME_TYPE, fileTypeDetector.findMimeType(file));
81+
} else {
82+
assertNotEquals(TruffleRuby.MIME_TYPE, fileTypeDetector.findMimeType(file));
83+
}
84+
}
85+
} catch (IOException ioe) {
86+
throw new RuntimeException(ioe);
87+
}
88+
}
89+
});
90+
}
91+
92+
private static TestCase[] getTestCases() throws IOException {
93+
final Path tempDirectory = Files.createTempDirectory("truffleruby");
94+
tempDirectory.toFile().deleteOnExit();
95+
96+
final List<TestCase> testCases = new ArrayList<>();
97+
98+
testCases.add(new TestCase(createFile(tempDirectory, "test.rb", "puts 'hello'"), true, null));
99+
testCases.add(new TestCase(createFile(tempDirectory, "TESTUP.RB", "puts 'hello'"), true, null));
100+
testCases.add(new TestCase(createFile(tempDirectory, "Gemfile", "puts 'hello'"), true, null));
101+
testCases.add(new TestCase(createFile(tempDirectory, "Rakefile", "puts 'hello'"), true, null));
102+
testCases.add(new TestCase(createFile(tempDirectory, "Mavenfile", "puts 'hello'"), true, null));
103+
testCases.add(new TestCase(createFile(tempDirectory, "test.rake", "puts 'hello'"), true, null));
104+
testCases.add(new TestCase(createFile(tempDirectory, "test.gemspec", "puts 'hello'"), true, null));
105+
testCases.add(new TestCase(createFile(tempDirectory, "shebang", "#!/usr/bin/ruby\nputs 'hello'"), true, null));
106+
testCases.add(new TestCase(createFile(tempDirectory, "env-shebang", "#!/usr/bin/env ruby\nputs 'hello'"), true, null));
107+
testCases.add(new TestCase(createFile(tempDirectory, "test.norb", "# encoding: UTF-8\nputs 'hello'"), false, null));
108+
testCases.add(new TestCase(createFile(tempDirectory, "encoding1.rb", "# encoding: UTF-8\nputs 'hello'"), true, StandardCharsets.UTF_8));
109+
testCases.add(new TestCase(createFile(tempDirectory, "encoding2.rb", "# coding: UTF-8\nputs 'hello'"), true, StandardCharsets.UTF_8));
110+
testCases.add(new TestCase(createFile(tempDirectory, "encoding3.rb", "# -*- coding: UTF-8 -*-\nputs 'hello'"), true, StandardCharsets.UTF_8));
111+
testCases.add(new TestCase(createFile(tempDirectory, "shebang-encoding", "#!/usr/bin/ruby\n# encoding: UTF-8\nputs 'hello'"), true, StandardCharsets.UTF_8));
112+
testCases.add(new TestCase(createFile(tempDirectory, "env-shebang-encoding", "#!/usr/bin/env ruby\n# encoding: UTF-8\nputs 'hello'"), true, StandardCharsets.UTF_8));
113+
return testCases.toArray(new TestCase[testCases.size()]);
114+
}
115+
116+
private static Path createFile(Path parent, String name, String contents) throws IOException {
117+
final Path file = Files.createFile(parent.resolve(name));
118+
Files.write(file, contents.getBytes(StandardCharsets.UTF_8), StandardOpenOption.CREATE);
119+
return file;
120+
}
121+
122+
private static final class TestCase {
123+
final Path path;
124+
final boolean hasRubyMimeType;
125+
final Charset encoding;
126+
127+
private TestCase(Path path, boolean hasRubyMimeType, Charset encoding) {
128+
this.path = path;
129+
this.hasRubyMimeType = hasRubyMimeType;
130+
this.encoding = encoding;
131+
}
132+
}
133+
134+
}

0 commit comments

Comments
 (0)