Skip to content

Commit 0ff328a

Browse files
committed
[GR-14621] Adopt the new TruffleFileTypeDetector and use it for encoding detection.
PullRequest: truffleruby/725
2 parents 82a3526 + 7d5c96e commit 0ff328a

File tree

7 files changed

+237
-137
lines changed

7 files changed

+237
-137
lines changed

mx.truffleruby/suite.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
"name": "tools",
1010
"subdir": True,
1111
# version must always be equal to the version of the "sulong" import below
12-
"version": "f7f14f5d4d9ac7c2537932810082fc9f54cb635e",
12+
"version": "41667df9feab4601c919fccd48943e97832b0c8e",
1313
"urls": [
1414
{"url": "https://github.com/oracle/graal.git", "kind": "git"},
1515
{"url": "https://curio.ssw.jku.at/nexus/content/repositories/snapshots", "kind": "binary"},
@@ -19,7 +19,7 @@
1919
"name": "sulong",
2020
"subdir": True,
2121
# version must always be equal to the version of the "tools" import above
22-
"version": "f7f14f5d4d9ac7c2537932810082fc9f54cb635e",
22+
"version": "41667df9feab4601c919fccd48943e97832b0c8e",
2323
"urls": [
2424
{"url": "https://github.com/oracle/graal.git", "kind": "git"},
2525
{"url": "https://curio.ssw.jku.at/nexus/content/repositories/snapshots", "kind": "binary"},
Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,100 @@
1+
/*
2+
* Copyright (c) 2016, 2019 Oracle and/or its affiliates. All rights reserved. This
3+
* code is released under a tri EPL/GPL/LGPL license. You can use it,
4+
* redistribute it and/or modify it under the terms of the:
5+
*
6+
* Eclipse Public License version 1.0, or
7+
* GNU General Public License version 2, or
8+
* GNU Lesser General Public License version 2.1.
9+
*/
10+
package org.truffleruby;
11+
12+
import java.io.BufferedReader;
13+
import java.io.IOException;
14+
import java.nio.charset.Charset;
15+
import java.nio.charset.StandardCharsets;
16+
import java.util.Locale;
17+
import java.util.function.BiConsumer;
18+
import java.util.regex.Pattern;
19+
import com.oracle.truffle.api.TruffleFile;
20+
import org.jcodings.Encoding;
21+
import org.jcodings.specific.UTF8Encoding;
22+
import org.truffleruby.core.encoding.EncodingManager;
23+
import org.truffleruby.core.rope.Rope;
24+
import org.truffleruby.core.string.StringOperations;
25+
import org.truffleruby.parser.lexer.RubyLexer;
26+
import org.truffleruby.shared.TruffleRuby;
27+
28+
public class RubyFileTypeDetector implements TruffleFile.FileTypeDetector {
29+
30+
private static final String[] KNOWN_RUBY_FILES = new String[]{ "Gemfile", "Rakefile" };
31+
private static final String[] KNOWN_RUBY_SUFFIXES = new String[]{ ".rb", ".rake", ".gemspec" };
32+
private static final Pattern SHEBANG_REGEXP = Pattern.compile("^#! ?/usr/bin/(env +ruby|ruby).*");
33+
34+
@Override
35+
public String findMimeType(TruffleFile file) throws IOException {
36+
final String fileName = file.getName();
37+
38+
if (fileName == null) {
39+
return null;
40+
}
41+
42+
final String lowerCaseFileName = fileName.toLowerCase(Locale.ROOT);
43+
44+
for (String candidate : KNOWN_RUBY_SUFFIXES) {
45+
if (lowerCaseFileName.endsWith(candidate)) {
46+
return TruffleRuby.MIME_TYPE;
47+
}
48+
}
49+
50+
for (String candidate : KNOWN_RUBY_FILES) {
51+
if (fileName.equals(candidate)) {
52+
return TruffleRuby.MIME_TYPE;
53+
}
54+
}
55+
56+
try (BufferedReader fileContent = file.newBufferedReader(StandardCharsets.UTF_8)) {
57+
final String firstLine = fileContent.readLine();
58+
if (firstLine != null && SHEBANG_REGEXP.matcher(firstLine).matches()) {
59+
return TruffleRuby.MIME_TYPE;
60+
}
61+
} catch (IOException | SecurityException e) {
62+
// Reading random files as UTF-8 could cause all sorts of errors
63+
}
64+
return null;
65+
}
66+
67+
@Override
68+
public Charset findEncoding(TruffleFile file) throws IOException {
69+
try (BufferedReader fileContent = file.newBufferedReader(StandardCharsets.UTF_8)) {
70+
final String firstLine = fileContent.readLine();
71+
if (firstLine != null) {
72+
String encodingCommentLine;
73+
if (SHEBANG_REGEXP.matcher(firstLine).matches()) {
74+
encodingCommentLine = fileContent.readLine();
75+
} else {
76+
encodingCommentLine = firstLine;
77+
}
78+
if (encodingCommentLine != null) {
79+
Rope encodingCommentRope = StringOperations.encodeRope(encodingCommentLine, UTF8Encoding.INSTANCE);
80+
Charset[] encodingHolder = new Charset[1];
81+
RubyLexer.parseMagicComment(encodingCommentRope, new BiConsumer<String, Rope>() {
82+
@Override
83+
public void accept(String name, Rope value) {
84+
if (RubyLexer.isMagicEncodingComment(name)) {
85+
Encoding encoding = EncodingManager.getEncoding(value);
86+
if (encoding != null) {
87+
encodingHolder[0] = encoding.getCharset();
88+
}
89+
}
90+
}
91+
});
92+
return encodingHolder[0];
93+
}
94+
}
95+
} catch (IOException | SecurityException e) {
96+
// Reading random files as UTF-8 could cause all sorts of errors
97+
}
98+
return null;
99+
}
100+
}

src/main/java/org/truffleruby/RubyLanguage.java

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2015, 2018 Oracle and/or its affiliates. All rights reserved. This
2+
* Copyright (c) 2015, 2019 Oracle and/or its affiliates. All rights reserved. This
33
* code is released under a tri EPL/GPL/LGPL license. You can use it,
44
* redistribute it and/or modify it under the terms of the:
55
*
@@ -41,7 +41,8 @@
4141
version = BuildInformationImpl.RUBY_VERSION,
4242
characterMimeTypes = TruffleRuby.MIME_TYPE,
4343
defaultMimeType = TruffleRuby.MIME_TYPE,
44-
dependentLanguages = TruffleRuby.LLVM_ID)
44+
dependentLanguages = TruffleRuby.LLVM_ID,
45+
fileTypeDetectors = RubyFileTypeDetector.class)
4546
@ProvidedTags({
4647
CoverageManager.LineTag.class,
4748
TraceManager.CallTag.class,

src/services/java/META-INF/services/java.nio.file.spi.FileTypeDetector

Lines changed: 0 additions & 1 deletion
This file was deleted.

src/services/java/org/truffleruby/services/RubyFileTypeDetector.java

Lines changed: 0 additions & 64 deletions
This file was deleted.
Lines changed: 132 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,132 @@
1+
/*
2+
* Copyright (c) 2018, 2019 Oracle and/or its affiliates. All rights reserved. This
3+
* code is released under a tri EPL/GPL/LGPL license. You can use it,
4+
* redistribute it and/or modify it under the terms of the:
5+
*
6+
* Eclipse Public License version 1.0, or
7+
* GNU General Public License version 2, or
8+
* GNU Lesser General Public License version 2.1.
9+
*/
10+
package org.truffleruby;
11+
12+
import com.oracle.truffle.api.TruffleFile;
13+
import com.oracle.truffle.api.TruffleLanguage;
14+
import org.junit.Test;
15+
import org.truffleruby.RubyTest;
16+
import org.truffleruby.shared.TruffleRuby;
17+
18+
import java.io.IOException;
19+
import java.nio.charset.Charset;
20+
import java.nio.charset.StandardCharsets;
21+
import java.nio.file.Files;
22+
import java.nio.file.Path;
23+
import java.nio.file.StandardOpenOption;
24+
import java.util.ArrayList;
25+
import java.util.List;
26+
import java.util.function.Consumer;
27+
import org.graalvm.polyglot.Source;
28+
29+
import static org.junit.Assert.assertEquals;
30+
import static org.junit.Assert.assertNotEquals;
31+
import org.truffleruby.language.RubyRootNode;
32+
33+
public class RubyFileTypeDetectorTest extends RubyTest {
34+
35+
@Test
36+
public void testDirect() {
37+
final RubyFileTypeDetector fileTypeDetector = new RubyFileTypeDetector();
38+
testWithAST("", new Consumer<RubyRootNode>() {
39+
@Override
40+
public void accept(RubyRootNode rootNode) {
41+
TruffleLanguage.Env env = rootNode.getContext().getEnv();
42+
try {
43+
for (TestCase testCase : getTestCases()) {
44+
TruffleFile file = env.getTruffleFile(testCase.path.toString());
45+
if (testCase.hasRubyMimeType) {
46+
assertEquals(testCase.path.toString(), TruffleRuby.MIME_TYPE, fileTypeDetector.findMimeType(file));
47+
} else {
48+
assertNotEquals(testCase.path.toString(), TruffleRuby.MIME_TYPE, fileTypeDetector.findMimeType(file));
49+
}
50+
}
51+
} catch (IOException ioe) {
52+
throw new RuntimeException(ioe);
53+
}
54+
}
55+
});
56+
}
57+
58+
@Test
59+
public void testIndirect() throws IOException {
60+
for (TestCase testCase : getTestCases()) {
61+
if (testCase.hasRubyMimeType) {
62+
assertEquals(testCase.path.toString(), TruffleRuby.MIME_TYPE, Source.findMimeType(testCase.path.toFile()));
63+
} else {
64+
assertNotEquals(testCase.path.toString(), TruffleRuby.MIME_TYPE, Source.findMimeType(testCase.path.toFile()));
65+
}
66+
}
67+
}
68+
69+
@Test
70+
public void testEncoding() {
71+
final RubyFileTypeDetector fileTypeDetector = new RubyFileTypeDetector();
72+
testWithAST("", new Consumer<RubyRootNode>() {
73+
@Override
74+
public void accept(RubyRootNode rootNode) {
75+
TruffleLanguage.Env env = rootNode.getContext().getEnv();
76+
try {
77+
for (TestCase testCase : getTestCases()) {
78+
if (testCase.hasRubyMimeType) {
79+
TruffleFile file = env.getTruffleFile(testCase.path.toString());
80+
assertEquals(testCase.encoding, fileTypeDetector.findEncoding(file));
81+
}
82+
}
83+
} catch (IOException ioe) {
84+
throw new RuntimeException(ioe);
85+
}
86+
}
87+
});
88+
}
89+
90+
private static TestCase[] getTestCases() throws IOException {
91+
final Path tempDirectory = Files.createTempDirectory("truffleruby");
92+
tempDirectory.toFile().deleteOnExit();
93+
94+
final List<TestCase> testCases = new ArrayList<>();
95+
96+
testCases.add(new TestCase(createFile(tempDirectory, "test.rb", "puts 'hello'"), true, null));
97+
testCases.add(new TestCase(createFile(tempDirectory, "TESTUP.RB", "puts 'hello'"), true, null));
98+
testCases.add(new TestCase(createFile(tempDirectory, "Gemfile", "puts 'hello'"), true, null));
99+
testCases.add(new TestCase(createFile(tempDirectory, "Rakefile", "puts 'hello'"), true, null));
100+
testCases.add(new TestCase(createFile(tempDirectory, "Mavenfile", "puts 'hello'"), false, null));
101+
testCases.add(new TestCase(createFile(tempDirectory, "test.rake", "puts 'hello'"), true, null));
102+
testCases.add(new TestCase(createFile(tempDirectory, "test.gemspec", "puts 'hello'"), true, null));
103+
testCases.add(new TestCase(createFile(tempDirectory, "shebang", "#!/usr/bin/ruby\nputs 'hello'"), true, null));
104+
testCases.add(new TestCase(createFile(tempDirectory, "env-shebang", "#!/usr/bin/env ruby\nputs 'hello'"), true, null));
105+
testCases.add(new TestCase(createFile(tempDirectory, "test.norb", "# encoding: UTF-8\nputs 'hello'"), false, null));
106+
testCases.add(new TestCase(createFile(tempDirectory, "encoding1.rb", "# encoding: UTF-8\nputs 'hello'"), true, StandardCharsets.UTF_8));
107+
testCases.add(new TestCase(createFile(tempDirectory, "encoding2.rb", "# coding: UTF-8\nputs 'hello'"), true, StandardCharsets.UTF_8));
108+
testCases.add(new TestCase(createFile(tempDirectory, "encoding3.rb", "# -*- coding: UTF-8 -*-\nputs 'hello'"), true, StandardCharsets.UTF_8));
109+
testCases.add(new TestCase(createFile(tempDirectory, "shebang-encoding", "#!/usr/bin/ruby\n# encoding: UTF-8\nputs 'hello'"), true, StandardCharsets.UTF_8));
110+
testCases.add(new TestCase(createFile(tempDirectory, "env-shebang-encoding", "#!/usr/bin/env ruby\n# encoding: UTF-8\nputs 'hello'"), true, StandardCharsets.UTF_8));
111+
return testCases.toArray(new TestCase[testCases.size()]);
112+
}
113+
114+
private static Path createFile(Path parent, String name, String contents) throws IOException {
115+
final Path file = Files.createFile(parent.resolve(name));
116+
Files.write(file, contents.getBytes(StandardCharsets.UTF_8), StandardOpenOption.CREATE);
117+
return file;
118+
}
119+
120+
private static final class TestCase {
121+
final Path path;
122+
final boolean hasRubyMimeType;
123+
final Charset encoding;
124+
125+
private TestCase(Path path, boolean hasRubyMimeType, Charset encoding) {
126+
this.path = path;
127+
this.hasRubyMimeType = hasRubyMimeType;
128+
this.encoding = encoding;
129+
}
130+
}
131+
132+
}

0 commit comments

Comments
 (0)