Skip to content

Commit 08dce0a

Browse files
Bugfix estimation of file extension.
1 parent b161be4 commit 08dce0a

File tree

3 files changed

+57
-21
lines changed

3 files changed

+57
-21
lines changed

src/main/java/edu/kit/datamanager/mappingservice/util/FileUtil.java

Lines changed: 32 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@ public class FileUtil {
5252
/**
5353
* Default value for suffix of temporary files.
5454
*/
55-
public static final String DEFAULT_SUFFIX = ".tmp";
55+
public static final String DEFAULT_SUFFIX = ".html";
5656
/**
5757
* Default value for prefix of temporary files.
5858
*/
@@ -61,9 +61,9 @@ public class FileUtil {
6161
* Logger for this class.
6262
*/
6363
private static final Logger LOGGER = LoggerFactory.getLogger(FileUtil.class);
64-
64+
6565
private static final int MAX_LENGTH_OF_HEADER = 100;
66-
66+
6767
private static final Pattern JSON_FIRST_BYTE = Pattern.compile("(\\R\\s)*\\s*\\{\\s*\"(.|\\s)*", Pattern.MULTILINE);//^\\s{\\s*\".*");
6868
private static final Pattern XML_FIRST_BYTE = Pattern.compile("((.|\\s)*<\\?xml[^<]*)?\\s*<\\s*(\\w+:)?\\w+(.|\\s)*", Pattern.MULTILINE);
6969

@@ -101,7 +101,7 @@ public static Optional<Path> downloadResource(URI resourceURL) {
101101
throw new MappingException("Error downloading resource from '" + resourceURL + "'!", tw);
102102
}
103103
downloadedFile = fixFileExtension(downloadedFile);
104-
104+
105105
return Optional.ofNullable(downloadedFile);
106106
}
107107

@@ -114,14 +114,11 @@ public static Optional<Path> downloadResource(URI resourceURL) {
114114
public static Path fixFileExtension(Path pathToFile) {
115115
Path returnFile = pathToFile;
116116
Path renamedFile = pathToFile;
117+
LOGGER.trace("fixFileExtension({})", pathToFile);
117118
try {
118119
if ((pathToFile != null) && (pathToFile.toFile().exists())) {
119-
Tika tika = new Tika();
120-
String mimeType = tika.detect(pathToFile.toFile());
121-
MimeTypes allTypes = MimeTypes.getDefaultMimeTypes();
122-
MimeType estimatedMimeType = allTypes.forName(mimeType);
123-
String newExtension = estimatedMimeType.getExtension(); // .jpg
124-
120+
String contentOfFile = FileUtils.readFileToString(pathToFile.toFile(), StandardCharsets.UTF_8);
121+
String newExtension = guessFileExtension(contentOfFile.getBytes());
125122
if (newExtension != null) {
126123
if (!pathToFile.toString().endsWith(newExtension)) {
127124
renamedFile = Paths.get(pathToFile + newExtension);
@@ -130,9 +127,10 @@ public static Path fixFileExtension(Path pathToFile) {
130127
}
131128
}
132129
}
133-
} catch (IOException|MimeTypeException ex) {
130+
} catch (IOException ex) {
134131
LOGGER.error("Error moving file '{}' to '{}'.", pathToFile, renamedFile);
135132
}
133+
LOGGER.trace("'{}' -> '{}'", pathToFile, returnFile);
136134
return returnFile;
137135
}
138136

@@ -177,21 +175,38 @@ public static void removeFile(Path tempFile) {
177175
* @return Estimated extension. e.g. '.xml'
178176
*/
179177
private static String guessFileExtension(byte[] schema) {
178+
String returnValue = null;
180179
// Cut schema to a maximum of MAX_LENGTH_OF_HEADER characters.
181180
int length = Math.min(schema.length, MAX_LENGTH_OF_HEADER);
182181
String schemaAsString = new String(schema, 0, length);
183182
LOGGER.trace("Guess type for '{}'", schemaAsString);
184-
183+
185184
Matcher m = JSON_FIRST_BYTE.matcher(schemaAsString);
186185
if (m.matches()) {
187-
return ".json";
186+
returnValue = ".json";
188187
} else {
189188
m = XML_FIRST_BYTE.matcher(schemaAsString);
190189
if (m.matches()) {
191-
return ".xml";
190+
returnValue = ".xml";
192191
}
193192
}
194-
return null;
193+
if (returnValue == null) {
194+
// Use tika library to estimate extension
195+
LOGGER.trace("Use tika library to estimate extension.");
196+
Tika tika = new Tika();
197+
String mimeType;
198+
mimeType = tika.detect(schema);
199+
MimeTypes allTypes = MimeTypes.getDefaultMimeTypes();
200+
MimeType estimatedMimeType;
201+
try {
202+
estimatedMimeType = allTypes.forName(mimeType);
203+
returnValue = estimatedMimeType.getExtension(); // .jpg
204+
LOGGER.trace("Mimetype: '{}', Extension: '{}'", mimeType, returnValue);
205+
} catch (MimeTypeException ex) {
206+
LOGGER.error("Unknown mimetype '{}'", mimeType);
207+
}
208+
}
209+
return returnValue;
195210
}
196211

197212
/**
@@ -217,7 +232,7 @@ public static Path cloneGitRepository(String repositoryUrl, String branch) {
217232
public static Path cloneGitRepository(String repositoryUrl, String branch, String targetFolder) {
218233
File target = new File(targetFolder);
219234
target.mkdirs();
220-
235+
221236
LOGGER.info("Cloning branch '{}' of repository '{}' to '{}'", branch, repositoryUrl, target.getPath());
222237
try {
223238
Git.cloneRepository().setURI(repositoryUrl).setBranch(branch).setDirectory(target).call();
@@ -226,7 +241,7 @@ public static Path cloneGitRepository(String repositoryUrl, String branch, Strin
226241
} catch (GitAPIException ex) {
227242
throw new MappingException("Error cloning git repository '" + repositoryUrl + "' to '" + target + "'!", ex);
228243
}
229-
244+
230245
return target.toPath();
231246
}
232247
}

src/test/java/edu/kit/datamanager/mappingservice/util/FileUtilTest.java

Lines changed: 25 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -142,11 +142,12 @@ public void testDownloadLocalXMLFileWithoutSuffix() throws IOException {
142142
public void testDownloadLocalResourceWithoutSuffix() {
143143
System.out.println("testDownloadLocalResource");
144144
File srcFile = new File("src/test/resources/examples/anyContentWithoutSuffix");
145+
String expectedExtension = ".txt";
145146
assertTrue(srcFile.exists());
146147
Optional<Path> result = FileUtil.downloadResource(srcFile.getAbsoluteFile().toURI());
147148
assertTrue(result.isPresent());
148149
assertTrue(result.get().toFile().exists());
149-
assertTrue(result.get().toString().endsWith(FileUtil.DEFAULT_SUFFIX));
150+
assertTrue(result.get().toString().endsWith(expectedExtension));
150151
assertTrue(result.get().toFile().delete());
151152
}
152153

@@ -249,7 +250,7 @@ public void testFixFileExtensionJson() throws IOException {
249250
System.out.println("testFixFileExtensionJson");
250251
File srcFile = new File("src/test/resources/examples/gemma/simple.json");
251252
assertTrue(srcFile.exists());
252-
String[] extensions = {"nosuffix", "json", ".json ", ".xml"};
253+
String[] extensions = {"nosuffix", "json", ".json", ".xml"};
253254
for (String extension : extensions) {
254255
Path createTempFile = FileUtil.createTempFile(null, extension);
255256
Files.copy(srcFile, createTempFile.toFile());
@@ -263,14 +264,34 @@ public void testFixFileExtensionJson() throws IOException {
263264
@Test
264265
public void testFixFileExtensionUnknown() throws IOException {
265266
System.out.println("testFixFileExtensionUnknown");
267+
266268
File srcFile = new File("src/test/resources/examples/anyContentWithoutSuffix");
267269
assertTrue(srcFile.exists());
268-
String[] extensions = {"nosuffix", "json", ".json ", ".xml"};
270+
String[] extensions = {"nosuffix", "json", ".json", ".xml", ".txt"};
271+
String expectedExtension = ".txt";
272+
for (String extension : extensions) {
273+
Path createTempFile = FileUtil.createTempFile(null, extension);
274+
Files.copy(srcFile, createTempFile.toFile());
275+
Path result = FileUtil.fixFileExtension(createTempFile);
276+
assertTrue(result.toString().endsWith(expectedExtension), "Result: " + result.toString());
277+
assertTrue(result.toFile().delete());
278+
279+
}
280+
}
281+
282+
@Test
283+
public void testFixZipFileExtensionUnknown() throws IOException {
284+
System.out.println("testFixFileExtensionUnknown");
285+
286+
File srcFile = new File("src/test/resources/examples/record_json_zip");
287+
assertTrue(srcFile.exists());
288+
String[] extensions = {"nosuffix", ".zip", ".json", ".xml"};
289+
String expectedExtension = ".zip";
269290
for (String extension : extensions) {
270291
Path createTempFile = FileUtil.createTempFile(null, extension);
271292
Files.copy(srcFile, createTempFile.toFile());
272293
Path result = FileUtil.fixFileExtension(createTempFile);
273-
assertTrue(result.toString().endsWith(extension));
294+
assertTrue(result.toString().endsWith(expectedExtension));
274295
assertTrue(result.toFile().delete());
275296

276297
}
222 Bytes
Binary file not shown.

0 commit comments

Comments
 (0)