Skip to content

Commit 8580fc6

Browse files
committed
Resolved merge conflicts
2 parents ca6d257 + 21cf3de commit 8580fc6

File tree

5 files changed

+229
-169
lines changed

5 files changed

+229
-169
lines changed

build.gradle

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,8 @@ dependencies {
5555
implementation 'commons-io:commons-io:2.11.0'
5656
implementation 'javax.validation:validation-api:2.0.1.Final'
5757
implementation 'edu.kit.datamanager:service-base:1.0.4'
58+
// apache
59+
implementation "org.apache.tika:tika-core:2.7.0"
5860

5961
testImplementation platform('org.junit:junit-bom:5.9.0')
6062
testImplementation 'org.junit.jupiter:junit-jupiter:5.9.0'

src/main/java/edu/kit/datamanager/mappingservice/util/FileUtil.java

Lines changed: 190 additions & 162 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,6 @@
1313
* See the License for the specific language governing permissions and
1414
* limitations under the License.
1515
*/
16-
1716
package edu.kit.datamanager.mappingservice.util;
1817

1918
import edu.kit.datamanager.clients.SimpleServiceClient;
@@ -37,6 +36,10 @@
3736
import java.util.Optional;
3837
import java.util.regex.Matcher;
3938
import java.util.regex.Pattern;
39+
import org.apache.tika.Tika;
40+
import org.apache.tika.mime.MimeType;
41+
import org.apache.tika.mime.MimeTypeException;
42+
import org.apache.tika.mime.MimeTypes;
4043

4144
/**
4245
* Various utility methods for file handling.
@@ -46,174 +49,199 @@
4649
*/
4750
public class FileUtil {
4851

49-
/**
50-
* Default value for suffix of temporary files.
51-
*/
52-
public static final String DEFAULT_SUFFIX = ".tmp";
53-
/**
54-
* Default value for prefix of temporary files.
55-
*/
56-
public static final String DEFAULT_PREFIX = "MappingUtil_";
57-
/**
58-
* Logger for this class.
59-
*/
60-
private static final Logger LOGGER = LoggerFactory.getLogger(FileUtil.class);
61-
62-
private static final int MAX_LENGTH_OF_HEADER = 100;
63-
64-
private static final Pattern JSON_FIRST_BYTE = Pattern.compile("(\\R\\s)*\\s*\\{\\s*\"(.|\\s)*", Pattern.MULTILINE);//^\\s{\\s*\".*");
65-
private static final Pattern XML_FIRST_BYTE = Pattern.compile("((.|\\s)*<\\?xml[^<]*)?\\s*<\\s*(\\w+:)?\\w+(.|\\s)*", Pattern.MULTILINE);
66-
67-
/**
68-
* Downloads or copy the file behind the given URI and returns its path on
69-
* local disc. You should delete or move to another location afterwards.
70-
*
71-
* @param resourceURL the given URI
72-
* @return the path to the created file.
73-
*/
74-
public static Optional<Path> downloadResource(URI resourceURL) {
75-
String content;
76-
Path downloadedFile = null;
77-
try {
78-
if (resourceURL != null) {
79-
String suffix = FilenameUtils.getExtension(resourceURL.getPath());
80-
suffix = suffix.trim().isEmpty() ? DEFAULT_SUFFIX : "." + suffix;
81-
if (resourceURL.getHost() != null) {
82-
content = SimpleServiceClient
83-
.create(resourceURL.toString())
84-
.accept(MediaType.TEXT_PLAIN)
85-
.getResource(String.class);
86-
downloadedFile = createTempFile("download", suffix);
87-
FileUtils.writeStringToFile(downloadedFile.toFile(), content, StandardCharsets.UTF_8);
88-
} else {
89-
// copy local file to new place.
90-
File srcFile = new File(resourceURL.getPath());
91-
File destFile = FileUtil.createTempFile("local", suffix).toFile();
92-
FileUtils.copyFile(srcFile, destFile);
93-
downloadedFile = destFile.toPath();
94-
}
95-
}
96-
} catch (Throwable tw) {
97-
LOGGER.error("Error reading URI '" + resourceURL + "'", tw);
98-
throw new MappingException("Error downloading resource from '" + resourceURL + "'!", tw);
52+
/**
53+
* Default value for suffix of temporary files.
54+
*/
55+
public static final String DEFAULT_SUFFIX = ".html";
56+
/**
57+
* Default value for prefix of temporary files.
58+
*/
59+
public static final String DEFAULT_PREFIX = "MappingUtil_";
60+
/**
61+
* Logger for this class.
62+
*/
63+
private static final Logger LOGGER = LoggerFactory.getLogger(FileUtil.class);
64+
65+
private static final int MAX_LENGTH_OF_HEADER = 100;
66+
67+
private static final Pattern JSON_FIRST_BYTE = Pattern.compile("(\\R\\s)*\\s*\\{\\s*\"(.|\\s)*", Pattern.MULTILINE);//^\\s{\\s*\".*");
68+
private static final Pattern XML_FIRST_BYTE = Pattern.compile("((.|\\s)*<\\?xml[^<]*)?\\s*<\\s*(\\w+:)?\\w+(.|\\s)*", Pattern.MULTILINE);
69+
70+
/**
71+
* Downloads or copies the file behind the given URI and returns its path on
72+
* local disk. The caller should delete the file or move it to another location afterwards.
73+
*
74+
* @param resourceURL the given URI
75+
* @return the path to the created file.
76+
*/
77+
public static Optional<Path> downloadResource(URI resourceURL) {
78+
String content;
79+
Path downloadedFile = null;
80+
try {
81+
if (resourceURL != null) {
82+
String suffix = FilenameUtils.getExtension(resourceURL.getPath());
83+
suffix = suffix.trim().isEmpty() ? DEFAULT_SUFFIX : "." + suffix;
84+
if (resourceURL.getHost() != null) {
85+
content = SimpleServiceClient
86+
.create(resourceURL.toString())
87+
.accept(MediaType.TEXT_PLAIN)
88+
.getResource(String.class);
89+
downloadedFile = createTempFile("download", suffix);
90+
FileUtils.writeStringToFile(downloadedFile.toFile(), content, StandardCharsets.UTF_8);
91+
} else {
92+
// copy local file to new place.
93+
File srcFile = new File(resourceURL.getPath());
94+
File destFile = FileUtil.createTempFile("local", suffix).toFile();
95+
FileUtils.copyFile(srcFile, destFile);
96+
downloadedFile = destFile.toPath();
9997
}
100-
downloadedFile = fixFileExtension(downloadedFile);
101-
102-
return Optional.ofNullable(downloadedFile);
98+
}
99+
} catch (Throwable tw) {
100+
LOGGER.error("Error reading URI '" + resourceURL + "'", tw);
101+
throw new MappingException("Error downloading resource from '" + resourceURL + "'!", tw);
103102
}
104-
105-
/**
106-
* Fix extension of file if possible.
107-
*
108-
* @param pathToFile the given URI
109-
* @return the path to the (renamed) file.
110-
*/
111-
public static Path fixFileExtension(Path pathToFile) {
112-
Path returnFile = pathToFile;
113-
Path renamedFile = pathToFile;
114-
try {
115-
if ((pathToFile != null) && (pathToFile.toFile().exists())) {
116-
String contentOfFile = FileUtils.readFileToString(pathToFile.toFile(), StandardCharsets.UTF_8);
117-
String newExtension = guessFileExtension(contentOfFile.getBytes());
118-
if (newExtension != null) {
119-
if (!pathToFile.toString().endsWith(newExtension)) {
120-
renamedFile = Paths.get(pathToFile + newExtension);
121-
FileUtils.moveFile(pathToFile.toFile(), renamedFile.toFile());
122-
returnFile = renamedFile;
123-
}
124-
}
125-
}
126-
} catch (IOException ex) {
127-
LOGGER.error("Error moving file '{}' to '{}'.", pathToFile, renamedFile);
103+
downloadedFile = fixFileExtension(downloadedFile);
104+
105+
return Optional.ofNullable(downloadedFile);
106+
}
107+
108+
/**
109+
* Fix extension of file if possible.
110+
*
111+
* @param pathToFile the path to the file whose extension should be fixed
112+
* @return the path to the (renamed) file.
113+
*/
114+
public static Path fixFileExtension(Path pathToFile) {
115+
Path returnFile = pathToFile;
116+
Path renamedFile = pathToFile;
117+
LOGGER.trace("fixFileExtension({})", pathToFile);
118+
try {
119+
if ((pathToFile != null) && (pathToFile.toFile().exists())) {
120+
String contentOfFile = FileUtils.readFileToString(pathToFile.toFile(), StandardCharsets.UTF_8);
121+
String newExtension = guessFileExtension(contentOfFile.getBytes());
122+
if (newExtension != null) {
123+
if (!pathToFile.toString().endsWith(newExtension)) {
124+
renamedFile = Paths.get(pathToFile + newExtension);
125+
FileUtils.moveFile(pathToFile.toFile(), renamedFile.toFile());
126+
returnFile = renamedFile;
127+
}
128128
}
129-
return returnFile;
129+
}
130+
} catch (IOException ex) {
131+
LOGGER.error("Error moving file '{}' to '{}'.", pathToFile, renamedFile);
130132
}
131-
132-
/**
133-
* Create temporary file. Attention: The file will not be removed
134-
* automatically.
135-
*
136-
* @param prefix prefix of the file
137-
* @param suffix suffix of the file
138-
* @return Path to file
139-
* @throws MappingException if an error occurs
140-
*/
141-
public static Path createTempFile(String prefix, String suffix) {
142-
Path tempFile;
143-
prefix = (prefix == null || prefix.trim().isEmpty()) ? DEFAULT_PREFIX : prefix;
144-
suffix = (suffix == null || suffix.trim().isEmpty() || suffix.trim().equals(".")) ? DEFAULT_SUFFIX : suffix;
145-
try {
146-
tempFile = Files.createTempFile(prefix, suffix);
147-
} catch (IOException ioe) {
148-
throw new MappingException("Error creating tmp file!", ioe);
149-
}
150-
return tempFile;
133+
LOGGER.trace("'{}' -> '{}'", pathToFile, returnFile);
134+
return returnFile;
135+
}
136+
137+
/**
138+
* Create temporary file. Attention: The file will not be removed
139+
* automatically.
140+
*
141+
* @param prefix prefix of the file
142+
* @param suffix suffix of the file
143+
* @return Path to file
144+
* @throws MappingException if an error occurs
145+
*/
146+
public static Path createTempFile(String prefix, String suffix) {
147+
Path tempFile;
148+
prefix = (prefix == null || prefix.trim().isEmpty()) ? DEFAULT_PREFIX : prefix;
149+
suffix = (suffix == null || suffix.trim().isEmpty() || suffix.trim().equals(".")) ? DEFAULT_SUFFIX : suffix;
150+
try {
151+
tempFile = Files.createTempFile(prefix, suffix);
152+
} catch (IOException ioe) {
153+
throw new MappingException("Error creating tmp file!", ioe);
151154
}
152-
153-
/**
154-
* Remove temporary file.
155-
*
156-
* @param tempFile Path to file
157-
*/
158-
public static void removeFile(Path tempFile) {
159-
try {
160-
Files.deleteIfExists(tempFile);
161-
} catch (IOException ioe) {
162-
throw new MappingException("Error removing file '" + tempFile + "'!", ioe);
163-
}
155+
return tempFile;
156+
}
157+
158+
/**
159+
* Remove temporary file.
160+
*
161+
* @param tempFile Path to file
162+
*/
163+
public static void removeFile(Path tempFile) {
164+
try {
165+
Files.deleteIfExists(tempFile);
166+
} catch (IOException ioe) {
167+
throw new MappingException("Error removing file '" + tempFile + "'!", ioe);
164168
}
165-
166-
private static String guessFileExtension(byte[] schema) {
167-
// Cut schema to a maximum of MAX_LENGTH_OF_HEADER characters.
168-
int length = Math.min(schema.length, MAX_LENGTH_OF_HEADER);
169-
String schemaAsString = new String(schema, 0, length);
170-
LOGGER.trace("Guess type for '{}'", schemaAsString);
171-
172-
Matcher m = JSON_FIRST_BYTE.matcher(schemaAsString);
173-
if (m.matches()) {
174-
return ".json";
175-
} else {
176-
m = XML_FIRST_BYTE.matcher(schemaAsString);
177-
if (m.matches()) {
178-
return ".xml";
179-
}
180-
}
181-
return null;
169+
}
170+
171+
/**
172+
* Guess the extension of the file from its first bytes: JSON and XML are detected via regular expressions, otherwise Apache Tika is used.
173+
*
174+
* @param schema First bytes of the file.
175+
* @return Estimated extension. e.g. '.xml'
176+
*/
177+
private static String guessFileExtension(byte[] schema) {
178+
String returnValue = null;
179+
// Cut schema to a maximum of MAX_LENGTH_OF_HEADER characters.
180+
int length = Math.min(schema.length, MAX_LENGTH_OF_HEADER);
181+
String schemaAsString = new String(schema, 0, length);
182+
LOGGER.trace("Guess type for '{}'", schemaAsString);
183+
184+
Matcher m = JSON_FIRST_BYTE.matcher(schemaAsString);
185+
if (m.matches()) {
186+
returnValue = ".json";
187+
} else {
188+
m = XML_FIRST_BYTE.matcher(schemaAsString);
189+
if (m.matches()) {
190+
returnValue = ".xml";
191+
}
182192
}
183-
184-
/**
185-
* This method clones a git repository into the 'lib' folder.
186-
*
187-
* @param repositoryUrl the url of the repository to clone
188-
* @param branch the branch to clone
189-
* @return the path to the cloned repository
190-
*/
191-
public static Path cloneGitRepository(String repositoryUrl, String branch) {
192-
String target = "lib/" + repositoryUrl.trim().replace("https://", "").replace("http://", "").replace(".git", "") + "_" + branch;
193-
return cloneGitRepository(repositoryUrl, branch, target);
193+
if (returnValue == null) {
194+
// Use tika library to estimate extension
195+
LOGGER.trace("Use tika library to estimate extension.");
196+
Tika tika = new Tika();
197+
String mimeType;
198+
mimeType = tika.detect(schema);
199+
MimeTypes allTypes = MimeTypes.getDefaultMimeTypes();
200+
MimeType estimatedMimeType;
201+
try {
202+
estimatedMimeType = allTypes.forName(mimeType);
203+
returnValue = estimatedMimeType.getExtension(); // .jpg
204+
LOGGER.trace("Mimetype: '{}', Extension: '{}'", mimeType, returnValue);
205+
} catch (MimeTypeException ex) {
206+
LOGGER.error("Unknown mimetype '{}'", mimeType);
207+
}
194208
}
195-
196-
/**
197-
* This method clones a git repository into the 'lib' folder.
198-
*
199-
* @param repositoryUrl the url of the repository to clone
200-
* @param branch the branch to clone
201-
* @param targetFolder the target folder
202-
* @return the path to the cloned repository
203-
*/
204-
public static Path cloneGitRepository(String repositoryUrl, String branch, String targetFolder) {
205-
File target = new File(targetFolder);
206-
target.mkdirs();
207-
208-
LOGGER.info("Cloning branch '{}' of repository '{}' to '{}'", branch, repositoryUrl, target.getPath());
209-
try {
210-
Git.cloneRepository().setURI(repositoryUrl).setBranch(branch).setDirectory(target).call();
211-
} catch (JGitInternalException e) {
212-
LOGGER.info(e.getMessage());
213-
} catch (GitAPIException ex) {
214-
throw new MappingException("Error cloning git repository '" + repositoryUrl + "' to '" + target + "'!", ex);
215-
}
216-
217-
return target.toPath();
209+
return returnValue;
210+
}
211+
212+
/**
213+
* This method clones a git repository into the 'lib' folder.
214+
*
215+
* @param repositoryUrl the url of the repository to clone
216+
* @param branch the branch to clone
217+
* @return the path to the cloned repository
218+
*/
219+
public static Path cloneGitRepository(String repositoryUrl, String branch) {
220+
String target = "lib/" + repositoryUrl.trim().replace("https://", "").replace("http://", "").replace(".git", "") + "_" + branch;
221+
return cloneGitRepository(repositoryUrl, branch, target);
222+
}
223+
224+
/**
225+
* This method clones a git repository into the 'lib' folder.
226+
*
227+
* @param repositoryUrl the url of the repository to clone
228+
* @param branch the branch to clone
229+
* @param targetFolder the target folder
230+
* @return the path to the cloned repository
231+
*/
232+
public static Path cloneGitRepository(String repositoryUrl, String branch, String targetFolder) {
233+
File target = new File(targetFolder);
234+
target.mkdirs();
235+
236+
LOGGER.info("Cloning branch '{}' of repository '{}' to '{}'", branch, repositoryUrl, target.getPath());
237+
try {
238+
Git.cloneRepository().setURI(repositoryUrl).setBranch(branch).setDirectory(target).call();
239+
} catch (JGitInternalException e) {
240+
LOGGER.info(e.getMessage());
241+
} catch (GitAPIException ex) {
242+
throw new MappingException("Error cloning git repository '" + repositoryUrl + "' to '" + target + "'!", ex);
218243
}
244+
245+
return target.toPath();
246+
}
219247
}

0 commit comments

Comments
 (0)