diff --git a/metafacture-biblio/src/main/java/org/metafacture/biblio/OaiPmhOpener.java b/metafacture-biblio/src/main/java/org/metafacture/biblio/OaiPmhOpener.java index b66ce1cf8..5a0cd3eef 100644 --- a/metafacture-biblio/src/main/java/org/metafacture/biblio/OaiPmhOpener.java +++ b/metafacture-biblio/src/main/java/org/metafacture/biblio/OaiPmhOpener.java @@ -3,6 +3,7 @@ package org.metafacture.biblio; +import org.metafacture.framework.FluxCommand; import org.metafacture.framework.MetafactureException; import org.metafacture.framework.ObjectReceiver; import org.metafacture.framework.annotations.Description; @@ -30,6 +31,7 @@ @Description("Opens an OAI-PMH stream and passes a reader to the receiver. Mandatory arguments are: BASE_URL, DATE_FROM, DATE_UNTIL, METADATA_PREFIX, SET_SPEC .") @In(String.class) @Out(java.io.Reader.class) +@FluxCommand("open-oaipmh") public final class OaiPmhOpener extends DefaultObjectPipe> { private String encoding = "UTF-8"; diff --git a/metafacture-biblio/src/test/java/org/metafacture/biblio/SruOpenerTest_test.java b/metafacture-biblio/src/test/java/org/metafacture/biblio/SruOpenerTest_test.java new file mode 100644 index 000000000..eb3445b6a --- /dev/null +++ b/metafacture-biblio/src/test/java/org/metafacture/biblio/SruOpenerTest_test.java @@ -0,0 +1,162 @@ +/*package org.metafacture.biblio; + +import org.metafacture.framework.ObjectReceiver; + +import com.github.tomakehurst.wiremock.client.MappingBuilder; +import com.github.tomakehurst.wiremock.client.ResponseDefinitionBuilder; +import com.github.tomakehurst.wiremock.client.WireMock; +import com.github.tomakehurst.wiremock.core.WireMockConfiguration; +import com.github.tomakehurst.wiremock.http.RequestMethod; +import com.github.tomakehurst.wiremock.junit.WireMockRule; +import com.github.tomakehurst.wiremock.matching.RequestPatternBuilder; +import com.github.tomakehurst.wiremock.matching.StringValuePattern; +import com.github.tomakehurst.wiremock.matching.UrlPattern; +import 
org.junit.Assert; +import org.junit.ComparisonFailure; +import org.junit.Rule; +import org.junit.Test; +import org.mockito.Mock; +import org.mockito.junit.MockitoJUnit; +import org.mockito.junit.MockitoRule; + + +import java.io.BufferedReader; +import java.io.IOException; +import java.io.Reader; +import java.util.Arrays; +import java.util.function.BiConsumer; +import java.util.function.Consumer; + + +public final class SruOpenerTest { + +private StringBuilder resultCollector = new StringBuilder(); + private int resultCollectorsResetStreamCount; + private static final String RESPONSE_BODY = "response bödy"; // UTF-8 + private static final String TEST_URL = "/test/path"; + + + @Rule + public MockitoRule mockitoRule = MockitoJUnit.rule(); + + @Rule + public WireMockRule wireMockRule = new WireMockRule(WireMockConfiguration.wireMockConfig(). + jettyAcceptors(Runtime.getRuntime() + .availableProcessors()) + .dynamicPort()); + + @Mock + private ObjectReceiver receiver; + + public SruOpenerTest() { + } + + // @Test + public void test(){ + SruOpener sruOpener = new SruOpener(); + sruOpener.setReceiver(new ObjectReceiver () { + + @Override + public void process(final Reader obj) { + BufferedReader in = new BufferedReader(obj); + String line = null; + StringBuilder rslt = new StringBuilder(); + while (true) { + try { + if (!((line = in.readLine()) != null)) break; + } + catch (IOException e) { + throw new RuntimeException(e); + } + rslt.append(line); + } + String result = rslt.toString(); + if (result.length() > 768) { + System.out.println(rslt.toString().substring(768, 1024)); + } + else System.out.println("Antwort zu klein, gehe von ende der Anzhal der Records aus"); + resultCollector.append(obj); + } + + @Override + public void resetStream() { + ++resultCollectorsResetStreamCount; + } + + @Override + public void closeStream() { + + } + }); + + // sruOpener.setQuery("dnb.isil%3DDE-Sol1"); + sruOpener.setQuery("WVN%3D24A05"); + sruOpener.setRecordSchema("MARC21plus-xml"); + 
sruOpener.setVersion("1.1"); + sruOpener.setStartRecord("1890"); + sruOpener.setTotal("32"); + sruOpener.process("https://services.dnb.de/sru/dnb"); + // System.out.println(resultCollector.toString()); + } + + @Test + public void shouldPerformGetRequestWithInputAsUrlByDefault() throws IOException { + SruOpener sruOpener = new SruOpener(); + sruOpener.setQuery("WVN%3D24A05"); + sruOpener.setRecordSchema("MARC21plus-xml"); + sruOpener.setVersion("1.1"); + sruOpener.setStartRecord("1890"); + sruOpener.setTotal("32"); + shouldPerformRequest(TEST_URL,sruOpener); + } + + + mach lieber wie in metafix/src/test/java/org/metafacture/metafix/MetafixLookupTest.java wiremock + private void shouldPerformRequest(String input, SruOpener sruOpener) throws IOException { // checkstyle-disable-line ParameterNumber + + final BiConsumer consumer; + final Consumer stubConsumer; + final Consumer requestConsumer; + final Consumer responseConsumer = null; + final String responseBody; + final ResponseDefinitionBuilder response = WireMock.ok().withBody(RESPONSE_BODY); + if (responseConsumer != null) { + responseConsumer.accept(response); + } + + final String baseUrl = wireMockRule.baseUrl(); + final String url = String.format(TEST_URL, baseUrl); + + final UrlPattern urlPattern = WireMock.urlPathEqualTo(TEST_URL); + + final SruOpener opener = new SruOpener(); + opener.setReceiver(receiver); + consumer.accept(opener, url); + + final MappingBuilder stub = WireMock.request("GET", urlPattern).willReturn(response); + if (stubConsumer != null) { + stubConsumer.accept(stub); + } + + final RequestPatternBuilder request = new RequestPatternBuilder(RequestMethod.fromString("GET"), urlPattern) + .withRequestBody(method.getRequestHasBody() ? 
WireMock.equalTo(REQUEST_BODY) : WireMock.absent()); + if (requestConsumer != null) { + requestConsumer.accept(request); + } + + WireMock.stubFor(stub); + + opener.process(String.format(input, baseUrl)); + + // use the opener a second time in a workflow: + opener.process(String.format(input, baseUrl)); + + opener.closeStream(); + + + WireMock.verify(request); + } + + +} +*/
\ No newline at end of file
diff --git a/metafacture-flux/build.gradle b/metafacture-flux/build.gradle
index 8a4919845..80698d15b 100644
--- a/metafacture-flux/build.gradle
+++ b/metafacture-flux/build.gradle
@@ -23,6 +23,8 @@ dependencies {
   api project(':metafacture-framework')
   implementation project(':metafacture-commons')
   implementation project(':metafacture-io')
+  implementation project(':metafacture-biblio')
+  implementation project(':metafacture-xml')
   testRuntimeOnly project(':metafacture-plumbing')
   antlr "org.antlr:antlr:${versions.antlr}"
   testImplementation "junit:junit:${versions.junit}"
diff --git a/metafacture-io/build.gradle b/metafacture-io/build.gradle
index 4f1363d6c..a5c054f29 100644
--- a/metafacture-io/build.gradle
+++ b/metafacture-io/build.gradle
@@ -19,6 +19,8 @@ description = 'Modules for reading and writing data streams'
 
 dependencies {
   api project(':metafacture-framework')
+  api project(':metafacture-formatting')
+  api project(':metafacture-xml')
   implementation project(':metafacture-commons')
   implementation "commons-io:commons-io:${versions.commons_io}"
   implementation "org.apache.commons:commons-compress:${versions.commons_compress}"
diff --git a/metafacture-io/src/main/java/org/metafacture/io/SruOpener.java b/metafacture-io/src/main/java/org/metafacture/io/SruOpener.java
new file mode 100644
index 000000000..79c6516d1
--- /dev/null
+++ b/metafacture-io/src/main/java/org/metafacture/io/SruOpener.java
@@ -0,0 +1,274 @@
+/* Copyright 2013 Pascal Christoph.
+ * Licensed under the Eclipse Public License 1.0 */
+
+package org.metafacture.io;
+
+import org.metafacture.framework.FluxCommand;
+import org.metafacture.framework.MetafactureException;
+import org.metafacture.framework.ObjectReceiver;
+import org.metafacture.framework.annotations.Description;
+import org.metafacture.framework.annotations.In;
+import org.metafacture.framework.annotations.Out;
+import org.metafacture.framework.helpers.DefaultObjectPipe;
+import org.w3c.dom.Document;
+import org.w3c.dom.Node;
+import org.xml.sax.SAXException;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.Reader;
+import java.io.StringReader;
+import java.io.StringWriter;
+import java.net.HttpURLConnection;
+import java.net.URL;
+import javax.xml.XMLConstants;
+import javax.xml.parsers.DocumentBuilder;
+import javax.xml.parsers.DocumentBuilderFactory;
+import javax.xml.parsers.ParserConfigurationException;
+import javax.xml.transform.Transformer;
+import javax.xml.transform.TransformerException;
+import javax.xml.transform.TransformerFactory;
+import javax.xml.transform.dom.DOMSource;
+import javax.xml.transform.stream.StreamResult;
+
+/**
+ * Opens an SRU (Search/Retrieve via URL) stream and passes a reader to the receiver. Pages through the SRU.
+ *
+ * <p>Every call of {@link #process(String)} starts paging at the configured start record, so one instance
+ * can be used for several base URLs in a row.
+ *
+ * @author Pascal Christoph (dr0i)
+ */
+@Description(
+        "Opens an SRU stream and passes a reader to the receiver. The input is the base URL of the SRU service "
+        + "to be retrieved from. Mandatory argument is: QUERY.\n"
+        + "The output is an XML document holding the user defined \"maximumRecords\" as documents. If there are "
+        + "more documents than defined by \"maximumRecords\" and there are more documents wanted (defined by "
+        + "\"total\") there will be consecutive XML documents output as it pages through the SRU.")
+@In(String.class)
+@Out(java.io.Reader.class)
+@FluxCommand("open-sru")
+public final class SruOpener extends DefaultObjectPipe<String, ObjectReceiver<Reader>> {
+
+    private static final String OPERATION = "searchRetrieve";
+    private static final String RECORD_SCHEMA = "MARC21-xml";
+    private static final String USER_AGENT = "";
+    private static final String VERSION = "2.0";
+
+    // Matches elements regardless of the namespace prefix a server uses (none, "zs:", "srw:", ...).
+    private static final String ANY_NAMESPACE = "*";
+
+    private static final int CONNECTION_TIMEOUT = 11000;
+    private static final int MAXIMUM_RECORDS = 10;
+    private static final int START_RECORD = 1;
+
+    private String operation = OPERATION;
+    private String query;
+    private String recordSchema = RECORD_SCHEMA;
+    private String userAgent = USER_AGENT;
+    private String version = VERSION;
+
+    private int maximumRecords = MAXIMUM_RECORDS;
+    private int startRecord = START_RECORD;
+    private int totalRecords = Integer.MAX_VALUE;
+
+    // Set when the server answered with an error; the error document is still forwarded once.
+    private boolean stopRetrieving;
+
+    /**
+     * Default constructor
+     */
+    public SruOpener() {
+    }
+
+    /**
+     * Sets the User Agent to use. Default value: {@value #USER_AGENT}.
+     *
+     * @param userAgent a user agent to be used when opening a URL
+     */
+    public void setUserAgent(final String userAgent) {
+        this.userAgent = userAgent;
+    }
+
+    /**
+     * Sets the query of the search. Setting a query is mandatory. The query is appended
+     * to the URL as is, so it must already be URL-encoded.
+     *
+     * @param query the query
+     */
+    public void setQuery(final String query) {
+        this.query = query;
+    }
+
+    /**
+     * Sets the total number of records to be retrieved. Default value: indefinite (as in "all").
+     *
+     * @param totalRecords total number of records to be retrieved
+     */
+    public void setTotal(final String totalRecords) {
+        this.totalRecords = Integer.parseInt(totalRecords);
+    }
+
+    /**
+     * Sets the maximum of records returned in one lookup. Default value: {@value #MAXIMUM_RECORDS}.
+     * The lookup is repeated until the total number of wanted records or the end of the result is reached.
+     *
+     * @param maximumRecords maximum of records returned in one lookup
+     */
+    public void setMaximumRecords(final String maximumRecords) {
+        this.maximumRecords = Integer.parseInt(maximumRecords);
+    }
+
+    /**
+     * Sets where to start when retrieving records. Default value: {@value #START_RECORD}.
+     *
+     * @param startRecord where to start when retrieving records
+     */
+    public void setStartRecord(final String startRecord) {
+        this.startRecord = Integer.parseInt(startRecord);
+    }
+
+    /**
+     * Sets the format of the retrieved record data. Default value: {@value #RECORD_SCHEMA}.
+     *
+     * @param recordSchema the format of the data of the records
+     */
+    public void setRecordSchema(final String recordSchema) {
+        this.recordSchema = recordSchema;
+    }
+
+    /**
+     * Sets the kind of operation of the lookup. Default value: {@value #OPERATION}.
+     *
+     * @param operation the kind of operation of the lookup
+     */
+    public void setOperation(final String operation) {
+        this.operation = operation;
+    }
+
+    /**
+     * Sets the version of the lookup. Default value: {@value #VERSION}.
+     *
+     * @param version the version of the lookup
+     */
+    public void setVersion(final String version) {
+        this.version = version;
+    }
+
+    /**
+     * Pages through the result of the configured query and passes every page to the receiver.
+     *
+     * @param baseUrl the base URL of the SRU service
+     * @throws IllegalArgumentException if no query has been set
+     * @throws MetafactureException if a page cannot be retrieved, parsed or serialized
+     */
+    @Override
+    public void process(final String baseUrl) {
+        if (query == null) {
+            throw new IllegalArgumentException("Missing mandatory parameter 'query'");
+        }
+        final String searchUrl = baseUrl + "?query=" + query + "&operation=" + operation
+                + "&recordSchema=" + recordSchema + "&version=" + version;
+
+        // The paging state lives in locals, so a second call starts from scratch.
+        stopRetrieving = false;
+        int nextStartRecord = startRecord;
+        int recordsRetrieved = 0;
+        int numberOfRecords = Integer.MAX_VALUE;
+
+        // SRU positions are 1-based: the record at position numberOfRecords still has to be fetched.
+        while (!stopRetrieving && recordsRetrieved < totalRecords && nextStartRecord <= numberOfRecords) {
+            // Never ask for more records than are still wanted.
+            final int pageSize = Math.min(maximumRecords, totalRecords - recordsRetrieved);
+            final Document page = retrievePage(searchUrl, nextStartRecord, pageSize);
+            final int recordsInPage = page.getElementsByTagNameNS(ANY_NAMESPACE, "recordData").getLength();
+            numberOfRecords = getIntegerValueFromElement(page, "numberOfRecords", 0);
+
+            getReceiver().process(new StringReader(serialize(page)));
+
+            if (recordsInPage == 0) {
+                // An empty page cannot advance the position; stop instead of looping forever.
+                break;
+            }
+            recordsRetrieved += recordsInPage;
+            nextStartRecord += recordsInPage;
+        }
+    }
+
+    /**
+     * Fetches one page of the result and parses it.
+     */
+    private Document retrievePage(final String searchUrl, final int start, final int count) {
+        try (InputStream response = openResponse(searchUrl, start, count)) {
+            return newDocumentBuilder().parse(response);
+        }
+        catch (final IOException | SAXException | ParserConfigurationException e) {
+            throw new MetafactureException(e);
+        }
+    }
+
+    /**
+     * Creates a namespace aware parser that does not resolve external entities or DTDs,
+     * as the parsed documents come from a remote server.
+     */
+    private static DocumentBuilder newDocumentBuilder() throws ParserConfigurationException {
+        final DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
+        factory.setNamespaceAware(true);
+        factory.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true);
+        factory.setFeature("http://xml.org/sax/features/external-general-entities", false);
+        factory.setFeature("http://xml.org/sax/features/external-parameter-entities", false);
+        factory.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
+        return factory.newDocumentBuilder();
+    }
+
+    /**
+     * Serializes a page as a string. Passing characters instead of bytes to the receiver
+     * keeps the result independent of the platform's default charset.
+     */
+    private static String serialize(final Document page) {
+        try {
+            final Transformer transformer = TransformerFactory.newInstance().newTransformer();
+            final StringWriter writer = new StringWriter();
+            transformer.transform(new DOMSource(page), new StreamResult(writer));
+            return writer.toString();
+        }
+        catch (final TransformerException e) {
+            throw new MetafactureException(e);
+        }
+    }
+
+    /**
+     * Reads the number held by the first element with the given local name.
+     */
+    private static int getIntegerValueFromElement(final Document xmlDoc, final String tagName, final int fallback) {
+        final Node node = xmlDoc.getElementsByTagNameNS(ANY_NAMESPACE, tagName).item(0);
+        return node == null ? fallback : Integer.parseInt(node.getTextContent().trim());
+    }
+
+    /**
+     * Requests one page. If the server answers with an error, paging is stopped and the body of the
+     * error response (usually an SRU diagnostic) is returned instead.
+     */
+    private InputStream openResponse(final String searchUrl, final int start, final int count) throws IOException {
+        final URL urlToOpen = new URL(searchUrl + "&maximumRecords=" + count + "&startRecord=" + start);
+        final HttpURLConnection connection = (HttpURLConnection) urlToOpen.openConnection();
+
+        connection.setConnectTimeout(CONNECTION_TIMEOUT);
+        if (userAgent != null && !userAgent.isEmpty()) {
+            connection.setRequestProperty("User-Agent", userAgent);
+        }
+        try {
+            return connection.getInputStream();
+        }
+        catch (final IOException e) {
+            stopRetrieving = true;
+            final InputStream errorStream = connection.getErrorStream();
+            if (errorStream == null) {
+                // Nothing to forward, e.g. because the connection could not be established at all.
+                throw e;
+            }
+            return errorStream;
+        }
+    }
+
+}
diff --git a/metafacture-io/src/main/resources/flux-commands.properties b/metafacture-io/src/main/resources/flux-commands.properties
index 39540d47e..e69c03c40 100644
--- a/metafacture-io/src/main/resources/flux-commands.properties
+++ b/metafacture-io/src/main/resources/flux-commands.properties
@@ -22,3 +22,4 @@ write org.metafacture.io.ObjectWriter
 as-records org.metafacture.io.RecordReader
 open-resource org.metafacture.io.ResourceOpener
 open-tar org.metafacture.io.TarReader
+open-sru org.metafacture.io.SruOpener
diff --git a/metafacture-io/src/test/java/org/metafacture/io/SruOpenerTest.java b/metafacture-io/src/test/java/org/metafacture/io/SruOpenerTest.java
new file mode 100644
index 000000000..883db268b
--- /dev/null
+++ b/metafacture-io/src/test/java/org/metafacture/io/SruOpenerTest.java
@@ -0,0 +1,161 @@
+package org.metafacture.io;
+
+import org.metafacture.framework.ObjectReceiver;
+
+import com.sun.net.httpserver.HttpServer;
+import org.junit.After;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+
+import java.io.IOException;
+import java.io.OutputStream;
+import java.io.Reader;
+import java.io.UncheckedIOException;
+import java.net.InetAddress;
+import java.net.InetSocketAddress;
+import java.nio.charset.StandardCharsets;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.List;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+/**
+ * Tests {@link SruOpener} against a local HTTP server holding three records,
+ * so that neither network access nor a particular remote data set is required.
+ */
+public final class SruOpenerTest {
+
+    private static final String HOST = "127.0.0.1";
+    private static final String SEARCH =
+            "query=title%3Dtest&operation=searchRetrieve&recordSchema=MARC21-xml&version=2.0";
+    private static final int NUMBER_OF_RECORDS = 3;
+    private static final int HTTP_OK = 200;
+    private static final Pattern START_RECORD = Pattern.compile("startRecord=(\\d+)");
+    private static final Pattern MAXIMUM_RECORDS = Pattern.compile("maximumRecords=(\\d+)");
+
+    private final List<String> requests = Collections.synchronizedList(new ArrayList<>());
+    private final List<String> documents = new ArrayList<>();
+
+    private HttpServer server;
+    private SruOpener sruOpener;
+
+    public SruOpenerTest() {
+    }
+
+    @Before
+    public void setUp() throws IOException {
+        server = HttpServer.create(new InetSocketAddress(InetAddress.getByName(HOST), 0), 0);
+        server.createContext("/sru", exchange -> {
+            final String rawQuery = exchange.getRequestURI().getRawQuery();
+            requests.add(rawQuery);
+            final byte[] body = response(parameter(START_RECORD, rawQuery), parameter(MAXIMUM_RECORDS, rawQuery))
+                    .getBytes(StandardCharsets.UTF_8);
+            exchange.sendResponseHeaders(HTTP_OK, body.length);
+            try (OutputStream out = exchange.getResponseBody()) {
+                out.write(body);
+            }
+        });
+        server.start();
+
+        sruOpener = new SruOpener();
+        sruOpener.setQuery("title%3Dtest");
+        sruOpener.setMaximumRecords("2");
+        sruOpener.setReceiver(new ObjectReceiver<Reader>() {
+            @Override
+            public void process(final Reader reader) {
+                documents.add(read(reader));
+            }
+
+            @Override
+            public void resetStream() {
+            }
+
+            @Override
+            public void closeStream() {
+            }
+        });
+    }
+
+    @After
+    public void tearDown() {
+        server.stop(0);
+    }
+
+    @Test
+    public void shouldPageThroughAllRecords() {
+        sruOpener.process(baseUrl());
+
+        Assert.assertEquals(Arrays.asList(
+                SEARCH + "&maximumRecords=2&startRecord=1",
+                SEARCH + "&maximumRecords=2&startRecord=3"), requests);
+        Assert.assertEquals(2, documents.size());
+        Assert.assertTrue(documents.get(0).contains("record-1") && documents.get(0).contains("record-2"));
+        Assert.assertTrue(documents.get(1).contains("record-3"));
+    }
+
+    @Test
+    public void shouldNotRequestMoreRecordsThanWanted() {
+        sruOpener.setTotal("1");
+        sruOpener.process(baseUrl());
+
+        Assert.assertEquals(Collections.singletonList(SEARCH + "&maximumRecords=1&startRecord=1"), requests);
+        Assert.assertEquals(1, documents.size());
+    }
+
+    @Test
+    public void shouldStartFromScratchWhenUsedASecondTime() {
+        sruOpener.process(baseUrl());
+        sruOpener.process(baseUrl());
+
+        Assert.assertEquals(4, documents.size());
+        Assert.assertEquals(documents.subList(0, 2), documents.subList(2, 4));
+    }
+
+    @Test(expected = IllegalArgumentException.class)
+    public void shouldRejectMissingQuery() {
+        new SruOpener().process(baseUrl());
+    }
+
+    private String baseUrl() {
+        return "http://" + HOST + ":" + server.getAddress().getPort() + "/sru";
+    }
+
+    private static int parameter(final Pattern pattern, final String rawQuery) {
+        final Matcher matcher = pattern.matcher(rawQuery);
+        if (!matcher.find()) {
+            throw new IllegalArgumentException("Missing parameter in: " + rawQuery);
+        }
+        return Integer.parseInt(matcher.group(1));
+    }
+
+    private static String response(final int start, final int count) {
+        final StringBuilder xml = new StringBuilder()
+            .append("<zs:searchRetrieveResponse xmlns:zs=\"http://www.loc.gov/zing/srw/\">")
+            .append("<zs:numberOfRecords>").append(NUMBER_OF_RECORDS).append("</zs:numberOfRecords>")
+            .append("<zs:records>");
+        final int end = Math.min(start + count - 1, NUMBER_OF_RECORDS);
+        for (int position = start; position <= end; ++position) {
+            xml.append("<zs:record><zs:recordData>record-").append(position)
+                .append("</zs:recordData></zs:record>");
+        }
+        return xml.append("</zs:records></zs:searchRetrieveResponse>").toString();
+    }
+
+    private static String read(final Reader reader) {
+        final StringBuilder text = new StringBuilder();
+        final char[] buffer = new char[1024];
+        try {
+            for (int length = reader.read(buffer); length != -1; length = reader.read(buffer)) {
+                text.append(buffer, 0, length);
+            }
+        }
+        catch (final IOException e) {
+            throw new UncheckedIOException(e);
+        }
+        return text.toString();
+    }
+
+}