From 3986e5c1170f384c3e7e6a1092f058adc779b68e Mon Sep 17 00:00:00 2001 From: Pascal Christoph Date: Mon, 24 Mar 2025 14:51:30 +0100 Subject: [PATCH 01/13] WIP SRUopener --- .../org/metafacture/biblio/SruOpener.java | 199 ++++++++++++++++++ .../org/metafacture/biblio/SruOpenerTest.java | 59 ++++++ 2 files changed, 258 insertions(+) create mode 100644 metafacture-biblio/src/main/java/org/metafacture/biblio/SruOpener.java create mode 100644 metafacture-biblio/src/test/java/org/metafacture/biblio/SruOpenerTest.java diff --git a/metafacture-biblio/src/main/java/org/metafacture/biblio/SruOpener.java b/metafacture-biblio/src/main/java/org/metafacture/biblio/SruOpener.java new file mode 100644 index 000000000..8b2833cea --- /dev/null +++ b/metafacture-biblio/src/main/java/org/metafacture/biblio/SruOpener.java @@ -0,0 +1,199 @@ +/* Copyright 2013 Pascal Christoph. + * Licensed under the Eclipse Public License 1.0 */ + +package org.metafacture.biblio; + +import org.metafacture.framework.FluxCommand; +import org.metafacture.framework.MetafactureException; +import org.metafacture.framework.ObjectReceiver; +import org.metafacture.framework.annotations.Description; +import org.metafacture.framework.annotations.In; +import org.metafacture.framework.annotations.Out; +import org.metafacture.framework.helpers.DefaultObjectPipe; + +import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.io.Reader; +import java.net.HttpURLConnection; +import java.net.URL; + +/** + * Opens an SRU (Search Retrieval by URL) stream and passes a reader to the receiver. + * The input should be the base URL of the SRU service to be retrieved from. + * + * @author Pascal Christoph (dr0i) + */ +@Description("Opens a SRU stream and passes a reader to the receiver. The input should be the base URL of the SRU service to be retrieved from. Mandatory argument is: QUERY.") +@In(String.class) +@Out(Reader.class) +@FluxCommand("open-sru") +public final class SruOpener extends DefaultObjectPipe> { + + private static final String OPERATION = "searchRetrieve"; + private static final String RECORD_SCHEMA = "MARC21-xml"; + private static final String USER_AGENT = ""; + private static final String VERSION = "2.0"; + + private static final int CONNECTION_TIMEOUT = 11000; + private static final int MAXIMUM_RECORDS = 10; + private static final int START_RECORD = 1; + + private String operation = OPERATION; + private String query; + private String recordSchema = RECORD_SCHEMA; + private String userAgent = USER_AGENT; + private String version = VERSION; + + private int maximumRecords = MAXIMUM_RECORDS; + private int startRecord = START_RECORD; + private int totalRecords; + + private boolean stopRetrieving; + + + /** + * Creates an instance of {@link SruOpener} + */ + public SruOpener() { + } + + /** + * Sets the User Agent to use. Default value: {@value USER_AGENT}. + * + * @param userAgent a user agent to be used when opening a URL + */ + public void setUserAgent(final String userAgent) { + this.userAgent = userAgent; + } + + /** + * Sets the query of the search. + * Setting a query is mandatory. + * + * @param query the query + */ + + public void setQuery(final String query) { + this.query = query; + } + + /** + * Sets total number of records to be retrieved. Default value: indefinite (as in "all"). + * + * @param totalRecords total number of records to be retrieved + */ + public void setTotal(final String totalRecords) { + this.totalRecords = Integer.parseInt(totalRecords); + } + + /** + * Sets the maximum of records returned in one lookup. Default value: {@value MAXIMUM_RECORDS}. + * The lookup is repeated as long as {@link #maximumRecords} is lesser than {@link #totalRecords}. + * + * @param maximumRecords maximum of records returned in one lookup + */ + public void setMaximumRecords(final String maximumRecords) { + this.maximumRecords = Integer.parseInt(maximumRecords); + } + + /** + * Sets where to start when retrieving records. Default value: {@value START_RECORD}. + * + * @param startRecord where to start when retrieving records + */ + public void setStartRecord(final String startRecord) { + this.startRecord = Integer.parseInt(startRecord); + } + + /** + * Sets the format of the retrieved record data. Default value: {@value RECORD_SCHEMA}. + * + * @param recordSchema the format of the data of the records + */ + public void setRecordSchema(final String recordSchema) { + this.recordSchema = recordSchema; + } + + /** + * Sets the kind of operation of the lookup. Default value: {@value OPERATION}. + * + * @param operation the kind of operation of the lookup + */ + public void setOperation(final String operation) { + this.operation = operation; + } + + /** + * Sets the version of the lookup. Default value: {@value VERSION}. + * + * @param version the version of the lookup + */ + public void setVersion(final String version) { + this.version = version; + } + + @Override + public void process(final String baseUrl) { + + try { + + StringBuilder srUrl = new StringBuilder(baseUrl); + if (query != null) { + srUrl.append("?query=").append(query).append("&operation=").append(operation).append("&recordSchema=").append(recordSchema).append("&version=").append(version); + } + else { + throw new IllegalArgumentException("Missing mandatory parameter 'query'"); + } + int retrievedRecords = 0; + while (!stopRetrieving && (totalRecords==0 || retrievedRecords < totalRecords)) { + if (totalRecords >0) { + int yetToRetrieveRecords = retrievedRecords - totalRecords; + if (yetToRetrieveRecords > maximumRecords) { + maximumRecords = yetToRetrieveRecords; + } + } + retrieve(srUrl, startRecord); //todo: bis max lookup zuviel (bis der nämlich sehr klein ist => keine Ergebnisse mehr) + startRecord = startRecord + maximumRecords; + retrievedRecords = retrievedRecords + maximumRecords; + } + } + catch (final IOException e) { + throw new MetafactureException(e); + } + } + + private void retrieve(StringBuilder srUrl, int startRecord) throws IOException { + final URL urlToOpen = new URL(srUrl.toString() + "&maximumRecords=" + maximumRecords+"&startRecord=" + startRecord); + final HttpURLConnection connection = (HttpURLConnection) urlToOpen.openConnection(); + + connection.setConnectTimeout(CONNECTION_TIMEOUT); + if (!userAgent.isEmpty()) { + connection.setRequestProperty("User-Agent", userAgent); + } + InputStream istream = getInputStream(connection); + try ( + InputStreamReader inputStreamReader = new InputStreamReader(istream); + ) { + System.out.println("srUrl="+srUrl); + System.out.println("startRecord="+startRecord); + System.out.println("istream.length="+istream.available()); + if (istream.available() < 768){ // we take it that this is a result without a record + stopRetrieving = true; + } + + getReceiver().process(inputStreamReader); + } + } + + private InputStream getInputStream(final HttpURLConnection connection) { + try { + return connection.getInputStream(); + } + catch (final IOException e) { + stopRetrieving = true; + return connection.getErrorStream(); + } + } + +} diff --git a/metafacture-biblio/src/test/java/org/metafacture/biblio/SruOpenerTest.java b/metafacture-biblio/src/test/java/org/metafacture/biblio/SruOpenerTest.java new file mode 100644 index 000000000..484ab498d --- /dev/null +++ b/metafacture-biblio/src/test/java/org/metafacture/biblio/SruOpenerTest.java @@ -0,0 +1,59 @@ +package org.metafacture.biblio; + +import org.junit.Test; +import org.metafacture.framework.ObjectReceiver; + +import java.io.BufferedReader; +import java.io.IOException; +import java.io.Reader; + +public class SruOpenerTest { + + private StringBuilder resultCollector = new StringBuilder(); + private int resultCollectorsResetStreamCount; + + + @Test + public void test(){ + SruOpener sruOpener = new SruOpener(); + sruOpener.setReceiver(new ObjectReceiver () { + + @Override + public void process(final Reader obj) { + BufferedReader in = new BufferedReader(obj); + String line = null; + StringBuilder rslt = new StringBuilder(); + while (true) { + try { + if (!((line = in.readLine()) != null)) break; + } + catch (IOException e) { + throw new RuntimeException(e); + } + rslt.append(line); + } + System.out.println(rslt.toString()); + resultCollector.append(obj); + } + + @Override + public void resetStream() { + ++resultCollectorsResetStreamCount; + } + + @Override + public void closeStream() { + + } + }); + + // sruOpener.setQuery("dnb.isil%3DDE-Sol1"); + sruOpener.setQuery("WVN%3D24A05"); + sruOpener.setRecordSchema("MARC21plus-xml"); + sruOpener.setVersion("1.1"); + sruOpener.setStartRecord("1890"); + sruOpener.process("https://services.dnb.de/sru/dnb"); +System.out.println(resultCollector.toString()); + System.out.println(resultCollector.toString()); + } +} From 1f2cb3dd39302f207136d13b8fadf1d01fec2207 Mon Sep 17 00:00:00 2001 From: Pascal Christoph Date: Thu, 27 Mar 2025 15:23:01 +0100 Subject: [PATCH 02/13] WIP --- .../src/main/java/org/metafacture/biblio/OaiPmhOpener.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/metafacture-biblio/src/main/java/org/metafacture/biblio/OaiPmhOpener.java b/metafacture-biblio/src/main/java/org/metafacture/biblio/OaiPmhOpener.java index b66ce1cf8..5a0cd3eef 100644 --- a/metafacture-biblio/src/main/java/org/metafacture/biblio/OaiPmhOpener.java +++ b/metafacture-biblio/src/main/java/org/metafacture/biblio/OaiPmhOpener.java @@ -3,6 +3,7 @@ package org.metafacture.biblio; +import org.metafacture.framework.FluxCommand; import org.metafacture.framework.MetafactureException; import org.metafacture.framework.ObjectReceiver; import org.metafacture.framework.annotations.Description; @@ -30,6 +31,7 @@ @Description("Opens an OAI-PMH stream and passes a reader to the receiver. Mandatory arguments are: BASE_URL, DATE_FROM, DATE_UNTIL, METADATA_PREFIX, SET_SPEC .") @In(String.class) @Out(java.io.Reader.class) +@FluxCommand("open-oaipmh") public final class OaiPmhOpener extends DefaultObjectPipe> { private String encoding = "UTF-8"; From c3f3ad65345ae218675282fa2989e2b0c2411559 Mon Sep 17 00:00:00 2001 From: Pascal Christoph Date: Thu, 10 Apr 2025 15:31:10 +0200 Subject: [PATCH 03/13] Refactor WIP --- metafacture-io/build.gradle | 2 + .../java/org/metafacture/io}/SruOpener.java | 63 ++++++++++++++----- .../org/metafacture/io}/SruOpenerTest.java | 30 ++++++--- 3 files changed, 73 insertions(+), 22 deletions(-) rename {metafacture-biblio/src/main/java/org/metafacture/biblio => metafacture-io/src/main/java/org/metafacture/io}/SruOpener.java (75%) rename {metafacture-biblio/src/test/java/org/metafacture/biblio => metafacture-io/src/test/java/org/metafacture/io}/SruOpenerTest.java (55%) diff --git a/metafacture-io/build.gradle b/metafacture-io/build.gradle index 4f1363d6c..a5c054f29 100644 --- a/metafacture-io/build.gradle +++ b/metafacture-io/build.gradle @@ -19,6 +19,8 @@ description = 'Modules for reading and writing data streams' dependencies { api project(':metafacture-framework') + api project(':metafacture-formatting') + api project(':metafacture-xml') implementation project(':metafacture-commons') implementation "commons-io:commons-io:${versions.commons_io}" implementation "org.apache.commons:commons-compress:${versions.commons_compress}" diff --git a/metafacture-biblio/src/main/java/org/metafacture/biblio/SruOpener.java b/metafacture-io/src/main/java/org/metafacture/io/SruOpener.java similarity index 75% rename from metafacture-biblio/src/main/java/org/metafacture/biblio/SruOpener.java rename to metafacture-io/src/main/java/org/metafacture/io/SruOpener.java index 8b2833cea..bb554a33a 100644 --- a/metafacture-biblio/src/main/java/org/metafacture/biblio/SruOpener.java +++ b/metafacture-io/src/main/java/org/metafacture/io/SruOpener.java @@ -1,22 +1,27 @@ /* Copyright 2013 Pascal Christoph. * Licensed under the Eclipse Public License 1.0 */ -package org.metafacture.biblio; +package org.metafacture.io; import org.metafacture.framework.FluxCommand; import org.metafacture.framework.MetafactureException; -import org.metafacture.framework.ObjectReceiver; +import org.metafacture.framework.XmlReceiver; import org.metafacture.framework.annotations.Description; import org.metafacture.framework.annotations.In; import org.metafacture.framework.annotations.Out; import org.metafacture.framework.helpers.DefaultObjectPipe; +import org.xml.sax.InputSource; +import org.xml.sax.SAXException; +import org.xml.sax.SAXNotRecognizedException; +import org.xml.sax.SAXNotSupportedException; +import org.xml.sax.XMLReader; import java.io.IOException; import java.io.InputStream; -import java.io.InputStreamReader; -import java.io.Reader; import java.net.HttpURLConnection; import java.net.URL; +import javax.xml.parsers.ParserConfigurationException; +import javax.xml.parsers.SAXParserFactory; /** * Opens an SRU (Search Retrieval by URL) stream and passes a reader to the receiver. @@ -26,9 +31,9 @@ */ @Description("Opens a SRU stream and passes a reader to the receiver. The input should be the base URL of the SRU service to be retrieved from. Mandatory argument is: QUERY.") @In(String.class) -@Out(Reader.class) +@Out(XmlReceiver.class) @FluxCommand("open-sru") -public final class SruOpener extends DefaultObjectPipe> { +public final class SruOpener extends DefaultObjectPipe { private static final String OPERATION = "searchRetrieve"; private static final String RECORD_SCHEMA = "MARC21-xml"; @@ -38,6 +43,7 @@ public final class SruOpener extends DefaultObjectPipe () { + sruOpener.setReceiver(new ObjectReceiver () { + @Override - public void process(final Reader obj) { - BufferedReader in = new BufferedReader(obj); + public void process(final XmlReceiver obj) { + /* BufferedReader in = new BufferedReader(obj); String line = null; StringBuilder rslt = new StringBuilder(); while (true) { @@ -31,8 +37,16 @@ public void process(final Reader obj) { throw new RuntimeException(e); } rslt.append(line); - } - System.out.println(rslt.toString()); + }*/ + StreamLiteralFormatter streamLiteralFormatter = new StreamLiteralFormatter(); + ObjectStdoutWriter objectStdoutWriter = new ObjectStdoutWriter(); + XmlElementSplitter xmlElementSplitter = new XmlElementSplitter(); + streamLiteralFormatter.setReceiver(objectStdoutWriter); + xmlElementSplitter.setReceiver(streamLiteralFormatter); + xmlDecoder.setReceiver(xmlElementSplitter); + + + // System.out.println(rslt.toString()); resultCollector.append(obj); } @@ -52,7 +66,9 @@ public void closeStream() { sruOpener.setRecordSchema("MARC21plus-xml"); sruOpener.setVersion("1.1"); sruOpener.setStartRecord("1890"); - sruOpener.process("https://services.dnb.de/sru/dnb"); + // sruOpener.process("https://services.dnb.de/sru/dnb"); + sruOpener.process("https://amsquery.stadt-zuerich.ch/sru/"); + System.out.println(resultCollector.toString()); System.out.println(resultCollector.toString()); } From 26640fa6e9456bc6c156759719a5ac2554588589 Mon Sep 17 00:00:00 2001 From: Pascal Christoph Date: Thu, 24 Apr 2025 15:59:34 +0200 Subject: [PATCH 04/13] WIP and working --- .../java/org/metafacture/io/SruOpener.java | 97 +++++++++---------- .../org/metafacture/io/SruOpenerTest.java | 59 ++++------- 2 files changed, 66 insertions(+), 90 deletions(-) diff --git a/metafacture-io/src/main/java/org/metafacture/io/SruOpener.java b/metafacture-io/src/main/java/org/metafacture/io/SruOpener.java index bb554a33a..ffe258d6c 100644 --- a/metafacture-io/src/main/java/org/metafacture/io/SruOpener.java +++ b/metafacture-io/src/main/java/org/metafacture/io/SruOpener.java @@ -5,23 +5,31 @@ import org.metafacture.framework.FluxCommand; import org.metafacture.framework.MetafactureException; -import org.metafacture.framework.XmlReceiver; +import org.metafacture.framework.ObjectReceiver; import org.metafacture.framework.annotations.Description; import org.metafacture.framework.annotations.In; import org.metafacture.framework.annotations.Out; import org.metafacture.framework.helpers.DefaultObjectPipe; -import org.xml.sax.InputSource; +import org.w3c.dom.Document; import org.xml.sax.SAXException; -import org.xml.sax.SAXNotRecognizedException; -import org.xml.sax.SAXNotSupportedException; -import org.xml.sax.XMLReader; +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; import java.io.IOException; import java.io.InputStream; +import java.io.InputStreamReader; +import java.io.Reader; import java.net.HttpURLConnection; import java.net.URL; +import javax.xml.parsers.DocumentBuilder; +import javax.xml.parsers.DocumentBuilderFactory; import javax.xml.parsers.ParserConfigurationException; -import javax.xml.parsers.SAXParserFactory; +import javax.xml.transform.Result; +import javax.xml.transform.Transformer; +import javax.xml.transform.TransformerException; +import javax.xml.transform.TransformerFactory; +import javax.xml.transform.dom.DOMSource; +import javax.xml.transform.stream.StreamResult; /** * Opens an SRU (Search Retrieval by URL) stream and passes a reader to the receiver. @@ -31,9 +39,9 @@ */ @Description("Opens a SRU stream and passes a reader to the receiver. The input should be the base URL of the SRU service to be retrieved from. Mandatory argument is: QUERY.") @In(String.class) -@Out(XmlReceiver.class) +@Out(java.io.Reader.class) @FluxCommand("open-sru") -public final class SruOpener extends DefaultObjectPipe { +public final class SruOpener extends DefaultObjectPipe> { private static final String OPERATION = "searchRetrieve"; private static final String RECORD_SCHEMA = "MARC21-xml"; @@ -43,8 +51,6 @@ public final class SruOpener extends DefaultObjectPipe { private static final int CONNECTION_TIMEOUT = 11000; private static final int MAXIMUM_RECORDS = 10; private static final int START_RECORD = 1; - private final XMLReader saxReader; - private String operation = OPERATION; private String query; private String recordSchema = RECORD_SCHEMA; @@ -59,17 +65,9 @@ public final class SruOpener extends DefaultObjectPipe { /** - * Creates an instance of {@link SruOpener} + * Default constructor */ public SruOpener() { - try { - final SAXParserFactory parserFactory = SAXParserFactory.newInstance(); - parserFactory.setNamespaceAware(true); - saxReader = parserFactory.newSAXParser().getXMLReader(); - } - catch (final ParserConfigurationException | SAXException e) { - throw new MetafactureException(e); - } } /** @@ -160,24 +158,42 @@ public void process(final String baseUrl) { throw new IllegalArgumentException("Missing mandatory parameter 'query'"); } int retrievedRecords = 0; - while (!stopRetrieving && (totalRecords==0 || retrievedRecords < totalRecords)) { - if (totalRecords >0) { - int yetToRetrieveRecords = retrievedRecords - totalRecords; - if (yetToRetrieveRecords > maximumRecords) { + while (!stopRetrieving && (retrievedRecords < totalRecords)) { + if (totalRecords >0) { + int yetToRetrieveRecords = totalRecords - retrievedRecords; + if (yetToRetrieveRecords < maximumRecords) { maximumRecords = yetToRetrieveRecords; } } - retrieve(srUrl, startRecord); //todo: bis max lookup zuviel (bis der nämlich sehr klein ist => keine Ergebnisse mehr) + ByteArrayInputStream byteArrayInputStream = retrieve(srUrl, startRecord, maximumRecords); + + TransformerFactory tf = TransformerFactory.newInstance(); + Transformer t = tf.newTransformer(); + DocumentBuilderFactory factory =DocumentBuilderFactory.newInstance(); + DocumentBuilder docBuilder = factory.newDocumentBuilder(); + Document xmldoc = docBuilder.parse(byteArrayInputStream); + + ByteArrayOutputStream os = new ByteArrayOutputStream(); + Result result = new StreamResult(os); + t.transform(new DOMSource(xmldoc), result); + + ByteArrayInputStream inputStream = new ByteArrayInputStream(os.toByteArray()); + + getReceiver().process( + new InputStreamReader(inputStream)); + t.setOutputProperty("omit-xml-declaration", "yes"); + //todo: bis max lookup zuviel (bis der nämlich sehr klein ist => keine Ergebnisse mehr) startRecord = startRecord + maximumRecords; retrievedRecords = retrievedRecords + maximumRecords; } } - catch (final IOException e) { + catch (final IOException | TransformerException | SAXException | ParserConfigurationException e) { throw new MetafactureException(e); } + } - private void retrieve(StringBuilder srUrl, int startRecord) throws IOException { + private ByteArrayInputStream retrieve(StringBuilder srUrl, int startRecord, int maximumRecords) throws IOException { final URL urlToOpen = new URL(srUrl.toString() + "&maximumRecords=" + maximumRecords+"&startRecord=" + startRecord); final HttpURLConnection connection = (HttpURLConnection) urlToOpen.openConnection(); @@ -186,22 +202,16 @@ private void retrieve(StringBuilder srUrl, int startRecord) throws IOException { connection.setRequestProperty("User-Agent", userAgent); } InputStream inputStream = getInputStream(connection); - try { - InputSource inputSource = new InputSource(inputStream); - saxReader.parse(inputSource); - // String sr = saxReader.getProperty("huhu").toString(); - // System.out.println(sr); - } - catch (final IOException | SAXException e) { - throw new MetafactureException(e); - } + ByteArrayOutputStream outputStream = new ByteArrayOutputStream(); + System.out.println("srUrl="+srUrl); System.out.println("startRecord="+startRecord); System.out.println("istream.length="+inputStream.available()); if (inputStream.available() < 768){ // we take it that this is a result without a record stopRetrieving = true; } - // getReceiver().process(saxReader); + inputStream.transferTo(outputStream); + return new ByteArrayInputStream(outputStream.toByteArray()); } private InputStream getInputStream(final HttpURLConnection connection) { @@ -214,19 +224,4 @@ private InputStream getInputStream(final HttpURLConnection connection) { } } - private static final String SAX_PROPERTY_LEXICAL_HANDLER = "http://xml.org/sax/properties/lexical-handler"; - @Override - protected void onSetReceiver() { - saxReader.setContentHandler(getReceiver()); - saxReader.setDTDHandler(getReceiver()); - saxReader.setEntityResolver(getReceiver()); - saxReader.setErrorHandler(getReceiver()); - try { - saxReader.setProperty(SAX_PROPERTY_LEXICAL_HANDLER, getReceiver()); - } - catch (final SAXNotRecognizedException | SAXNotSupportedException e) { - throw new MetafactureException(e); - } - } - } diff --git a/metafacture-io/src/test/java/org/metafacture/io/SruOpenerTest.java b/metafacture-io/src/test/java/org/metafacture/io/SruOpenerTest.java index 4da881cb9..dd1cc5ef5 100644 --- a/metafacture-io/src/test/java/org/metafacture/io/SruOpenerTest.java +++ b/metafacture-io/src/test/java/org/metafacture/io/SruOpenerTest.java @@ -1,16 +1,6 @@ package org.metafacture.io; import org.junit.Test; -import org.metafacture.formatting.StreamLiteralFormatter; -import org.metafacture.framework.ObjectReceiver; -import org.metafacture.framework.XmlReceiver; -import org.metafacture.xml.XmlDecoder; -import org.metafacture.xml.XmlElementSplitter; -import org.xml.sax.XMLReader; - -import java.io.BufferedReader; -import java.io.IOException; -import java.io.Reader; public class SruOpenerTest { @@ -21,12 +11,14 @@ public class SruOpenerTest { @Test public void test(){ SruOpener sruOpener = new SruOpener(); - sruOpener.setReceiver(new ObjectReceiver () { + RecordReader recordReader = new RecordReader(); + recordReader.setReceiver(new ObjectStdoutWriter()); + sruOpener.setReceiver(recordReader);// { - @Override + /* @Override public void process(final XmlReceiver obj) { - /* BufferedReader in = new BufferedReader(obj); + BufferedReader in = new BufferedReader(obj); String line = null; StringBuilder rslt = new StringBuilder(); while (true) { @@ -38,38 +30,27 @@ public void process(final XmlReceiver obj) { } rslt.append(line); }*/ - StreamLiteralFormatter streamLiteralFormatter = new StreamLiteralFormatter(); + /* StreamLiteralFormatter streamLiteralFormatter = new StreamLiteralFormatter(); ObjectStdoutWriter objectStdoutWriter = new ObjectStdoutWriter(); XmlElementSplitter xmlElementSplitter = new XmlElementSplitter(); streamLiteralFormatter.setReceiver(objectStdoutWriter); xmlElementSplitter.setReceiver(streamLiteralFormatter); - xmlDecoder.setReceiver(xmlElementSplitter); - - - // System.out.println(rslt.toString()); - resultCollector.append(obj); - } - - @Override - public void resetStream() { - ++resultCollectorsResetStreamCount; - } + xmlDecoder.setReceiver(xmlElementSplitter);*/ + // System.out.println(rslt.toString()); + // resultCollector.append(obj); + //} - @Override - public void closeStream() { - - } - }); - - // sruOpener.setQuery("dnb.isil%3DDE-Sol1"); - sruOpener.setQuery("WVN%3D24A05"); + sruOpener.setQuery("dnb.isil%3DDE-Sol1"); + // sruOpener.setQuery("WVN%3D24A05"); sruOpener.setRecordSchema("MARC21plus-xml"); sruOpener.setVersion("1.1"); - sruOpener.setStartRecord("1890"); - // sruOpener.process("https://services.dnb.de/sru/dnb"); - sruOpener.process("https://amsquery.stadt-zuerich.ch/sru/"); - -System.out.println(resultCollector.toString()); - System.out.println(resultCollector.toString()); + // sruOpener.setStartRecord("5"); + sruOpener.setMaximumRecords("5"); + sruOpener.setTotal("6"); + // sruOpener.process("https://services.dnb.de/sru/dnb"); + sruOpener.process("https://services.dnb.de/sru/zdb"); + // sruOpener.process("https://amsquery.stadt-zuerich.ch/sru/"); + +// System.out.println(resultCollector.toString()); } } From 33aeead7f4daeca54c292f3d37961e37efdd8744 Mon Sep 17 00:00:00 2001 From: Pascal Christoph Date: Thu, 24 Apr 2025 16:53:47 +0200 Subject: [PATCH 05/13] WIP parsing working --- .../java/org/metafacture/io/SruOpener.java | 28 +++++++++++-------- .../org/metafacture/io/SruOpenerTest.java | 2 +- 2 files changed, 17 insertions(+), 13 deletions(-) diff --git a/metafacture-io/src/main/java/org/metafacture/io/SruOpener.java b/metafacture-io/src/main/java/org/metafacture/io/SruOpener.java index ffe258d6c..3a68e54aa 100644 --- a/metafacture-io/src/main/java/org/metafacture/io/SruOpener.java +++ b/metafacture-io/src/main/java/org/metafacture/io/SruOpener.java @@ -11,6 +11,7 @@ import org.metafacture.framework.annotations.Out; import org.metafacture.framework.helpers.DefaultObjectPipe; import org.w3c.dom.Document; +import org.w3c.dom.Element; import org.xml.sax.SAXException; import java.io.ByteArrayInputStream; @@ -59,7 +60,7 @@ public final class SruOpener extends DefaultObjectPipe0) { - int yetToRetrieveRecords = totalRecords - retrievedRecords; + int numberOfRecords = Integer.MAX_VALUE; + TransformerFactory tf = TransformerFactory.newInstance(); + Transformer t = tf.newTransformer(); + while (!stopRetrieving && (startRecord < numberOfRecords)) { + /* if (totalRecords >0) { + yetToRetrieveRecords = totalRecords - retrievedRecords; if (yetToRetrieveRecords < maximumRecords) { maximumRecords = yetToRetrieveRecords; } - } + }*/ ByteArrayInputStream byteArrayInputStream = retrieve(srUrl, startRecord, maximumRecords); - TransformerFactory tf = TransformerFactory.newInstance(); - Transformer t = tf.newTransformer(); + DocumentBuilderFactory factory =DocumentBuilderFactory.newInstance(); DocumentBuilder docBuilder = factory.newDocumentBuilder(); Document xmldoc = docBuilder.parse(byteArrayInputStream); - ByteArrayOutputStream os = new ByteArrayOutputStream(); + Element element = (Element)xmldoc.getElementsByTagName("numberOfRecords").item(0); + numberOfRecords=Integer.parseInt(element.getTextContent()); + + ByteArrayOutputStream os = new ByteArrayOutputStream(); Result result = new StreamResult(os); t.transform(new DOMSource(xmldoc), result); - ByteArrayInputStream inputStream = new ByteArrayInputStream(os.toByteArray()); getReceiver().process( new InputStreamReader(inputStream)); + tf = TransformerFactory.newInstance(); + t = tf.newTransformer(); t.setOutputProperty("omit-xml-declaration", "yes"); - //todo: bis max lookup zuviel (bis der nämlich sehr klein ist => keine Ergebnisse mehr) startRecord = startRecord + maximumRecords; - retrievedRecords = retrievedRecords + maximumRecords; } } catch (final IOException | TransformerException | SAXException | ParserConfigurationException e) { diff --git a/metafacture-io/src/test/java/org/metafacture/io/SruOpenerTest.java b/metafacture-io/src/test/java/org/metafacture/io/SruOpenerTest.java index dd1cc5ef5..2ba801d41 100644 --- a/metafacture-io/src/test/java/org/metafacture/io/SruOpenerTest.java +++ b/metafacture-io/src/test/java/org/metafacture/io/SruOpenerTest.java @@ -44,7 +44,7 @@ public void process(final XmlReceiver obj) { // sruOpener.setQuery("WVN%3D24A05"); sruOpener.setRecordSchema("MARC21plus-xml"); sruOpener.setVersion("1.1"); - // sruOpener.setStartRecord("5"); + sruOpener.setStartRecord("3029"); sruOpener.setMaximumRecords("5"); sruOpener.setTotal("6"); // sruOpener.process("https://services.dnb.de/sru/dnb"); From 1896c17d60d04be91de01d0f7f524e46c13ff15c Mon Sep 17 00:00:00 2001 From: Pascal Christoph Date: Fri, 25 Apr 2025 17:12:41 +0200 Subject: [PATCH 06/13] WIP https://github.com/metafacture/metafacture-core/issues/510#issuecomment-2830696136 --- .../java/org/metafacture/io/SruOpener.java | 133 ++++++++++-------- .../org/metafacture/io/SruOpenerTest.java | 6 +- 2 files changed, 81 insertions(+), 58 deletions(-) diff --git a/metafacture-io/src/main/java/org/metafacture/io/SruOpener.java b/metafacture-io/src/main/java/org/metafacture/io/SruOpener.java index 3a68e54aa..3b3247450 100644 --- a/metafacture-io/src/main/java/org/metafacture/io/SruOpener.java +++ b/metafacture-io/src/main/java/org/metafacture/io/SruOpener.java @@ -14,21 +14,13 @@ import org.w3c.dom.Element; import org.xml.sax.SAXException; -import java.io.ByteArrayInputStream; -import java.io.ByteArrayOutputStream; -import java.io.IOException; -import java.io.InputStream; -import java.io.InputStreamReader; -import java.io.Reader; +import java.io.*; import java.net.HttpURLConnection; import java.net.URL; import javax.xml.parsers.DocumentBuilder; import javax.xml.parsers.DocumentBuilderFactory; import javax.xml.parsers.ParserConfigurationException; -import javax.xml.transform.Result; -import javax.xml.transform.Transformer; -import javax.xml.transform.TransformerException; -import javax.xml.transform.TransformerFactory; +import javax.xml.transform.*; import javax.xml.transform.dom.DOMSource; import javax.xml.transform.stream.StreamResult; @@ -61,9 +53,13 @@ public final class SruOpener extends DefaultObjectPipe"; + private String xmlDeclaration; /** * Default constructor @@ -149,51 +145,83 @@ public void setVersion(final String version) { @Override public void process(final String baseUrl) { - try { + StringBuilder srUrl = new StringBuilder(baseUrl); + if (query != null) { + srUrl.append("?query=").append(query).append("&operation=").append(operation).append("&recordSchema=") + .append(recordSchema).append("&version=").append(version); + } else { + throw new IllegalArgumentException("Missing mandatory parameter 'query'"); + } - StringBuilder srUrl = new StringBuilder(baseUrl); - if (query != null) { - srUrl.append("?query=").append(query).append("&operation=").append(operation).append("&recordSchema=").append(recordSchema).append("&version=").append(version); - } - else { - throw new IllegalArgumentException("Missing mandatory parameter 'query'"); - } - int numberOfRecords = Integer.MAX_VALUE; - TransformerFactory tf = TransformerFactory.newInstance(); - Transformer t = tf.newTransformer(); - while (!stopRetrieving && (startRecord < numberOfRecords)) { - /* if (totalRecords >0) { - yetToRetrieveRecords = totalRecords - retrievedRecords; - if (yetToRetrieveRecords < maximumRecords) { - maximumRecords = yetToRetrieveRecords; + try { + //get first document and add a starting root tag + Transformer t = TransformerFactory.newInstance().newTransformer(); + BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(getXmlDocsViaSru(srUrl))); + String line; + StringBuilder stringBuilder = new StringBuilder(1024 * 1024); + boolean rootTagAdded = false; + while ((line = bufferedReader.readLine()) != null) { + if(!rootTagAdded) { + if (line.matches(".*searchRetrieveResponse.*")) { + stringBuilder.append(xmlDeclaration+"\n"); + stringBuilder.append("\n"); + rootTagAdded = true; } - }*/ - ByteArrayInputStream byteArrayInputStream = retrieve(srUrl, startRecord, maximumRecords); - - - DocumentBuilderFactory factory =DocumentBuilderFactory.newInstance(); - DocumentBuilder docBuilder = factory.newDocumentBuilder(); - Document xmldoc = docBuilder.parse(byteArrayInputStream); - - Element element = (Element)xmldoc.getElementsByTagName("numberOfRecords").item(0); - numberOfRecords=Integer.parseInt(element.getTextContent()); - - ByteArrayOutputStream os = new ByteArrayOutputStream(); - Result result = new StreamResult(os); - t.transform(new DOMSource(xmldoc), result); - ByteArrayInputStream inputStream = new ByteArrayInputStream(os.toByteArray()); - - getReceiver().process( - new InputStreamReader(inputStream)); - tf = TransformerFactory.newInstance(); - t = tf.newTransformer(); - t.setOutputProperty("omit-xml-declaration", "yes"); - startRecord = startRecord + maximumRecords; + } + stringBuilder.append(line+"\n"); + } + getReceiver().process(new InputStreamReader(new ByteArrayInputStream(stringBuilder.toString().getBytes()))); + while (!stopRetrieving && recordsRetrieved < totalRecords && (startRecord < numberOfRecords)) { + InputStream inputStream = getXmlDocsViaSru(srUrl); + getReceiver().process(new InputStreamReader(inputStream)); } + //close root tag + getReceiver().process(new InputStreamReader(new ByteArrayInputStream("\n\n".getBytes()))); } - catch (final IOException | TransformerException | SAXException | ParserConfigurationException e) { + catch (TransformerConfigurationException | IOException e) { throw new MetafactureException(e); } + } + + private InputStream getXmlDocsViaSru(final StringBuilder srUrl ){ + try { + ByteArrayInputStream byteArrayInputStream = retrieve(srUrl, startRecord, maximumRecords); + DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); + DocumentBuilder docBuilder = factory.newDocumentBuilder(); + Document xmldoc = docBuilder.parse(byteArrayInputStream); + + /* Element newRoot = xmldoc.createElement("harvest"); + newRoot.appendChild(xmldoc.getFirstChild()); + xmldoc.appendChild(newRoot);*/ + + numberOfRecords = + Integer.parseInt(((Element) xmldoc.getElementsByTagName("numberOfRecords").item(0)).getTextContent()); + int recordPosition = + Integer.parseInt(((Element) xmldoc.getElementsByTagName("recordPosition").item(0)).getTextContent()); + int nextRecordPosition = + Integer.parseInt(((Element) xmldoc.getElementsByTagName("nextRecordPosition").item(0)).getTextContent()); + + String xmlEncoding = xmldoc.getXmlEncoding(); + String xmlVersion = xmldoc.getXmlVersion(); + // + xmlDeclaration=String.format(xmlDeclarationTemplate,xmldoc.getXmlVersion(),xmldoc.getXmlEncoding()); + recordsRetrieved = recordsRetrieved + nextRecordPosition - recordPosition; + + ByteArrayOutputStream os = new ByteArrayOutputStream(); + + Result result = new StreamResult(os); + Transformer t = TransformerFactory.newInstance().newTransformer(); + t.setOutputProperty("omit-xml-declaration", "yes"); + t.transform(new DOMSource(xmldoc), result); + + ByteArrayInputStream inputStream = new ByteArrayInputStream(os.toByteArray()); + startRecord = startRecord + maximumRecords; + return inputStream; + + } catch (final IOException | TransformerException | SAXException | ParserConfigurationException e) { + throw new MetafactureException(e); + } + } @@ -206,14 +234,9 @@ private ByteArrayInputStream retrieve(StringBuilder srUrl, int startRecord, int connection.setRequestProperty("User-Agent", userAgent); } InputStream inputStream = getInputStream(connection); + ByteArrayOutputStream outputStream = new ByteArrayOutputStream(); - System.out.println("srUrl="+srUrl); - System.out.println("startRecord="+startRecord); - System.out.println("istream.length="+inputStream.available()); - if (inputStream.available() < 768){ // we take it that this is a result without a record - stopRetrieving = true; - } inputStream.transferTo(outputStream); return new ByteArrayInputStream(outputStream.toByteArray()); } diff --git a/metafacture-io/src/test/java/org/metafacture/io/SruOpenerTest.java b/metafacture-io/src/test/java/org/metafacture/io/SruOpenerTest.java index 2ba801d41..d1955d415 100644 --- a/metafacture-io/src/test/java/org/metafacture/io/SruOpenerTest.java +++ b/metafacture-io/src/test/java/org/metafacture/io/SruOpenerTest.java @@ -44,9 +44,9 @@ public void process(final XmlReceiver obj) { // sruOpener.setQuery("WVN%3D24A05"); sruOpener.setRecordSchema("MARC21plus-xml"); sruOpener.setVersion("1.1"); - sruOpener.setStartRecord("3029"); - sruOpener.setMaximumRecords("5"); - sruOpener.setTotal("6"); + sruOpener.setStartRecord("4"); + sruOpener.setMaximumRecords("1"); + sruOpener.setTotal("2"); // sruOpener.process("https://services.dnb.de/sru/dnb"); sruOpener.process("https://services.dnb.de/sru/zdb"); // sruOpener.process("https://amsquery.stadt-zuerich.ch/sru/"); From 767e8d039c8d03e5afc42b1ff4064e783fc8f83e Mon Sep 17 00:00:00 2001 From: Pascal Christoph Date: Fri, 16 May 2025 11:59:52 +0200 Subject: [PATCH 07/13] WIP testen --- .../biblio/SruOpenerTest_test.java | 162 ++++++++++++++++++ metafacture-flux/build.gradle | 2 + 2 files changed, 164 insertions(+) create mode 100644 metafacture-biblio/src/test/java/org/metafacture/biblio/SruOpenerTest_test.java diff --git a/metafacture-biblio/src/test/java/org/metafacture/biblio/SruOpenerTest_test.java b/metafacture-biblio/src/test/java/org/metafacture/biblio/SruOpenerTest_test.java new file mode 100644 index 000000000..eb3445b6a --- /dev/null +++ b/metafacture-biblio/src/test/java/org/metafacture/biblio/SruOpenerTest_test.java @@ -0,0 +1,162 @@ +/*package org.metafacture.biblio; + +import org.metafacture.framework.ObjectReceiver; + +import com.github.tomakehurst.wiremock.client.MappingBuilder; +import com.github.tomakehurst.wiremock.client.ResponseDefinitionBuilder; +import com.github.tomakehurst.wiremock.client.WireMock; +import com.github.tomakehurst.wiremock.core.WireMockConfiguration; +import com.github.tomakehurst.wiremock.http.RequestMethod; +import com.github.tomakehurst.wiremock.junit.WireMockRule; +import com.github.tomakehurst.wiremock.matching.RequestPatternBuilder; +import com.github.tomakehurst.wiremock.matching.StringValuePattern; +import com.github.tomakehurst.wiremock.matching.UrlPattern; +import org.junit.Assert; +import org.junit.ComparisonFailure; +import org.junit.Rule; +import org.junit.Test; +import org.mockito.Mock; +import org.mockito.junit.MockitoJUnit; +import org.mockito.junit.MockitoRule; + + +import java.io.BufferedReader; +import java.io.IOException; +import java.io.Reader; +import java.util.Arrays; +import java.util.function.BiConsumer; +import java.util.function.Consumer; + + +public final class SruOpenerTest { + +private StringBuilder resultCollector = new StringBuilder(); + private int resultCollectorsResetStreamCount; + private static final String RESPONSE_BODY = "response bödy"; // UTF-8 + private static final String TEST_URL = "/test/path"; + + + @Rule + public MockitoRule mockitoRule = MockitoJUnit.rule(); + + @Rule + public WireMockRule wireMockRule = new WireMockRule(WireMockConfiguration.wireMockConfig(). + jettyAcceptors(Runtime.getRuntime() + .availableProcessors()) + .dynamicPort()); + + @Mock + private ObjectReceiver receiver; + + public SruOpenerTest() { + } + + // @Test + public void test(){ + SruOpener sruOpener = new SruOpener(); + sruOpener.setReceiver(new ObjectReceiver () { + + @Override + public void process(final Reader obj) { + BufferedReader in = new BufferedReader(obj); + String line = null; + StringBuilder rslt = new StringBuilder(); + while (true) { + try { + if (!((line = in.readLine()) != null)) break; + } + catch (IOException e) { + throw new RuntimeException(e); + } + rslt.append(line); + } + String result = rslt.toString(); + if (result.length() > 768) { + System.out.println(rslt.toString().substring(768, 1024)); + } + else System.out.println("Antwort zu klein, gehe von ende der Anzhal der Records aus"); + resultCollector.append(obj); + } + + @Override + public void resetStream() { + ++resultCollectorsResetStreamCount; + } + + @Override + public void closeStream() { + + } + }); + + // sruOpener.setQuery("dnb.isil%3DDE-Sol1"); + sruOpener.setQuery("WVN%3D24A05"); + sruOpener.setRecordSchema("MARC21plus-xml"); + sruOpener.setVersion("1.1"); + sruOpener.setStartRecord("1890"); + sruOpener.setTotal("32"); + sruOpener.process("https://services.dnb.de/sru/dnb"); + // System.out.println(resultCollector.toString()); + } + + @Test + public void shouldPerformGetRequestWithInputAsUrlByDefault() throws IOException { + SruOpener sruOpener = new SruOpener(); + sruOpener.setQuery("WVN%3D24A05"); + sruOpener.setRecordSchema("MARC21plus-xml"); + sruOpener.setVersion("1.1"); + sruOpener.setStartRecord("1890"); + sruOpener.setTotal("32"); + shouldPerformRequest(TEST_URL,sruOpener); + } + + + mach lieber wie in metafix/src/test/java/org/metafacture/metafix/MetafixLookupTest.java wiremock + private void shouldPerformRequest(String input, SruOpener sruOpener) throws IOException { // checkstyle-disable-line ParameterNumber + + final BiConsumer consumer; + final Consumer stubConsumer; + final Consumer requestConsumer; + final Consumer responseConsumer = null; + final String responseBody; + final ResponseDefinitionBuilder response = WireMock.ok().withBody(RESPONSE_BODY); + if (responseConsumer != null) { + responseConsumer.accept(response); + } + + final String baseUrl = wireMockRule.baseUrl(); + final String url = String.format(TEST_URL, baseUrl); + + final UrlPattern urlPattern = WireMock.urlPathEqualTo(TEST_URL); + + final SruOpener opener = new SruOpener(); + opener.setReceiver(receiver); + consumer.accept(opener, url); + + final MappingBuilder stub = WireMock.request("GET", urlPattern).willReturn(response); + if (stubConsumer != null) { + stubConsumer.accept(stub); + } + + final RequestPatternBuilder request = new RequestPatternBuilder(RequestMethod.fromString("GET"), urlPattern) + .withRequestBody(method.getRequestHasBody() ? WireMock.equalTo(REQUEST_BODY) : WireMock.absent()); + if (requestConsumer != null) { + requestConsumer.accept(request); + } + + WireMock.stubFor(stub); + + opener.process(String.format(input, baseUrl)); + + // use the opener a second time in a workflow: + opener.process(String.format(input, baseUrl)); + + opener.closeStream(); + + + WireMock.verify(request); + } + + +} +*/ \ No newline at end of file diff --git a/metafacture-flux/build.gradle b/metafacture-flux/build.gradle index 8a4919845..80698d15b 100644 --- a/metafacture-flux/build.gradle +++ b/metafacture-flux/build.gradle @@ -23,6 +23,8 @@ dependencies { api project(':metafacture-framework') implementation project(':metafacture-commons') implementation project(':metafacture-io') + implementation project(':metafacture-biblio') + implementation project(':metafacture-xml') testRuntimeOnly project(':metafacture-plumbing') antlr "org.antlr:antlr:${versions.antlr}" testImplementation "junit:junit:${versions.junit}" From 3dc041601b077fe1dfd83b718d84e1d3b777d223 Mon Sep 17 00:00:00 2001 From: Pascal Christoph Date: Mon, 2 Jun 2025 16:20:55 +0200 Subject: [PATCH 08/13] WIP every single output is a valid XML by itself Works hereby in the "stream" fashion. - add to flux-command --- .../java/org/metafacture/io/SruOpener.java | 105 ++++++++---------- .../main/resources/flux-commands.properties | 3 +- 2 files changed, 50 insertions(+), 58 deletions(-) diff --git a/metafacture-io/src/main/java/org/metafacture/io/SruOpener.java b/metafacture-io/src/main/java/org/metafacture/io/SruOpener.java index 3b3247450..9f742332e 100644 --- a/metafacture-io/src/main/java/org/metafacture/io/SruOpener.java +++ b/metafacture-io/src/main/java/org/metafacture/io/SruOpener.java @@ -26,11 +26,15 @@ /** * Opens an SRU (Search Retrieval by URL) stream and passes a reader to the receiver. - * The input should be the base URL of the SRU service to be retrieved from. * * @author Pascal Christoph (dr0i) */ -@Description("Opens a SRU stream and passes a reader to the receiver. The input should be the base URL of the SRU service to be retrieved from. Mandatory argument is: QUERY.") +@Description( + "Opens a SRU stream and passes a reader to the receiver. The input is be the base URL of the SRU service " + + "to be retrieved from. Mandatory argument is: QUERY.\n" + + "The output is an XML document holding the user defined \"maximumRecords\" as documents. If there are" + + "more documents than defined by MAXIMUM_RECORDS and there are more documents wanted (defined by " + + "\"totalRecords\") there will be consecutives XML documents output.") @In(String.class) @Out(java.io.Reader.class) @FluxCommand("open-sru") @@ -58,7 +62,7 @@ public final class SruOpener extends DefaultObjectPipe"; + private String xmlDeclarationTemplate = ""; private String xmlDeclaration; /** @@ -88,7 +92,8 @@ public void setQuery(final String query) { } /** - * Sets total number of records to be retrieved. Default value: indefinite (as in "all"). + * Sets total number of records to be retrieved. Default value: indefinite (as in "all") + * . * * @param totalRecords total number of records to be retrieved */ @@ -112,7 +117,7 @@ public void setMaximumRecords(final String maximumRecords) { * @param startRecord where to start when retrieving records */ public void setStartRecord(final String startRecord) { - this.startRecord = Integer.parseInt(startRecord); + this.startRecord = Integer.parseInt(startRecord); } /** @@ -150,83 +155,69 @@ public void process(final String baseUrl) { srUrl.append("?query=").append(query).append("&operation=").append(operation).append("&recordSchema=") .append(recordSchema).append("&version=").append(version); } else { + stopRetrieving = true; throw new IllegalArgumentException("Missing mandatory parameter 'query'"); } - try { - //get first document and add a starting root tag - Transformer t = TransformerFactory.newInstance().newTransformer(); - BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(getXmlDocsViaSru(srUrl))); - String line; - StringBuilder stringBuilder = new StringBuilder(1024 * 1024); - boolean rootTagAdded = false; - while ((line = bufferedReader.readLine()) != null) { - if(!rootTagAdded) { - if (line.matches(".*searchRetrieveResponse.*")) { - stringBuilder.append(xmlDeclaration+"\n"); - stringBuilder.append("\n"); - rootTagAdded = true; - } - } - stringBuilder.append(line+"\n"); - } - getReceiver().process(new InputStreamReader(new ByteArrayInputStream(stringBuilder.toString().getBytes()))); - while (!stopRetrieving && recordsRetrieved < totalRecords && (startRecord < numberOfRecords)) { - InputStream inputStream = getXmlDocsViaSru(srUrl); - getReceiver().process(new InputStreamReader(inputStream)); - } - //close root tag - getReceiver().process(new InputStreamReader(new ByteArrayInputStream("\n\n".getBytes()))); - } - catch (TransformerConfigurationException | IOException e) { - throw new MetafactureException(e); + while (!stopRetrieving && recordsRetrieved < totalRecords && (startRecord < numberOfRecords)) { + InputStream inputStream = getXmlDocsViaSru(srUrl); + getReceiver().process(new InputStreamReader(inputStream)); } + } - private InputStream getXmlDocsViaSru(final StringBuilder srUrl ){ - try { + private InputStream getXmlDocsViaSru(final StringBuilder srUrl) { + try { ByteArrayInputStream byteArrayInputStream = retrieve(srUrl, startRecord, maximumRecords); DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); DocumentBuilder docBuilder = factory.newDocumentBuilder(); Document xmldoc = docBuilder.parse(byteArrayInputStream); - /* Element newRoot = xmldoc.createElement("harvest"); - newRoot.appendChild(xmldoc.getFirstChild()); - xmldoc.appendChild(newRoot);*/ - - numberOfRecords = - Integer.parseInt(((Element) xmldoc.getElementsByTagName("numberOfRecords").item(0)).getTextContent()); - int recordPosition = - Integer.parseInt(((Element) xmldoc.getElementsByTagName("recordPosition").item(0)).getTextContent()); - int nextRecordPosition = - Integer.parseInt(((Element) xmldoc.getElementsByTagName("nextRecordPosition").item(0)).getTextContent()); - - String xmlEncoding = xmldoc.getXmlEncoding(); - String xmlVersion = xmldoc.getXmlVersion(); - // - xmlDeclaration=String.format(xmlDeclarationTemplate,xmldoc.getXmlVersion(),xmldoc.getXmlEncoding()); - recordsRetrieved = recordsRetrieved + nextRecordPosition - recordPosition; + numberOfRecords = + Integer.parseInt( + ((Element) xmldoc.getElementsByTagName("numberOfRecords").item(0)).getTextContent()); + int recordPosition = + Integer.parseInt( + ((Element) xmldoc.getElementsByTagName("recordPosition").item(0)).getTextContent()); + int nextRecordPosition = + Integer.parseInt( + ((Element) xmldoc.getElementsByTagName("nextRecordPosition").item(0)).getTextContent()); + + String xmlEncoding = xmldoc.getXmlEncoding(); + String xmlVersion = xmldoc.getXmlVersion(); + xmlDeclaration = String.format(xmlDeclarationTemplate, xmldoc.getXmlVersion(), xmldoc.getXmlEncoding()); + recordsRetrieved = recordsRetrieved + nextRecordPosition - recordPosition; ByteArrayOutputStream os = new ByteArrayOutputStream(); Result result = new StreamResult(os); Transformer t = TransformerFactory.newInstance().newTransformer(); - t.setOutputProperty("omit-xml-declaration", "yes"); + t.setOutputProperty("omit-xml-declaration", "yes"); t.transform(new DOMSource(xmldoc), result); ByteArrayInputStream inputStream = new ByteArrayInputStream(os.toByteArray()); startRecord = startRecord + maximumRecords; - return inputStream; - } catch (final IOException | TransformerException | SAXException | ParserConfigurationException e) { + //get searchRetrieveResponse and add XML declaration + BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(inputStream)); + String line; + StringBuilder stringBuilder = new StringBuilder(1024 * 1024); + stringBuilder.append(xmlDeclaration + "\n"); + while ((line = bufferedReader.readLine()) != null) { + stringBuilder.append(line + "\n"); + } + return new ByteArrayInputStream(stringBuilder.toString().getBytes()); + + } + catch (final IOException | TransformerException | SAXException | ParserConfigurationException e) { + stopRetrieving = true; throw new MetafactureException(e); } - - } private ByteArrayInputStream retrieve(StringBuilder srUrl, int startRecord, int maximumRecords) throws IOException { - final URL urlToOpen = new URL(srUrl.toString() + "&maximumRecords=" + maximumRecords+"&startRecord=" + startRecord); + final URL urlToOpen = + new URL(srUrl.toString() + "&maximumRecords=" + maximumRecords + "&startRecord=" + startRecord); final HttpURLConnection connection = (HttpURLConnection) urlToOpen.openConnection(); connection.setConnectTimeout(CONNECTION_TIMEOUT); @@ -239,7 +230,7 @@ private ByteArrayInputStream retrieve(StringBuilder srUrl, int startRecord, int inputStream.transferTo(outputStream); return new ByteArrayInputStream(outputStream.toByteArray()); - } + } private InputStream getInputStream(final HttpURLConnection connection) { try { diff --git a/metafacture-io/src/main/resources/flux-commands.properties b/metafacture-io/src/main/resources/flux-commands.properties index 39540d47e..a53ff9ee7 100644 --- a/metafacture-io/src/main/resources/flux-commands.properties +++ b/metafacture-io/src/main/resources/flux-commands.properties @@ -21,4 +21,5 @@ print org.metafacture.io.ObjectStdoutWriter write org.metafacture.io.ObjectWriter as-records org.metafacture.io.RecordReader open-resource org.metafacture.io.ResourceOpener -open-tar org.metafacture.io.TarReader +open-tar org.metafacture.io. +open-sru org.metafacture.io.SruOpener From 54c0d6992619c705a75a1e41e1e0d79292bb4e49 Mon Sep 17 00:00:00 2001 From: Pascal Christoph Date: Thu, 5 Jun 2025 09:42:26 +0200 Subject: [PATCH 09/13] Readd TarReader to flux-commands.md --- metafacture-io/src/main/resources/flux-commands.properties | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metafacture-io/src/main/resources/flux-commands.properties b/metafacture-io/src/main/resources/flux-commands.properties index a53ff9ee7..e69c03c40 100644 --- a/metafacture-io/src/main/resources/flux-commands.properties +++ b/metafacture-io/src/main/resources/flux-commands.properties @@ -21,5 +21,5 @@ print org.metafacture.io.ObjectStdoutWriter write org.metafacture.io.ObjectWriter as-records org.metafacture.io.RecordReader open-resource org.metafacture.io.ResourceOpener -open-tar org.metafacture.io. +open-tar org.metafacture.io.TarReader open-sru org.metafacture.io.SruOpener From ac80718d599e317c5c9e949fad0daf5e86277b28 Mon Sep 17 00:00:00 2001 From: Pascal Christoph Date: Tue, 10 Jun 2025 15:00:59 +0200 Subject: [PATCH 10/13] WIP Catch Exception --- .../java/org/metafacture/io/SruOpener.java | 42 ++++++++++++------- .../org/metafacture/io/SruOpenerTest.java | 8 ++-- 2 files changed, 32 insertions(+), 18 deletions(-) diff --git a/metafacture-io/src/main/java/org/metafacture/io/SruOpener.java b/metafacture-io/src/main/java/org/metafacture/io/SruOpener.java index 9f742332e..822ff9a12 100644 --- a/metafacture-io/src/main/java/org/metafacture/io/SruOpener.java +++ b/metafacture-io/src/main/java/org/metafacture/io/SruOpener.java @@ -11,16 +11,25 @@ import org.metafacture.framework.annotations.Out; import org.metafacture.framework.helpers.DefaultObjectPipe; import org.w3c.dom.Document; -import org.w3c.dom.Element; +import org.w3c.dom.Node; import org.xml.sax.SAXException; -import java.io.*; +import java.io.BufferedReader; +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.io.Reader; import java.net.HttpURLConnection; import java.net.URL; import javax.xml.parsers.DocumentBuilder; import javax.xml.parsers.DocumentBuilderFactory; import javax.xml.parsers.ParserConfigurationException; -import javax.xml.transform.*; +import javax.xml.transform.Result; +import javax.xml.transform.Transformer; +import javax.xml.transform.TransformerException; +import javax.xml.transform.TransformerFactory; import javax.xml.transform.dom.DOMSource; import javax.xml.transform.stream.StreamResult; @@ -34,7 +43,7 @@ "to be retrieved from. Mandatory argument is: QUERY.\n" + "The output is an XML document holding the user defined \"maximumRecords\" as documents. If there are" + "more documents than defined by MAXIMUM_RECORDS and there are more documents wanted (defined by " + - "\"totalRecords\") there will be consecutives XML documents output.") + "\"totalRecords\") there will be consecutive XML documents output.") @In(String.class) @Out(java.io.Reader.class) @FluxCommand("open-sru") @@ -173,16 +182,21 @@ private InputStream getXmlDocsViaSru(final StringBuilder srUrl) { DocumentBuilder docBuilder = factory.newDocumentBuilder(); Document xmldoc = docBuilder.parse(byteArrayInputStream); - numberOfRecords = - Integer.parseInt( - ((Element) xmldoc.getElementsByTagName("numberOfRecords").item(0)).getTextContent()); - int recordPosition = - Integer.parseInt( - ((Element) xmldoc.getElementsByTagName("recordPosition").item(0)).getTextContent()); - int nextRecordPosition = - Integer.parseInt( - ((Element) xmldoc.getElementsByTagName("nextRecordPosition").item(0)).getTextContent()); + Node node = xmldoc.getElementsByTagName("numberOfRecords").item(0); + if (node!= null) { + numberOfRecords = Integer.parseInt(node.getTextContent()); + } + int recordPosition=0; + node = xmldoc.getElementsByTagName("recordPosition").item(0); + if (node!= null) { + recordPosition = Integer.parseInt(node.getTextContent()); + } + int nextRecordPosition =recordPosition+1; + node = xmldoc.getElementsByTagName("nextRecordPosition").item(0); + if (node!= null) { + nextRecordPosition = Integer.parseInt(node.getTextContent()); + } String xmlEncoding = xmldoc.getXmlEncoding(); String xmlVersion = xmldoc.getXmlVersion(); xmlDeclaration = String.format(xmlDeclarationTemplate, xmldoc.getXmlVersion(), xmldoc.getXmlEncoding()); @@ -209,7 +223,7 @@ private InputStream getXmlDocsViaSru(final StringBuilder srUrl) { return new ByteArrayInputStream(stringBuilder.toString().getBytes()); } - catch (final IOException | TransformerException | SAXException | ParserConfigurationException e) { + catch (final IOException | TransformerException| SAXException | ParserConfigurationException e) { stopRetrieving = true; throw new MetafactureException(e); } diff --git a/metafacture-io/src/test/java/org/metafacture/io/SruOpenerTest.java b/metafacture-io/src/test/java/org/metafacture/io/SruOpenerTest.java index d1955d415..b1fe5ad6b 100644 --- a/metafacture-io/src/test/java/org/metafacture/io/SruOpenerTest.java +++ b/metafacture-io/src/test/java/org/metafacture/io/SruOpenerTest.java @@ -44,13 +44,13 @@ public void process(final XmlReceiver obj) { // sruOpener.setQuery("WVN%3D24A05"); sruOpener.setRecordSchema("MARC21plus-xml"); sruOpener.setVersion("1.1"); - sruOpener.setStartRecord("4"); - sruOpener.setMaximumRecords("1"); - sruOpener.setTotal("2"); + sruOpener.setStartRecord("3029"); + sruOpener.setMaximumRecords("2"); + sruOpener.setTotal("3"); // sruOpener.process("https://services.dnb.de/sru/dnb"); sruOpener.process("https://services.dnb.de/sru/zdb"); // sruOpener.process("https://amsquery.stadt-zuerich.ch/sru/"); -// System.out.println(resultCollector.toString()); + // System.out.println(resultCollector.toString()); } } From 120f6019fa0fbbd45a7ab8ebc3162e59e04f887c Mon Sep 17 00:00:00 2001 From: Pascal Christoph Date: Tue, 10 Jun 2025 16:37:36 +0200 Subject: [PATCH 11/13] WIP imporove --- .../java/org/metafacture/io/SruOpener.java | 72 +++++++------------ .../org/metafacture/io/SruOpenerTest.java | 6 +- 2 files changed, 28 insertions(+), 50 deletions(-) diff --git a/metafacture-io/src/main/java/org/metafacture/io/SruOpener.java b/metafacture-io/src/main/java/org/metafacture/io/SruOpener.java index 822ff9a12..33ee0824a 100644 --- a/metafacture-io/src/main/java/org/metafacture/io/SruOpener.java +++ b/metafacture-io/src/main/java/org/metafacture/io/SruOpener.java @@ -14,19 +14,18 @@ import org.w3c.dom.Node; import org.xml.sax.SAXException; -import java.io.BufferedReader; import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; import java.io.Reader; +import java.io.StringWriter; import java.net.HttpURLConnection; import java.net.URL; import javax.xml.parsers.DocumentBuilder; import javax.xml.parsers.DocumentBuilderFactory; import javax.xml.parsers.ParserConfigurationException; -import javax.xml.transform.Result; import javax.xml.transform.Transformer; import javax.xml.transform.TransformerException; import javax.xml.transform.TransformerFactory; @@ -34,16 +33,16 @@ import javax.xml.transform.stream.StreamResult; /** - * Opens an SRU (Search Retrieval by URL) stream and passes a reader to the receiver. + * Opens an SRU (Search Retrieval by URL) stream and passes a reader to the receiver. Pages through the SRU. * * @author Pascal Christoph (dr0i) */ @Description( - "Opens a SRU stream and passes a reader to the receiver. The input is be the base URL of the SRU service " + + "Opens a SRU stream and passes a reader to the receiver. The input is the base URL of the SRU service " + "to be retrieved from. Mandatory argument is: QUERY.\n" + "The output is an XML document holding the user defined \"maximumRecords\" as documents. If there are" + "more documents than defined by MAXIMUM_RECORDS and there are more documents wanted (defined by " + - "\"totalRecords\") there will be consecutive XML documents output.") + "\"totalRecords\") there will be consecutive XML documents output as it pages through the SRU.") @In(String.class) @Out(java.io.Reader.class) @FluxCommand("open-sru") @@ -162,9 +161,9 @@ public void process(final String baseUrl) { StringBuilder srUrl = new StringBuilder(baseUrl); if (query != null) { srUrl.append("?query=").append(query).append("&operation=").append(operation).append("&recordSchema=") - .append(recordSchema).append("&version=").append(version); - } else { - stopRetrieving = true; + .append(recordSchema).append("&version=").append(version); + } + else { throw new IllegalArgumentException("Missing mandatory parameter 'query'"); } @@ -182,53 +181,32 @@ private InputStream getXmlDocsViaSru(final StringBuilder srUrl) { DocumentBuilder docBuilder = factory.newDocumentBuilder(); Document xmldoc = docBuilder.parse(byteArrayInputStream); - Node node = xmldoc.getElementsByTagName("numberOfRecords").item(0); - if (node!= null) { - numberOfRecords = Integer.parseInt(node.getTextContent()); - } - - int recordPosition=0; - node = xmldoc.getElementsByTagName("recordPosition").item(0); - if (node!= null) { - recordPosition = Integer.parseInt(node.getTextContent()); - } - int nextRecordPosition =recordPosition+1; - node = xmldoc.getElementsByTagName("nextRecordPosition").item(0); - if (node!= null) { - nextRecordPosition = Integer.parseInt(node.getTextContent()); - } - String xmlEncoding = xmldoc.getXmlEncoding(); - String xmlVersion = xmldoc.getXmlVersion(); - xmlDeclaration = String.format(xmlDeclarationTemplate, xmldoc.getXmlVersion(), xmldoc.getXmlEncoding()); - recordsRetrieved = recordsRetrieved + nextRecordPosition - recordPosition; + Transformer t = TransformerFactory.newInstance().newTransformer(); + StringWriter stringWriter = new StringWriter(); + t.transform(new DOMSource(xmldoc), new StreamResult(stringWriter)); - ByteArrayOutputStream os = new ByteArrayOutputStream(); + numberOfRecords = getIntegerValueFromElement(xmldoc,"numberOfRecords"); + int recordPosition = getIntegerValueFromElement(xmldoc,"recordPosition"); + int nextRecordPosition = getIntegerValueFromElement(xmldoc,"nextRecordPosition"); - Result result = new StreamResult(os); - Transformer t = TransformerFactory.newInstance().newTransformer(); - t.setOutputProperty("omit-xml-declaration", "yes"); - t.transform(new DOMSource(xmldoc), result); - - ByteArrayInputStream inputStream = new ByteArrayInputStream(os.toByteArray()); - startRecord = startRecord + maximumRecords; - - //get searchRetrieveResponse and add XML declaration - BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(inputStream)); - String line; - StringBuilder stringBuilder = new StringBuilder(1024 * 1024); - stringBuilder.append(xmlDeclaration + "\n"); - while ((line = bufferedReader.readLine()) != null) { - stringBuilder.append(line + "\n"); - } - return new ByteArrayInputStream(stringBuilder.toString().getBytes()); + recordsRetrieved = recordsRetrieved + nextRecordPosition - recordPosition; + startRecord = nextRecordPosition; // grenzwert : wenn maximumRcords > als in echt + return new ByteArrayInputStream(stringWriter.toString().getBytes()); } - catch (final IOException | TransformerException| SAXException | ParserConfigurationException e) { - stopRetrieving = true; + catch (final IOException | TransformerException | SAXException | ParserConfigurationException e) { throw new MetafactureException(e); } } + private int getIntegerValueFromElement(final Document xmlDoc, final String tagName) { + Node node = xmlDoc.getElementsByTagName(tagName).item(0); + if (node != null) { + return Integer.parseInt(node.getTextContent()); + } + return 0; + } + private ByteArrayInputStream retrieve(StringBuilder srUrl, int startRecord, int maximumRecords) throws IOException { final URL urlToOpen = new URL(srUrl.toString() + "&maximumRecords=" + maximumRecords + "&startRecord=" + startRecord); diff --git a/metafacture-io/src/test/java/org/metafacture/io/SruOpenerTest.java b/metafacture-io/src/test/java/org/metafacture/io/SruOpenerTest.java index b1fe5ad6b..ec4aa30f6 100644 --- a/metafacture-io/src/test/java/org/metafacture/io/SruOpenerTest.java +++ b/metafacture-io/src/test/java/org/metafacture/io/SruOpenerTest.java @@ -44,9 +44,9 @@ public void process(final XmlReceiver obj) { // sruOpener.setQuery("WVN%3D24A05"); sruOpener.setRecordSchema("MARC21plus-xml"); sruOpener.setVersion("1.1"); - sruOpener.setStartRecord("3029"); - sruOpener.setMaximumRecords("2"); - sruOpener.setTotal("3"); + sruOpener.setStartRecord("2800"); + sruOpener.setMaximumRecords("200"); + sruOpener.setTotal("1"); // sruOpener.process("https://services.dnb.de/sru/dnb"); sruOpener.process("https://services.dnb.de/sru/zdb"); // sruOpener.process("https://amsquery.stadt-zuerich.ch/sru/"); From 831e0fb85c5a3e2f3054cb7cecfb24d7c3ab9800 Mon Sep 17 00:00:00 2001 From: Pascal Christoph Date: Tue, 10 Jun 2025 17:36:09 +0200 Subject: [PATCH 12/13] WIP 2 --- .../src/main/java/org/metafacture/io/SruOpener.java | 13 +++++-------- .../test/java/org/metafacture/io/SruOpenerTest.java | 4 ++-- 2 files changed, 7 insertions(+), 10 deletions(-) diff --git a/metafacture-io/src/main/java/org/metafacture/io/SruOpener.java b/metafacture-io/src/main/java/org/metafacture/io/SruOpener.java index 33ee0824a..ebf613e7d 100644 --- a/metafacture-io/src/main/java/org/metafacture/io/SruOpener.java +++ b/metafacture-io/src/main/java/org/metafacture/io/SruOpener.java @@ -15,7 +15,6 @@ import org.xml.sax.SAXException; import java.io.ByteArrayInputStream; -import java.io.ByteArrayOutputStream; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; @@ -176,10 +175,10 @@ public void process(final String baseUrl) { private InputStream getXmlDocsViaSru(final StringBuilder srUrl) { try { - ByteArrayInputStream byteArrayInputStream = retrieve(srUrl, startRecord, maximumRecords); + InputStream inputStreamOfURl = retrieveUrl(srUrl, startRecord, maximumRecords); DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); DocumentBuilder docBuilder = factory.newDocumentBuilder(); - Document xmldoc = docBuilder.parse(byteArrayInputStream); + Document xmldoc = docBuilder.parse(inputStreamOfURl); Transformer t = TransformerFactory.newInstance().newTransformer(); StringWriter stringWriter = new StringWriter(); @@ -193,6 +192,7 @@ private InputStream getXmlDocsViaSru(final StringBuilder srUrl) { startRecord = nextRecordPosition; // grenzwert : wenn maximumRcords > als in echt return new ByteArrayInputStream(stringWriter.toString().getBytes()); + } catch (final IOException | TransformerException | SAXException | ParserConfigurationException e) { throw new MetafactureException(e); @@ -207,7 +207,7 @@ private int getIntegerValueFromElement(final Document xmlDoc, final String tagNa return 0; } - private ByteArrayInputStream retrieve(StringBuilder srUrl, int startRecord, int maximumRecords) throws IOException { + private InputStream retrieveUrl(StringBuilder srUrl, int startRecord, int maximumRecords) throws IOException { final URL urlToOpen = new URL(srUrl.toString() + "&maximumRecords=" + maximumRecords + "&startRecord=" + startRecord); final HttpURLConnection connection = (HttpURLConnection) urlToOpen.openConnection(); @@ -218,10 +218,7 @@ private ByteArrayInputStream retrieve(StringBuilder srUrl, int startRecord, int } InputStream inputStream = getInputStream(connection); - ByteArrayOutputStream outputStream = new ByteArrayOutputStream(); - - inputStream.transferTo(outputStream); - return new ByteArrayInputStream(outputStream.toByteArray()); + return inputStream; } private InputStream getInputStream(final HttpURLConnection connection) { diff --git a/metafacture-io/src/test/java/org/metafacture/io/SruOpenerTest.java b/metafacture-io/src/test/java/org/metafacture/io/SruOpenerTest.java index ec4aa30f6..883db268b 100644 --- a/metafacture-io/src/test/java/org/metafacture/io/SruOpenerTest.java +++ b/metafacture-io/src/test/java/org/metafacture/io/SruOpenerTest.java @@ -44,8 +44,8 @@ public void process(final XmlReceiver obj) { // sruOpener.setQuery("WVN%3D24A05"); sruOpener.setRecordSchema("MARC21plus-xml"); sruOpener.setVersion("1.1"); - sruOpener.setStartRecord("2800"); - sruOpener.setMaximumRecords("200"); + sruOpener.setStartRecord("3029"); + sruOpener.setMaximumRecords("1"); sruOpener.setTotal("1"); // sruOpener.process("https://services.dnb.de/sru/dnb"); sruOpener.process("https://services.dnb.de/sru/zdb"); From b92238b80627de903880515c359cfc2d28cf616a Mon Sep 17 00:00:00 2001 From: Pascal Christoph Date: Fri, 20 Jun 2025 11:33:25 +0200 Subject: [PATCH 13/13] Add fallback if elements are missing in the data --- .../src/main/java/org/metafacture/io/SruOpener.java | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/metafacture-io/src/main/java/org/metafacture/io/SruOpener.java b/metafacture-io/src/main/java/org/metafacture/io/SruOpener.java index ebf613e7d..79c6516d1 100644 --- a/metafacture-io/src/main/java/org/metafacture/io/SruOpener.java +++ b/metafacture-io/src/main/java/org/metafacture/io/SruOpener.java @@ -184,9 +184,9 @@ private InputStream getXmlDocsViaSru(final StringBuilder srUrl) { StringWriter stringWriter = new StringWriter(); t.transform(new DOMSource(xmldoc), new StreamResult(stringWriter)); - numberOfRecords = getIntegerValueFromElement(xmldoc,"numberOfRecords"); - int recordPosition = getIntegerValueFromElement(xmldoc,"recordPosition"); - int nextRecordPosition = getIntegerValueFromElement(xmldoc,"nextRecordPosition"); + numberOfRecords = getIntegerValueFromElement(xmldoc,"numberOfRecords", 0); + int recordPosition = getIntegerValueFromElement(xmldoc,"recordPosition", 0); + int nextRecordPosition = getIntegerValueFromElement(xmldoc,"nextRecordPosition", totalRecords); recordsRetrieved = recordsRetrieved + nextRecordPosition - recordPosition; startRecord = nextRecordPosition; // grenzwert : wenn maximumRcords > als in echt @@ -199,12 +199,12 @@ private InputStream getXmlDocsViaSru(final StringBuilder srUrl) { } } - private int getIntegerValueFromElement(final Document xmlDoc, final String tagName) { + private int getIntegerValueFromElement(final Document xmlDoc, final String tagName, final int fallback) { Node node = xmlDoc.getElementsByTagName(tagName).item(0); if (node != null) { return Integer.parseInt(node.getTextContent()); } - return 0; + return fallback; } private InputStream retrieveUrl(StringBuilder srUrl, int startRecord, int maximumRecords) throws IOException {