Skip to content

Commit b39ac98

Browse files
committed
Merge remote-tracking branch 'origin/master' into 527-output-valid-leader
2 parents a0f2736 + 29b9759 commit b39ac98

File tree

23 files changed

+342
-79
lines changed

23 files changed

+342
-79
lines changed

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
![Metafacture](https://raw.github.com/wiki/metafacture/metafacture-core/img/metafacture.png)
22

3-
[![Build](https://github.com/metafacture/metafacture-core/workflows/Build/badge.svg?branch=master)](https://github.com/metafacture/metafacture-core/actions?query=workflow%3ABuild)
3+
[![Build](https://github.com/metafacture/metafacture-core/actions/workflows/build.yml/badge.svg?branch=master)](https://github.com/metafacture/metafacture-core/actions?query=workflow%3ABuild)
44

55
Metafacture is a toolkit for processing semi-structured data with a focus on library metadata. It provides a versatile set of tools for reading, writing and transforming data. Metafacture can be used as a stand-alone application or as a Java library in other applications. The name Metafacture is a portmanteau of the words *meta*data and manu*facture*.
66

build.gradle

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ subprojects {
2828
versions = [
2929
'assertj_core': '3.11.1',
3030
'commons_compress': '1.21',
31-
'guava': '29.0-jre',
31+
'guava': '32.0.1-jre',
3232
'jackson_databind': '2.15.1',
3333
'jdk': '11',
3434
'junit': '4.12',

gradle/wrapper/gradle-wrapper.jar

130 Bytes
Binary file not shown.

gradle/wrapper/gradle-wrapper.properties

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
distributionBase=GRADLE_USER_HOME
22
distributionPath=wrapper/dists
3-
distributionSha256Sum=a4b4158601f8636cdeeab09bd76afb640030bb5b144aafe261a5e8af027dc612
4-
distributionUrl=https\://services.gradle.org/distributions/gradle-8.8-bin.zip
3+
distributionSha256Sum=57dafb5c2622c6cc08b993c85b7c06956a2f53536432a30ead46166dbca0f1e9
4+
distributionUrl=https\://services.gradle.org/distributions/gradle-8.11-bin.zip
55
networkTimeout=10000
66
validateDistributionUrl=true
77
zipStoreBase=GRADLE_USER_HOME

gradlew

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@
1515
# See the License for the specific language governing permissions and
1616
# limitations under the License.
1717
#
18+
# SPDX-License-Identifier: Apache-2.0
19+
#
1820

1921
##############################################################################
2022
#
@@ -55,7 +57,7 @@
5557
# Darwin, MinGW, and NonStop.
5658
#
5759
# (3) This script is generated from the Groovy template
58-
# https://github.com/gradle/gradle/blob/HEAD/subprojects/plugins/src/main/resources/org/gradle/api/internal/plugins/unixStartScript.txt
60+
# https://github.com/gradle/gradle/blob/HEAD/platforms/jvm/plugins-application/src/main/resources/org/gradle/api/internal/plugins/unixStartScript.txt
5961
# within the Gradle project.
6062
#
6163
# You can find Gradle at https://github.com/gradle/gradle/.
@@ -84,7 +86,8 @@ done
8486
# shellcheck disable=SC2034
8587
APP_BASE_NAME=${0##*/}
8688
# Discard cd standard output in case $CDPATH is set (https://github.com/gradle/gradle/issues/25036)
87-
APP_HOME=$( cd "${APP_HOME:-./}" > /dev/null && pwd -P ) || exit
89+
APP_HOME=$( cd -P "${APP_HOME:-./}" > /dev/null && printf '%s
90+
' "$PWD" ) || exit
8891

8992
# Use the maximum available, or set MAX_FD != -1 to use that value.
9093
MAX_FD=maximum

gradlew.bat

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,8 @@
1313
@rem See the License for the specific language governing permissions and
1414
@rem limitations under the License.
1515
@rem
16+
@rem SPDX-License-Identifier: Apache-2.0
17+
@rem
1618

1719
@if "%DEBUG%"=="" @echo off
1820
@rem ##########################################################################

metafacture-biblio/src/main/java/org/metafacture/biblio/marc21/Marc21Encoder.java

Lines changed: 19 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,7 @@ public final class Marc21Encoder extends
8181
private State state = State.IN_STREAM;
8282

8383
private boolean generateIdField;
84+
private boolean validateLeader = true;
8485

8586
/**
8687
* Initializes the encoder with MARC 21 constants and charset.
@@ -108,6 +109,18 @@ public void setGenerateIdField(final boolean generateIdField) {
108109
this.generateIdField = generateIdField;
109110
}
110111

112+
/**
113+
* Controls whether the leader should be validated.
114+
* <p>
115+
* The default value of {@code validateLeader} is true.
116+
* <p>
117+
*
118+
* @param validateLeader if false, the leader is not validated
119+
*/
120+
public void setValidateLeader(final boolean validateLeader) {
121+
this.validateLeader = validateLeader;
122+
}
123+
111124
/**
112125
* Gets the flag to decide whether the ID field is generated.
113126
*
@@ -259,12 +272,14 @@ private void processLeaderAsSubfields(final String name, final char code) {
259272
}
260273

261274
private void requireValidCode(final char code, final char[] validCodes) {
262-
for (final char validCode: validCodes) {
263-
if (validCode == code) {
264-
return;
275+
if (validateLeader) {
276+
for (final char validCode : validCodes) {
277+
if (validCode == code) {
278+
return;
279+
}
265280
}
281+
throw new FormatException("invalid code '" + code + "'; allowed codes are: " + Arrays.toString(validCodes));
266282
}
267-
throw new FormatException("invalid code '" + code + "'; allowed codes are: " + Arrays.toString(validCodes));
268283
}
269284

270285
private void processTopLevelLiteral(final String name, final String value) {

metafacture-biblio/src/main/java/org/metafacture/biblio/marc21/MarcXmlEncoder.java

Lines changed: 42 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -49,9 +49,6 @@ public final class MarcXmlEncoder extends DefaultStreamPipe<ObjectReceiver<Strin
4949
public static final boolean OMIT_XML_DECLARATION = false;
5050
public static final boolean ENSURE_CORRECT_MARC21_XML = false;
5151

52-
private static final String ROOT_OPEN = "<marc:collection xmlns:marc=\"http://www.loc.gov/MARC21/slim\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" xsi:schemaLocation=\"http://www.loc.gov/MARC21/slim http://www.loc.gov/standards/marcxml/schema/MARC21slim.xsd\">";
53-
private static final String ROOT_CLOSE = "</marc:collection>";
54-
5552
private enum Tag {
5653

5754
collection(" xmlns%s=\"" + NAMESPACE + "\"%s"),
@@ -106,7 +103,6 @@ public String close(final Object[] args) {
106103
private static final int TAG_END = 3;
107104

108105
private final Encoder encoder = new Encoder();
109-
private final Marc21Decoder decoder = new Marc21Decoder();
110106
private final Marc21Encoder wrapper = new Marc21Encoder();
111107

112108
private DefaultStreamPipe<ObjectReceiver<String>> pipe;
@@ -115,6 +111,7 @@ public String close(final Object[] args) {
115111
* Creates an instance of {@link MarcXmlEncoder}.
116112
*/
117113
public MarcXmlEncoder() {
114+
final Marc21Decoder decoder = new Marc21Decoder();
118115
decoder.setEmitLeaderAsWhole(true);
119116

120117
wrapper
@@ -136,7 +133,6 @@ public void setEmitNamespace(final boolean emitNamespace) {
136133

137134
/**
138135
* Sets the flag to decide whether to omit the XML declaration.
139-
*
140136
* <strong>Default value: {@value #OMIT_XML_DECLARATION}</strong>
141137
*
142138
* @param currentOmitXmlDeclaration true if the XML declaration is omitted, otherwise
@@ -148,7 +144,6 @@ public void omitXmlDeclaration(final boolean currentOmitXmlDeclaration) {
148144

149145
/**
150146
* Sets the XML version.
151-
*
152147
* <strong>Default value: {@value #XML_VERSION}</strong>
153148
*
154149
* @param xmlVersion the XML version
@@ -159,7 +154,6 @@ public void setXmlVersion(final String xmlVersion) {
159154

160155
/**
161156
* Sets the XML encoding.
162-
*
163157
* <strong>Default value: {@value #XML_ENCODING}</strong>
164158
*
165159
* @param xmlEncoding the XML encoding
@@ -173,7 +167,6 @@ public void setXmlEncoding(final String xmlEncoding) {
173167
* If true, the input data is validated to ensure correct MARC21. Also the leader may be generated.
174168
* It acts as a wrapper: the input is piped to {@link org.metafacture.biblio.marc21.Marc21Encoder}, whose output is piped to {@link org.metafacture.biblio.marc21.Marc21Decoder}, whose output is piped to {@link org.metafacture.biblio.marc21.MarcXmlEncoder}.
175169
* This validation and treatment of the leader is safer but comes with a performance impact.
176-
*
177170
* <strong>Default value: {@value #ENSURE_CORRECT_MARC21_XML}</strong>
178171
*
179172
* @param ensureCorrectMarc21Xml if true the input data is validated to ensure correct MARC21. Also the leader may be generated.
@@ -184,7 +177,6 @@ public void setEnsureCorrectMarc21Xml(final boolean ensureCorrectMarc21Xml) {
184177

185178
/**
186179
* Formats the resulting XML by indentation (also known as "pretty printing").
187-
*
188180
* <strong>Default value: {@value #PRETTY_PRINTED}</strong>
189181
*
190182
* @param formatted true if formatting is activated, otherwise false
@@ -220,7 +212,7 @@ public void literal(final String name, final String value) {
220212

221213
@Override
222214
protected void onResetStream() {
223-
pipe.resetStream();
215+
encoder.onResetStream();
224216
}
225217

226218
@Override
@@ -247,11 +239,12 @@ private static class Encoder extends DefaultStreamPipe<ObjectReceiver<String>> {
247239
private String currentEntity = "";
248240

249241
private boolean emitNamespace = true;
250-
private Object[] namespacePrefix = new Object[]{emitNamespace ? NAMESPACE_PREFIX : EMPTY};
242+
private Object[] namespacePrefix = new Object[]{NAMESPACE_PREFIX};
251243

252244
private int indentationLevel;
253245
private boolean formatted = PRETTY_PRINTED;
254246
private int recordAttributeOffset;
247+
private int recordLeaderOffset;
255248

256249
private Encoder() {
257250
}
@@ -294,7 +287,7 @@ public void startRecord(final String identifier) {
294287
writeTag(Tag.record::open);
295288
recordAttributeOffset = builder.length() - 1;
296289
prettyPrintNewLine();
297-
290+
recordLeaderOffset = builder.length();
298291
incrementIndentationLevel();
299292
}
300293

@@ -345,6 +338,7 @@ public void literal(final String name, final String value) {
345338
if (name.equals(Marc21EventNames.MARCXML_TYPE_LITERAL)) {
346339
if (value != null) {
347340
builder.insert(recordAttributeOffset, String.format(ATTRIBUTE_TEMPLATE, name, value));
341+
recordLeaderOffset = builder.length();
348342
}
349343
}
350344
else if (!appendLeader(name, value)) {
@@ -353,7 +347,7 @@ else if (!appendLeader(name, value)) {
353347
if (value != null) {
354348
writeEscaped(value.trim());
355349
}
356-
writeTag(Tag.controlfield::close);
350+
writeTag(Tag.controlfield::close, false);
357351
prettyPrintNewLine();
358352
}
359353
}
@@ -378,7 +372,9 @@ protected void onResetStream() {
378372

379373
@Override
380374
protected void onCloseStream() {
381-
writeFooter();
375+
if (!atStreamStart) {
376+
writeFooter();
377+
}
382378
sendAndClearData();
383379
}
384380

@@ -408,9 +404,20 @@ private void writeFooter() {
408404
* @param str the unescaped sequence to be written
409405
*/
410406
private void writeRaw(final String str) {
407+
411408
builder.append(str);
412409
}
413410

411+
/**
412+
* Writes the unescaped sequence to the leader position.
413+
*
414+
* @param str the unescaped sequence to be written to the leader position
415+
*/
416+
private void writeRawLeader(final String str) {
417+
builder.insert(recordLeaderOffset, str);
418+
recordLeaderOffset = recordLeaderOffset + str.length();
419+
}
420+
414421
private boolean appendLeader(final String name, final String value) {
415422
if (name.equals(Marc21EventNames.LEADER_ENTITY)) {
416423
leaderBuilder.append(value);
@@ -432,12 +439,18 @@ private void writeEscaped(final String str) {
432439

433440
private void writeLeader() {
434441
final String leader = leaderBuilder.toString();
435-
if (!leader.isEmpty()) {
436-
prettyPrintIndentation();
437-
writeTag(Tag.leader::open);
438-
writeRaw("0000" + leader.substring(0, 4) + "2200000" + leader.substring(5, 7) + "4500"); // creates a valid leader without counted elements
439-
writeTag(Tag.leader::close);
440-
prettyPrintNewLine();
442+
if (leaderBuilder.length() > 0) {
443+
if (formatted) {
444+
writeRawLeader(getIndentationPrefix());
445+
}
446+
447+
writeTagLeader(Tag.leader::open);
448+
writeRawLeader("0000" + leader.substring(0, 4) + "2200000" + leader.substring(5, 7) + "4500"); // creates a valid leader without counted elements
449+
writeTagLeader(Tag.leader::close);
450+
451+
if (formatted) {
452+
writeRawLeader(NEW_LINE);
453+
}
441454
}
442455
}
443456

@@ -447,10 +460,17 @@ private void writeTag(final Function<Object[], String> function, final Object...
447460
writeRaw(function.apply(allArgs));
448461
}
449462

463+
private void writeTagLeader(final Function<Object[], String> function) {
464+
writeRawLeader(function.apply(namespacePrefix));
465+
}
466+
467+
private String getIndentationPrefix() {
468+
return String.join("", Collections.nCopies(indentationLevel, INDENT));
469+
}
470+
450471
private void prettyPrintIndentation() {
451472
if (formatted) {
452-
final String prefix = String.join("", Collections.nCopies(indentationLevel, INDENT));
453-
builder.append(prefix);
473+
builder.append(getIndentationPrefix());
454474
}
455475
}
456476

metafacture-biblio/src/main/java/org/metafacture/biblio/marc21/MarcXmlHandler.java

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@
3232
* @author Markus Michael Geipel
3333
*
3434
*/
35-
@Description("A marc xml reader")
35+
@Description("A MARC XML reader. To read marc data without namespace specification set option `namespace=\"\"`")
3636
@In(XmlReceiver.class)
3737
@Out(StreamReceiver.class)
3838
@FluxCommand("handle-marcxml")
@@ -63,7 +63,8 @@ public MarcXmlHandler() {
6363
*
6464
* <strong>Default value: {@value #NAMESPACE}</strong>
6565
*
66-
* @param namespace the namespace
66+
* @param namespace the namespace. Set to {@code null} if the namespace shouldn't be checked. Set to the empty string
67+
* if the namespace is missing in the data.
6768
*/
6869
public void setNamespace(final String namespace) {
6970
this.namespace = namespace;

metafacture-biblio/src/test/java/org/metafacture/biblio/marc21/Marc21EncoderTest.java

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,8 @@
3838
*/
3939
public final class Marc21EncoderTest {
4040

41+
private static final String BAD_LEADER = "00600ny a22002053n 4500";
42+
4143
private Marc21Encoder marc21Encoder;
4244

4345
@Mock
@@ -147,4 +149,21 @@ public void issue524ShouldComputeValidLeader() {
147149
verify(receiver).process(matches("00055pam a2200037 c 4500021001700000\u001e.*\u001d"));
148150
}
149151

152+
@Test(expected = FormatException.class)
153+
public void issue567ShouldFailValidateLeaderAsDefault() {
154+
marc21Encoder.startRecord("");
155+
marc21Encoder.literal(LEADER_ENTITY, BAD_LEADER);
156+
marc21Encoder.endRecord();
157+
}
158+
159+
@Test
160+
public void issue567ShouldNotValidateLeader() {
161+
marc21Encoder.setValidateLeader(false);
162+
marc21Encoder.startRecord("");
163+
marc21Encoder.literal(LEADER_ENTITY, BAD_LEADER );
164+
marc21Encoder.endRecord();
165+
166+
verify(receiver).process(matches("00026ny a22000253n 4500\u001e\u001d"));
167+
}
168+
150169
}

0 commit comments

Comments
 (0)