Skip to content

Add Marc21XmlEncoder #531

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 9 commits into from
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
package org.metafacture.biblio.marc21;

import org.metafacture.framework.ObjectReceiver;
import org.metafacture.framework.helpers.DefaultStreamPipe;

/**
 * Common base type of the MARCXML encoders in this package.
 *
 * It combines {@link DefaultStreamPipe} (with an {@code ObjectReceiver<String>}
 * as receiver type) with the configuration options declared by
 * {@link MarcXmlEncoderInterface}, so that callers can hold either encoder
 * through this one type.
 */
public abstract class AbstractMarcXmlEncoder extends DefaultStreamPipe<ObjectReceiver<String>> implements MarcXmlEncoderInterface {

/**
 * Reset hook with an empty default body; subclasses may override it.
 *
 * NOTE(review): this presumably overrides a hook of the same name inherited
 * via {@link DefaultStreamPipe}, yet carries no {@code @Override} - confirm
 * against the superclass and add the annotation if so.
 */
protected void onResetStream() {
// Intentionally empty: nothing to reset at this level.
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
/*
* Copyright 2024 hbz
*
* Licensed under the Apache License, Version 2.0 the "License";
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.metafacture.biblio.marc21;

import org.metafacture.framework.FluxCommand;
import org.metafacture.framework.StreamReceiver;
import org.metafacture.framework.annotations.Description;
import org.metafacture.framework.annotations.In;
import org.metafacture.framework.annotations.Out;

/**
 * Acts as a wrapper: pipes input to {@link Marc21Encoder}, whose output is
 * piped to {@link Marc21Decoder}, whose output is piped to
 * {@link MarcXmlEncoder}.
 *
 * All stream events received by this class are forwarded to the head of that
 * internal chain; all XML-related settings are forwarded to its tail.
 *
 * @author Pascal Christoph (dr0i)
 *
 */
@In(StreamReceiver.class)
@Out(String.class)
@Description("Encodes MARC21 records as MARCXML. It wraps 'encode-marc21 | decode-marc21 | encode-marcxml' to generate MARCXML more safely, especially when building the 'leader'.")
@FluxCommand("encode-marc21xml")
public final class Marc21XmlEncoder extends AbstractMarcXmlEncoder {

    private final Marc21Decoder marc21Decoder = new Marc21Decoder();
    private final Marc21Encoder marc21Encoder = new Marc21Encoder();
    private final MarcXmlEncoder marcXmlEncoder = new MarcXmlEncoder();

    /**
     * Creates an instance of {@link Marc21XmlEncoder}.
     *
     * Wires the internal chain encoder -> decoder -> XML encoder and tells
     * the decoder to emit the leader as a whole.
     */
    public Marc21XmlEncoder() {
        marc21Decoder.setEmitLeaderAsWhole(true);

        marc21Encoder.setReceiver(marc21Decoder);
        marc21Decoder.setReceiver(marcXmlEncoder);
    }

    /**
     * Attaches the receiver of this pipe to the end of the internal chain, so
     * that the generated XML reaches the downstream module.
     */
    @Override
    protected void onSetReceiver() {
        marcXmlEncoder.setReceiver(getReceiver());
    }

    /**
     * Forwards the start of a record to the internal MARC21 encoder.
     *
     * @param identifier the record identifier
     */
    @Override
    public void startRecord(final String identifier) {
        marc21Encoder.startRecord(identifier);
    }

    /**
     * Forwards the end of a record to the internal MARC21 encoder.
     */
    @Override
    public void endRecord() {
        marc21Encoder.endRecord();
    }

    /**
     * Forwards the start of an entity to the internal MARC21 encoder.
     *
     * @param name the entity name
     */
    @Override
    public void startEntity(final String name) {
        marc21Encoder.startEntity(name);
    }

    /**
     * Forwards the end of an entity to the internal MARC21 encoder.
     */
    @Override
    public void endEntity() {
        marc21Encoder.endEntity();
    }

    /**
     * Forwards a literal to the internal MARC21 encoder.
     *
     * @param name  the literal name
     * @param value the literal value
     */
    @Override
    public void literal(final String name, final String value) {
        marc21Encoder.literal(name, value);
    }

    /**
     * Closes the internal chain, starting at the MARC21 encoder.
     *
     * NOTE(review): the internal XML encoder shares this pipe's receiver; if
     * the base class also forwards closeStream() to that receiver after this
     * hook, the receiver is closed twice - confirm against the base class.
     */
    @Override
    protected void onCloseStream() {
        marc21Encoder.closeStream();
    }

    /**
     * Resets the internal chain, starting at the MARC21 encoder.
     */
    @Override
    public void onResetStream() {
        marc21Encoder.resetStream();
    }

    /**
     * Forwards the setting to the wrapped {@link MarcXmlEncoder}.
     *
     * @param emitNamespace true if the namespace is emitted, otherwise false
     */
    @Override
    public void setEmitNamespace(final boolean emitNamespace) {
        marcXmlEncoder.setEmitNamespace(emitNamespace);
    }

    /**
     * Forwards the setting to the wrapped {@link MarcXmlEncoder}.
     *
     * @param currentOmitXmlDeclaration true if the XML declaration is omitted,
     *                                  otherwise false
     */
    @Override
    public void omitXmlDeclaration(final boolean currentOmitXmlDeclaration) {
        marcXmlEncoder.omitXmlDeclaration(currentOmitXmlDeclaration);
    }

    /**
     * Forwards the setting to the wrapped {@link MarcXmlEncoder}.
     *
     * @param xmlVersion the XML version
     */
    @Override
    public void setXmlVersion(final String xmlVersion) {
        marcXmlEncoder.setXmlVersion(xmlVersion);
    }

    /**
     * Forwards the setting to the wrapped {@link MarcXmlEncoder}.
     *
     * @param xmlEncoding the XML encoding
     */
    @Override
    public void setXmlEncoding(final String xmlEncoding) {
        marcXmlEncoder.setXmlEncoding(xmlEncoding);
    }

    /**
     * Forwards the setting to the wrapped {@link MarcXmlEncoder}.
     *
     * @param formatted true if formatting is activated, otherwise false
     */
    @Override
    public void setFormatted(final boolean formatted) {
        marcXmlEncoder.setFormatted(formatted);
    }
}

Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,10 @@
import org.metafacture.commons.XmlUtil;
import org.metafacture.framework.FluxCommand;
import org.metafacture.framework.MetafactureException;
import org.metafacture.framework.ObjectReceiver;
import org.metafacture.framework.StreamReceiver;
import org.metafacture.framework.annotations.Description;
import org.metafacture.framework.annotations.In;
import org.metafacture.framework.annotations.Out;
import org.metafacture.framework.helpers.DefaultStreamPipe;

import java.util.Arrays;
import java.util.Collections;
Expand All @@ -36,11 +34,11 @@
* @author Pascal Christoph (dr0i) dug it up again
*/

@Description("Encodes a stream into MARCXML.")
@Description("Encodes a stream into MARCXML. Use this only if you can ensure valid MARC21. Also, the leader must be correct and set as one literal. You may want to use encode-marc21xml instead (which can cope with e.g. an irregular leader).")
@In(StreamReceiver.class)
@Out(String.class)
@FluxCommand("encode-marcxml")
public final class MarcXmlEncoder extends DefaultStreamPipe<ObjectReceiver<String>> {
public final class MarcXmlEncoder extends AbstractMarcXmlEncoder {

public static final String NAMESPACE_NAME = "marc";
public static final String XML_ENCODING = "UTF-8";
Expand Down Expand Up @@ -106,6 +104,7 @@ public String close(final Object[] args) {

private final StringBuilder builder = new StringBuilder();

private final StringBuilder builderLeader = new StringBuilder();
private boolean atStreamStart = true;

private boolean omitXmlDeclaration = OMIT_XML_DECLARATION;
Expand Down Expand Up @@ -206,6 +205,9 @@ public void startRecord(final String identifier) {

@Override
public void endRecord() {
if (builderLeader.length() > 0) {
writeLeader();
Copy link
Member

@blackwinter blackwinter May 7, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Am I understanding correctly that what you're doing here with writeLeader() and writeRawLeader() is to combine multiple leader literals into a single element? Isn't this a separate issue (#527 instead of #336/#524 for encode-marc21xml)? At the very least, it should have been a separate commit (if not a separate pull request).

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I have looked again and have come to the conclusion that part of #524 is in fact resolvable only by resolving #527 (see #527 (comment)). So we should reference #524 here and that's it. But if you have a better solution, please go ahead.

}
decrementIndentationLevel();
prettyPrintIndentation();
writeTag(Tag.record::close);
Expand Down Expand Up @@ -315,6 +317,15 @@ private void writeRaw(final String str) {
builder.append(str);
}

/**
* Appends an unescaped sequence to the leader buffer ({@code builderLeader}).
*
* Nothing is written to the output here; the parameterless
* {@code writeLeader()} later emits the buffered content as a single
* leader element.
*
* @param str the unescaped sequence to be appended to the leader
*/
private void writeRawLeader(final String str) {
builderLeader.append(str);
}

/**
* Writes an escaped sequence.
*
Expand All @@ -324,14 +335,17 @@ private void writeEscaped(final String str) {
builder.append(XmlUtil.escape(str, false));
}

/**
 * Emits the buffered leader as a single leader element and then empties
 * the leader buffer.
 *
 * The buffer is filled piecewise by {@code writeRawLeader(String)}. It has
 * to be cleared once its content has been written; otherwise the leader of
 * this record would still be in the buffer when the next record appends its
 * own leader, and both would be emitted together.
 */
private void writeLeader() {
    prettyPrintIndentation();
    writeTag(Tag.leader::open);
    writeRaw(builderLeader.toString());
    writeTag(Tag.leader::close);
    prettyPrintNewLine();
    // Start the next record with an empty leader buffer.
    builderLeader.setLength(0);
}

private boolean writeLeader(final String name, final String value) {
if (name.equals(Marc21EventNames.LEADER_ENTITY)) {
prettyPrintIndentation();
writeTag(Tag.leader::open);
writeRaw(value);
writeTag(Tag.leader::close);
prettyPrintNewLine();

writeRawLeader(value);
return true;
}
else {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
package org.metafacture.biblio.marc21;

/**
 * Configuration options shared by the MARCXML encoders in this package.
 *
 * The defaults referenced below are the constants declared in
 * {@link MarcXmlEncoder}.
 */
public interface MarcXmlEncoderInterface {

/**
* Sets the flag to decide whether to emit the {@value MarcXmlEncoder#NAMESPACE_NAME}
* namespace.
*
* @param emitNamespace true if the namespace is emitted, otherwise false
*/
void setEmitNamespace(boolean emitNamespace);

/**
* Sets the flag to decide whether to omit the XML declaration.
*
* <strong>Default value: {@value MarcXmlEncoder#OMIT_XML_DECLARATION}</strong>
*
* @param currentOmitXmlDeclaration true if the XML declaration is omitted, otherwise
* false
*/
void omitXmlDeclaration(boolean currentOmitXmlDeclaration);

/**
* Sets the XML version.
*
* <strong>Default value: {@value MarcXmlEncoder#XML_VERSION}</strong>
*
* @param xmlVersion the XML version
*/
void setXmlVersion(String xmlVersion);

/**
* Sets the XML encoding.
*
* <strong>Default value: {@value MarcXmlEncoder#XML_ENCODING}</strong>
*
* @param xmlEncoding the XML encoding
*/
void setXmlEncoding(String xmlEncoding);

/**
* Sets whether the resulting XML is formatted by indentation
* (also known as "pretty printing").
*
* <strong>Default value: {@value MarcXmlEncoder#PRETTY_PRINTED}</strong>
*
* @param formatted true if formatting is activated, otherwise false
*/
void setFormatted(boolean formatted);
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
package org.metafacture.biblio.marc21;

import org.junit.Before;
import org.junit.Test;
import org.metafacture.framework.FormatException;
import org.metafacture.framework.MissingIdException;

/**
 * Runs selected scenarios of {@link MarcXmlEncoderTest} against
 * {@link Marc21XmlEncoder}.
 *
 * The scenarios are reused by replacing the encoder of a
 * {@link MarcXmlEncoderTest} instance and calling its test methods directly.
 * Because they are called as plain methods, only the expectations declared
 * on the methods of this class apply.
 */
public class Marc21XmlEncoderTest {

    private final MarcXmlEncoderTest marcXmlEncoderTest = new MarcXmlEncoderTest();

    /**
     * Installs a fresh {@link Marc21XmlEncoder} in the reused test instance
     * and lets it configure the encoder and its receiver.
     */
    @Before
    public void setUp() {
        marcXmlEncoderTest.encoder = new Marc21XmlEncoder();
        marcXmlEncoderTest.initializeEncoder();
    }

    /**
     * Expects a {@link FormatException} when the scenario is run through the
     * wrapping encoder.
     */
    @Test(expected = FormatException.class)
    public void createAnRecordWithLeader() {
        marcXmlEncoderTest.createAnRecordWithLeader();
    }

    /**
     * Expects a {@link FormatException} when the scenario is run through the
     * wrapping encoder.
     */
    @Test(expected = FormatException.class)
    public void issue336_createRecordWithTopLevelLeader() {
        marcXmlEncoderTest.issue336_createRecordWithTopLevelLeader();
    }

    /**
     * Expects the scenario to pass, i.e. the produced XML equals the expected
     * XML of the reused test method.
     */
    @Test
    public void issue336_createRecordWithTopLevelLeader_Marc21Xml() {
        marcXmlEncoderTest.issue336_createRecordWithTopLevelLeader_Marc21Xml();
    }

    /**
     * Expects a {@link MissingIdException} when the scenario is run through
     * the wrapping encoder.
     */
    @Test(expected = MissingIdException.class)
    public void issue527ShouldEmitLeaderAlwaysAsWholeString() {
        marcXmlEncoderTest.issue527ShouldEmitLeaderAlwaysAsWholeString();
    }

}
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,20 @@

package org.metafacture.biblio.marc21;

import org.junit.After;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;

import org.junit.After;
import org.junit.Before;
import org.junit.ComparisonFailure;
import org.junit.Test;
import static org.metafacture.biblio.marc21.Marc21EventNames.BIBLIOGRAPHIC_LEVEL_LITERAL;
import static org.metafacture.biblio.marc21.Marc21EventNames.CATALOGING_FORM_LITERAL;
import static org.metafacture.biblio.marc21.Marc21EventNames.CHARACTER_CODING_LITERAL;
import static org.metafacture.biblio.marc21.Marc21EventNames.ENCODING_LEVEL_LITERAL;
import static org.metafacture.biblio.marc21.Marc21EventNames.MULTIPART_LEVEL_LITERAL;
import static org.metafacture.biblio.marc21.Marc21EventNames.RECORD_STATUS_LITERAL;
import static org.metafacture.biblio.marc21.Marc21EventNames.RECORD_TYPE_LITERAL;
import static org.metafacture.biblio.marc21.Marc21EventNames.TYPE_OF_CONTROL_LITERAL;
import org.metafacture.framework.MetafactureException;
import org.metafacture.framework.helpers.DefaultObjectReceiver;

Expand All @@ -48,11 +56,15 @@ public class MarcXmlEncoderTest {
private static final String RECORD_ID = "92005291";

private static StringBuilder resultCollector;
private static MarcXmlEncoder encoder;
AbstractMarcXmlEncoder encoder;

/**
 * Creates the encoder under test and hands it to initializeEncoder() for
 * configuration. The two steps are separate so that the encoder field can
 * be replaced before initializeEncoder() is called.
 */
@Before
public void setUp() {
encoder = new MarcXmlEncoder();
initializeEncoder();
}

void initializeEncoder() {
encoder.setFormatted(false);
encoder.setReceiver(new DefaultObjectReceiver<String>() {
@Override
Expand All @@ -67,7 +79,7 @@ public void process(final String obj) {
public void tearDown() {
}

private void addOneRecord(MarcXmlEncoder encoder) {
private void addOneRecord(AbstractMarcXmlEncoder encoder) {
encoder.startRecord(RECORD_ID);
encoder.literal("001", RECORD_ID);
encoder.startEntity("010 ");
Expand Down Expand Up @@ -212,6 +224,42 @@ public void issue336_createRecordWithTopLevelLeader() {
assertEquals(expected, actual);
}

/**
 * Feeds a record with a top-level leader literal whose length fields are
 * zero and compares the output with XML whose leader carries non-zero
 * length values.
 *
 * With the encoder created by this class the comparison is expected to
 * fail, hence the expected {@link ComparisonFailure}.
 */
@Test(expected = ComparisonFailure.class)
public void issue336_createRecordWithTopLevelLeader_Marc21Xml() {
    // Emit one record: a control field followed by a top-level leader.
    encoder.startRecord("1");
    encoder.literal("001", "8u3287432");
    encoder.literal(Marc21EventNames.LEADER_ENTITY, "00000naa a2200000uc 4500");
    encoder.endRecord();
    encoder.closeStream();

    final String expectedRecord =
            "<marc:record><marc:controlfield tag=\"001\">8u3287432</marc:controlfield>"
            + "<marc:leader>00048naa a2200037uc 4500</marc:leader></marc:record>";
    final String expectedXml =
            XML_DECLARATION + XML_ROOT_OPEN + expectedRecord + XML_MARC_COLLECTION_END_TAG;

    assertEquals(expectedXml, resultCollector.toString());
}

/**
 * Sends the leader as an entity made of eight separate literals and
 * expects them to be emitted as one leader element that contains the
 * concatenated values.
 */
@Test
public void issue527ShouldEmitLeaderAlwaysAsWholeString() {
    encoder.startRecord("1");

    // The leader arrives piecewise, one literal per leader position.
    encoder.startEntity(Marc21EventNames.LEADER_ENTITY);
    encoder.literal(RECORD_STATUS_LITERAL, "a");
    encoder.literal(RECORD_TYPE_LITERAL, "o");
    encoder.literal(BIBLIOGRAPHIC_LEVEL_LITERAL, "a");
    encoder.literal(TYPE_OF_CONTROL_LITERAL, " ");
    encoder.literal(CHARACTER_CODING_LITERAL, "a");
    encoder.literal(ENCODING_LEVEL_LITERAL, "z");
    encoder.literal(CATALOGING_FORM_LITERAL, "u");
    encoder.literal(MULTIPART_LEVEL_LITERAL, " ");
    encoder.endEntity();

    encoder.endRecord();
    encoder.closeStream();

    final String expectedRecord = "<marc:record><marc:leader>aoa azu </marc:leader></marc:record>";
    final String expectedXml =
            XML_DECLARATION + XML_ROOT_OPEN + expectedRecord + XML_MARC_COLLECTION_END_TAG;

    assertEquals(expectedXml, resultCollector.toString());
}

@Test
public void sendDataAndClearWhenRecordStartedAndStreamResets() {
encoder.startRecord("1");
Expand Down