Skip to content

Commit 24704c8

Browse files
authored
Merge pull request #275 from marklogic/feature/collated-string
Verifying that collatedString works
2 parents (825e6e0 + 62b56e8), commit 24704c8

File tree

8 files changed, with 39 additions and 21 deletions.

build.gradle

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -21,6 +21,7 @@ java {
2121

2222
repositories {
2323
mavenCentral()
24+
mavenLocal()
2425
maven {
2526
url "https://bed-artifactory.bedford.progress.com:443/artifactory/ml-maven-snapshots/"
2627
}

src/main/java/com/marklogic/spark/reader/optic/SchemaInferrer.java

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -54,6 +54,7 @@ public abstract class SchemaInferrer {
5454
COLUMN_INFO_TYPES_TO_SPARK_TYPES.put("yearMonthDuration", DataTypes.StringType);
5555
COLUMN_INFO_TYPES_TO_SPARK_TYPES.put("dayTimeDuration", DataTypes.StringType);
5656
COLUMN_INFO_TYPES_TO_SPARK_TYPES.put("string", DataTypes.StringType);
57+
COLUMN_INFO_TYPES_TO_SPARK_TYPES.put("collatedString", DataTypes.StringType);
5758
COLUMN_INFO_TYPES_TO_SPARK_TYPES.put("anyUri", DataTypes.StringType);
5859
COLUMN_INFO_TYPES_TO_SPARK_TYPES.put("point", DataTypes.StringType);
5960
COLUMN_INFO_TYPES_TO_SPARK_TYPES.put("boolean", DataTypes.BooleanType);

src/test/java/com/marklogic/spark/reader/optic/InferSchemaTest.java

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -36,7 +36,7 @@ void inferFromAllTdeTypes() throws Exception {
3636
String columnInfoResponse = readClasspathFile("allTypes-columnInfo-response.txt");
3737
StructType schema = SchemaInferrer.inferSchema(columnInfoResponse);
3838

39-
assertEquals(35, schema.size(), "The TDE has 35 columns, and the hidden 'rowid' column that's returned " +
39+
assertEquals(36, schema.size(), "The TDE has 36 columns, and the hidden 'rowid' column that's returned " +
4040
"by /v1/rows should not be included in the Spark schema, as it will never be populated with a value.");
4141

4242
String actualJson = schema.prettyJson();

src/test/java/com/marklogic/spark/reader/optic/ReadRowsWithInferredSchemaTest.java

Lines changed: 21 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -73,25 +73,26 @@ void allTypes() {
7373
assertEquals("P2Y6M", row.getString(14)); // yearMonthDuration
7474
assertEquals("PT1M", row.getString(15)); // dayTimeDuration
7575
assertEquals("hello", row.getString(16));
76-
assertEquals("http://example.org/", row.getString(17)); // anyURI
77-
assertEquals("50,50", row.getString(18)); // point
78-
assertEquals("50,50", row.getString(19)); // longLatPoint
79-
assertTrue(row.getBoolean(20));
80-
assertEquals("c2xpbmdzIGFuZCBhcnJvd3Mgb2Ygb3V0cmFnZW91cyBmb3J0dW5l", row.getString(21)); // base64Binary
81-
assertEquals("499602D2", row.getString(22)); // hexBinary
82-
assertEquals("1", row.getString(23), "Because MarkLogic defines the type of 'byte' as 'none', the Spark " +
76+
assertEquals("hello collated", row.getString(17));
77+
assertEquals("http://example.org/", row.getString(18)); // anyURI
78+
assertEquals("50,50", row.getString(19)); // point
79+
assertEquals("50,50", row.getString(20)); // longLatPoint
80+
assertTrue(row.getBoolean(21));
81+
assertEquals("c2xpbmdzIGFuZCBhcnJvd3Mgb2Ygb3V0cmFnZW91cyBmb3J0dW5l", row.getString(22)); // base64Binary
82+
assertEquals("499602D2", row.getString(23)); // hexBinary
83+
assertEquals("1", row.getString(24), "Because MarkLogic defines the type of 'byte' as 'none', the Spark " +
8384
"connector treats it as a string."); // byte
84-
assertEquals("PT1M", row.getString(24)); // duration
85-
assertEquals("--04-18", row.getString(25)); // gMonthDay
86-
assertEquals(1, row.getInt(26));
87-
assertEquals(-1, row.getInt(27)); // negativeInteger
88-
assertEquals(11, row.getInt(28)); // nonNegativeInteger
89-
assertEquals(-11, row.getInt(29)); // nonPositiveInteger
90-
assertEquals(20, row.getInt(30)); // positiveInteger
91-
assertEquals(7, row.getInt(31)); // short
92-
assertEquals(4, row.getInt(32)); // unsignedByte
93-
assertEquals(8, row.getInt(33)); // unsignedShort
94-
assertEquals("http://example.org/", row.getString(34)); // IRI
85+
assertEquals("PT1M", row.getString(25)); // duration
86+
assertEquals("--04-18", row.getString(26)); // gMonthDay
87+
assertEquals(1, row.getInt(27));
88+
assertEquals(-1, row.getInt(28)); // negativeInteger
89+
assertEquals(11, row.getInt(29)); // nonNegativeInteger
90+
assertEquals(-11, row.getInt(30)); // nonPositiveInteger
91+
assertEquals(20, row.getInt(31)); // positiveInteger
92+
assertEquals(7, row.getInt(32)); // short
93+
assertEquals(4, row.getInt(33)); // unsignedByte
94+
assertEquals(8, row.getInt(34)); // unsignedShort
95+
assertEquals("http://example.org/", row.getString(35)); // IRI
9596
}
9697

9798
@Test
@@ -106,9 +107,9 @@ void allColumnsNullExceptRequiredOne() {
106107
assertEquals(1, rows.size());
107108

108109
Row row = rows.get(0);
109-
assertEquals(35, row.size(), "Expecting all 35 columns to still exist, even though all but one have a null value");
110+
assertEquals(36, row.size(), "Expecting all 36 columns to still exist, even though all but one have a null value");
110111
assertEquals(2, row.getInt(0));
111-
for (int i = 1; i < 35; i++) {
112+
for (int i = 1; i < 36; i++) {
112113
assertNull(row.get(i));
113114
}
114115
}

src/test/ml-data/allTypes-data.json

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -18,6 +18,7 @@
1818
"yearMonthDurationValue": "P2Y6M",
1919
"dayTimeDurationValue": "PT1M",
2020
"stringValue": "hello",
21+
"collatedStringValue": "hello collated",
2122
"anyURIValue": "http://example.org/",
2223
"pointValue": "50 50",
2324
"longLatPointValue": "50 50",

src/test/ml-schemas/tde/allTypes.json

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -108,6 +108,13 @@
108108
"val": "stringValue",
109109
"nullable": true
110110
},
111+
{
112+
"name": "collatedStringValue",
113+
"scalarType": "string",
114+
"collation": "http://marklogic.com/collation/",
115+
"val": "collatedStringValue",
116+
"nullable": true
117+
},
111118
{
112119
"name": "anyURIValue",
113120
"scalarType": "anyURI",

src/test/resources/allTypes-columnInfo-response.txt

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -15,6 +15,7 @@
1515
{"schema":"sparkTest", "view":"AllTypeRows", "column":"yearMonthDurationValue", "type":"yearMonthDuration", "hidden":false, "nullable":true}
1616
{"schema":"sparkTest", "view":"AllTypeRows", "column":"dayTimeDurationValue", "type":"dayTimeDuration", "hidden":false, "nullable":true}
1717
{"schema":"sparkTest", "view":"AllTypeRows", "column":"stringValue", "type":"string", "hidden":false, "nullable":true}
18+
{"schema":"sparkTest", "view":"AllTypeRows", "column":"collatedStringValue", "type":"collatedString", "hidden":false, "nullable":true}
1819
{"schema":"sparkTest", "view":"AllTypeRows", "column":"anyURIValue", "type":"anyUri", "hidden":false, "nullable":true}
1920
{"schema":"sparkTest", "view":"AllTypeRows", "column":"pointValue", "type":"point", "hidden":false, "nullable":true, "coordinate-system":"wgs84"}
2021
{"schema":"sparkTest", "view":"AllTypeRows", "column":"longLatPointValue", "type":"point", "hidden":false, "nullable":true, "coordinate-system":"wgs84"}

src/test/resources/allTypes-expected-spark-schema.json

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -103,6 +103,12 @@
103103
"nullable": true,
104104
"metadata": {}
105105
},
106+
{
107+
"name": "sparkTest.AllTypeRows.collatedStringValue",
108+
"type": "string",
109+
"nullable": true,
110+
"metadata": {}
111+
},
106112
{
107113
"name": "sparkTest.AllTypeRows.anyURIValue",
108114
"type": "string",

0 commit comments

Comments
 (0)