From 0253f70195120d5a4d1aa82603cbc0663beb5cfd Mon Sep 17 00:00:00 2001 From: Marco Date: Fri, 13 Jun 2025 12:14:08 -0300 Subject: [PATCH 1/2] feat(Arrow): Implement ListArray and ListArrayBuilder with associated functionality --- Arrow/Sources/Arrow/ArrowArray.swift | 68 ++++++++++++++++ Arrow/Sources/Arrow/ArrowArrayBuilder.swift | 46 ++++++++++- Arrow/Sources/Arrow/ArrowBufferBuilder.swift | 63 ++++++++++++++- Arrow/Sources/Arrow/ArrowReader.swift | 44 +++++++++-- Arrow/Sources/Arrow/ArrowReaderHelper.swift | 36 ++++++--- Arrow/Sources/Arrow/ArrowType.swift | 26 ++++++- Arrow/Sources/Arrow/ArrowWriter.swift | 8 +- Arrow/Sources/Arrow/ProtoUtil.swift | 9 ++- Arrow/Tests/ArrowTests/ArrayTests.swift | 82 ++++++++++++++++++++ Arrow/Tests/ArrowTests/IPCTests.swift | 8 +- Arrow/Tests/ArrowTests/TableTests.swift | 4 +- 11 files changed, 363 insertions(+), 31 deletions(-) diff --git a/Arrow/Sources/Arrow/ArrowArray.swift b/Arrow/Sources/Arrow/ArrowArray.swift index d4ee873..87dfc26 100644 --- a/Arrow/Sources/Arrow/ArrowArray.swift +++ b/Arrow/Sources/Arrow/ArrowArray.swift @@ -115,6 +115,8 @@ public class ArrowArrayHolderImpl: ArrowArrayHolder { return try ArrowArrayHolderImpl(BinaryArray(with)) case .strct: return try ArrowArrayHolderImpl(StructArray(with)) + case .list: + return try ArrowArrayHolderImpl(ListArray(with)) default: throw ArrowError.invalid("Array not found for type: \(arrowType)") } @@ -405,3 +407,69 @@ public class StructArray: ArrowArray<[Any?]> { return output } } + +public class ListArray: ArrowArray<[Any?]> { + public private(set) var values: ArrowArrayHolder? + + public required init(_ arrowData: ArrowData) throws { + try super.init(arrowData) + guard arrowData.children.count == 1 else { + throw ArrowError.invalid("List array must have exactly one child") + } + + guard let listType = arrowData.type as? 
ArrowTypeList else { + throw ArrowError.invalid("Expected ArrowTypeList") + } + + self.values = try ArrowArrayHolderImpl.loadArray( + listType.elementType, + with: arrowData.children[0] + ) + } + + public override subscript(_ index: UInt) -> [Any?]? { + guard let values = self.values else { return nil } + + if self.arrowData.isNull(index) { + return nil + } + + let offsets = self.arrowData.buffers[1] + let offsetIndex = Int(index) * MemoryLayout.stride + + let startOffset = offsets.rawPointer.advanced(by: offsetIndex).load(as: Int32.self) + let endOffset = offsets.rawPointer.advanced(by: offsetIndex + MemoryLayout.stride).load(as: Int32.self) + + var items = [Any?]() + for i in startOffset.. String { + guard let list = self[index] else { + return "null" + } + + var output = "[" + + for (i, item) in list.enumerated() { + if i > 0 { + output.append(",") + } + + if item == nil { + output.append("null") + } else if let asStringItem = item as? AsString { + output.append(asStringItem.asString(0)) + } else { + output.append("\(item!)") + } + } + + output.append("]") + return output + } +} diff --git a/Arrow/Sources/Arrow/ArrowArrayBuilder.swift b/Arrow/Sources/Arrow/ArrowArrayBuilder.swift index 493e43a..d9d08c2 100644 --- a/Arrow/Sources/Arrow/ArrowArrayBuilder.swift +++ b/Arrow/Sources/Arrow/ArrowArrayBuilder.swift @@ -131,7 +131,7 @@ public class StructArrayBuilder: ArrowArrayBuilder { + let valueBuilder: any ArrowArrayHolderBuilder + + public override init(_ elementType: ArrowType) throws { + self.valueBuilder = try ArrowArrayBuilders.loadBuilder(arrowType: elementType) + try super.init(ArrowTypeList(elementType)) + } + + public override func append(_ values: [Any?]?) 
{ + self.bufferBuilder.append(values) + if let vals = values { + for val in vals { + self.valueBuilder.appendAny(val) + } + } + } + + public override func finish() throws -> ListArray { + let buffers = self.bufferBuilder.finish() + let childData = try valueBuilder.toHolder().array.arrowData + let arrowData = try ArrowData(self.type, buffers: buffers, children: [childData], nullCount: self.nullCount, length: self.length) + return try ListArray(arrowData) + } +} + public class ArrowArrayBuilders { public static func loadBuilder( // swiftlint:disable:this cyclomatic_complexity _ builderType: Any.Type) throws -> ArrowArrayHolderBuilder { @@ -290,6 +315,16 @@ public class ArrowArrayBuilders { throw ArrowError.invalid("Expected arrow type for \(arrowType.id) not found") } return try TimestampArrayBuilder(timestampType.unit) + case .list: + guard let listType = arrowType as? ArrowTypeList else { + throw ArrowError.invalid("Expected ArrowTypeList for \(arrowType.id)") + } + return try ListArrayBuilder(listType.elementType) + case .strct: + guard let structType = arrowType as? ArrowTypeStruct else { + throw ArrowError.invalid("Expected ArrowStructType for \(arrowType.id)") + } + return try StructArrayBuilder(structType.fields) default: throw ArrowError.unknownType("Builder not found for arrow type: \(arrowType.id)") } @@ -352,5 +387,12 @@ public class ArrowArrayBuilders { public static func loadTimestampArrayBuilder(_ unit: ArrowTimestampUnit, timezone: String? 
= nil) throws -> TimestampArrayBuilder { return try TimestampArrayBuilder(unit, timezone: timezone) + + public static func loadStructArrayBuilder(_ fields: [ArrowField]) throws -> StructArrayBuilder { + return try StructArrayBuilder(fields) + } + + public static func loadListArrayBuilder(_ elementType: ArrowType) throws -> ListArrayBuilder { + return try ListArrayBuilder(elementType) } } diff --git a/Arrow/Sources/Arrow/ArrowBufferBuilder.swift b/Arrow/Sources/Arrow/ArrowBufferBuilder.swift index cc0bae0..c75a90a 100644 --- a/Arrow/Sources/Arrow/ArrowBufferBuilder.swift +++ b/Arrow/Sources/Arrow/ArrowBufferBuilder.swift @@ -338,14 +338,14 @@ public class Date64BufferBuilder: AbstractWrapperBufferBuilder { public final class StructBufferBuilder: BaseBufferBuilder, ArrowBufferBuilder { public typealias ItemType = [Any?] - var info: ArrowNestedType? + var info: ArrowTypeStruct? public init() throws { let nulls = ArrowBuffer.createBuffer(0, size: UInt(MemoryLayout.stride)) super.init(nulls) } public func initializeTypeInfo(_ fields: [ArrowField]) { - info = ArrowNestedType(ArrowType.ArrowStruct, fields: fields) + info = ArrowTypeStruct(ArrowType.ArrowStruct, fields: fields) } public func append(_ newValue: [Any?]?) { @@ -379,3 +379,62 @@ public final class StructBufferBuilder: BaseBufferBuilder, ArrowBufferBuilder { return [nulls] } } + +public class ListBufferBuilder: BaseBufferBuilder, ArrowBufferBuilder { + public typealias ItemType = [Any?] + var offsets: ArrowBuffer + + public required init() throws { + self.offsets = ArrowBuffer.createBuffer(1, size: UInt(MemoryLayout.stride)) + let nulls = ArrowBuffer.createBuffer(0, size: UInt(MemoryLayout.stride)) + super.init(nulls) + self.offsets.rawPointer.storeBytes(of: Int32(0), as: Int32.self) + } + + public func append(_ newValue: [Any?]?) 
{ + let index = UInt(self.length) + self.length += 1 + + if length >= self.offsets.length { + self.resize(length + 1) + } + + let offsetIndex = Int(index) * MemoryLayout.stride + let currentOffset = self.offsets.rawPointer.advanced(by: offsetIndex).load(as: Int32.self) + + if let vals = newValue { + BitUtility.setBit(index + self.offset, buffer: self.nulls) + let newOffset = currentOffset + Int32(vals.count) + self.offsets.rawPointer.advanced(by: offsetIndex + MemoryLayout.stride).storeBytes(of: newOffset, as: Int32.self) + } else { + self.nullCount += 1 + BitUtility.clearBit(index + self.offset, buffer: self.nulls) + self.offsets.rawPointer.advanced(by: offsetIndex + MemoryLayout.stride).storeBytes(of: currentOffset, as: Int32.self) + } + } + + public override func isNull(_ index: UInt) -> Bool { + return !BitUtility.isSet(index + self.offset, buffer: self.nulls) + } + + public func resize(_ length: UInt) { + if length > self.offsets.length { + let resizeLength = resizeLength(self.offsets) + var offsets = ArrowBuffer.createBuffer(resizeLength, size: UInt(MemoryLayout.size)) + var nulls = ArrowBuffer.createBuffer(resizeLength/8 + 1, size: UInt(MemoryLayout.size)) + ArrowBuffer.copyCurrent(self.offsets, to: &offsets, len: self.offsets.capacity) + ArrowBuffer.copyCurrent(self.nulls, to: &nulls, len: self.nulls.capacity) + self.offsets = offsets + self.nulls = nulls + } + } + + public func finish() -> [ArrowBuffer] { + let length = self.length + var nulls = ArrowBuffer.createBuffer(length/8 + 1, size: UInt(MemoryLayout.size)) + var offsets = ArrowBuffer.createBuffer(length + 1, size: UInt(MemoryLayout.size)) + ArrowBuffer.copyCurrent(self.nulls, to: &nulls, len: nulls.capacity) + ArrowBuffer.copyCurrent(self.offsets, to: &offsets, len: offsets.capacity) + return [nulls, offsets] + } +} diff --git a/Arrow/Sources/Arrow/ArrowReader.swift b/Arrow/Sources/Arrow/ArrowReader.swift index 8515a78..3313b0c 100644 --- a/Arrow/Sources/Arrow/ArrowReader.swift +++ 
b/Arrow/Sources/Arrow/ArrowReader.swift @@ -116,6 +116,35 @@ public class ArrowReader { // swiftlint:disable:this type_body_length rbLength: UInt(loadInfo.batchData.recordBatch.length)) } + private func loadListData(_ loadInfo: DataLoadInfo, field: org_apache_arrow_flatbuf_Field) -> Result { + guard let node = loadInfo.batchData.nextNode() else { + return .failure(.invalid("Node not found")) + } + + guard let nullBuffer = loadInfo.batchData.nextBuffer() else { + return .failure(.invalid("Null buffer not found")) + } + + guard let offsetBuffer = loadInfo.batchData.nextBuffer() else { + return .failure(.invalid("Offset buffer not found")) + } + + let nullLength = UInt(ceil(Double(node.length) / 8)) + let arrowNullBuffer = makeBuffer(nullBuffer, fileData: loadInfo.fileData, length: nullLength, messageOffset: loadInfo.messageOffset) + let arrowOffsetBuffer = makeBuffer(offsetBuffer, fileData: loadInfo.fileData, length: UInt(node.length + 1), messageOffset: loadInfo.messageOffset) + + guard field.childrenCount == 1, let childField = field.children(at: 0) else { + return .failure(.invalid("List must have exactly one child")) + } + + switch loadField(loadInfo, field: childField) { + case .success(let childHolder): + return makeArrayHolder(field, buffers: [arrowNullBuffer, arrowOffsetBuffer], nullCount: UInt(node.nullCount), children: [childHolder.array.arrowData], rbLength: UInt(loadInfo.batchData.recordBatch.length)) + case .failure(let error): + return .failure(error) + } + } + private func loadPrimitiveData( _ loadInfo: DataLoadInfo, field: org_apache_arrow_flatbuf_Field) @@ -178,12 +207,17 @@ public class ArrowReader { // swiftlint:disable:this type_body_length _ loadInfo: DataLoadInfo, field: org_apache_arrow_flatbuf_Field) -> Result { - if isNestedType(field.typeType) { + switch field.typeType { + case .struct_: return loadStructData(loadInfo, field: field) - } else if isFixedPrimitive(field.typeType) { - return loadPrimitiveData(loadInfo, field: field) - } else { - 
return loadVariableData(loadInfo, field: field) + case .list: + return loadListData(loadInfo, field: field) + default: + if isFixedPrimitive(field.typeType) { + return loadPrimitiveData(loadInfo, field: field) + } else { + return loadVariableData(loadInfo, field: field) + } } } diff --git a/Arrow/Sources/Arrow/ArrowReaderHelper.swift b/Arrow/Sources/Arrow/ArrowReaderHelper.swift index 37f4680..77caf85 100644 --- a/Arrow/Sources/Arrow/ArrowReaderHelper.swift +++ b/Arrow/Sources/Arrow/ArrowReaderHelper.swift @@ -154,6 +154,23 @@ func makeStructHolder( } } +func makeListHolder( + _ field: ArrowField, + buffers: [ArrowBuffer], + nullCount: UInt, + children: [ArrowData], + rbLength: UInt +) -> Result { + do { + let arrowData = try ArrowData(field.type, buffers: buffers, children: children, nullCount: nullCount, length: rbLength) + return .success(ArrowArrayHolderImpl(try ListArray(arrowData))) + } catch let error as ArrowError { + return .failure(error) + } catch { + return .failure(.unknownError("\(error)")) + } +} + func makeArrayHolder( _ field: org_apache_arrow_flatbuf_Field, buffers: [ArrowBuffer], @@ -208,6 +225,8 @@ func makeArrayHolder( // swiftlint:disable:this cyclomatic_complexity return makeTimestampHolder(field, buffers: buffers, nullCount: nullCount) case .strct: return makeStructHolder(field, buffers: buffers, nullCount: nullCount, children: children!, rbLength: rbLength) + case .list: + return makeListHolder(field, buffers: buffers, nullCount: nullCount, children: children!, rbLength: rbLength) default: return .failure(.unknownType("Type \(typeId) currently not supported")) } @@ -230,15 +249,6 @@ func isFixedPrimitive(_ type: org_apache_arrow_flatbuf_Type_) -> Bool { } } -func isNestedType(_ type: org_apache_arrow_flatbuf_Type_) -> Bool { - switch type { - case .struct_: - return true - default: - return false - } -} - func findArrowType( // swiftlint:disable:this cyclomatic_complexity function_body_length _ field: org_apache_arrow_flatbuf_Field) -> 
ArrowType { let type = field.typeType @@ -307,7 +317,13 @@ func findArrowType( // swiftlint:disable:this cyclomatic_complexity function_bod ArrowField(childField.name ?? "", type: childType, isNullable: childField.nullable)) } - return ArrowNestedType(ArrowType.ArrowStruct, fields: fields) + return ArrowTypeStruct(ArrowType.ArrowStruct, fields: fields) + case .list: + guard field.childrenCount == 1, let childField = field.children(at: 0) else { + return ArrowType(ArrowType.ArrowUnknown) + } + let childType = findArrowType(childField) + return ArrowTypeList(childType) default: return ArrowType(ArrowType.ArrowUnknown) } diff --git a/Arrow/Sources/Arrow/ArrowType.swift b/Arrow/Sources/Arrow/ArrowType.swift index 9b6091f..fa564ad 100644 --- a/Arrow/Sources/Arrow/ArrowType.swift +++ b/Arrow/Sources/Arrow/ArrowType.swift @@ -173,6 +173,15 @@ public class ArrowNestedType: ArrowType { } } +public class ArrowTypeList: ArrowType { + let elementType: ArrowType + + public init(_ elementType: ArrowType) { + self.elementType = elementType + super.init(ArrowType.ArrowList) + } +} + public class ArrowType { public private(set) var info: ArrowType.Info public static let ArrowInt8 = Info.primitiveInfo(ArrowTypeId.int8) @@ -195,6 +204,7 @@ public class ArrowType { public static let ArrowTime64 = Info.timeInfo(ArrowTypeId.time64) public static let ArrowTimestamp = Info.timeInfo(ArrowTypeId.timestamp) public static let ArrowStruct = Info.complexInfo(ArrowTypeId.strct) + public static let ArrowList = Info.complexInfo(ArrowTypeId.list) public init(_ info: ArrowType.Info) { self.info = info @@ -320,7 +330,7 @@ public class ArrowType { return MemoryLayout.stride case .string: return MemoryLayout.stride - case .strct: + case .strct, .list: return 0 default: fatalError("Stride requested for unknown type: \(self)") @@ -375,6 +385,20 @@ public class ArrowType { return "z" case ArrowTypeId.string: return "u" + case ArrowTypeId.strct: + if let structType = self as? 
ArrowTypeStruct { + var format = "+s" + for field in structType.fields { + format += try field.type.cDataFormatId + } + return format + } + throw ArrowError.invalid("Invalid struct type") + case ArrowTypeId.list: + if let listType = self as? ArrowTypeList { + return "+l" + (try listType.elementType.cDataFormatId) + } + throw ArrowError.invalid("Invalid list type") default: throw ArrowError.notImplemented } diff --git a/Arrow/Sources/Arrow/ArrowWriter.swift b/Arrow/Sources/Arrow/ArrowWriter.swift index 3aa25b6..b697b1a 100644 --- a/Arrow/Sources/Arrow/ArrowWriter.swift +++ b/Arrow/Sources/Arrow/ArrowWriter.swift @@ -72,7 +72,7 @@ public class ArrowWriter { // swiftlint:disable:this type_body_length private func writeField(_ fbb: inout FlatBufferBuilder, field: ArrowField) -> Result { var fieldsOffset: Offset? - if let nestedField = field.type as? ArrowNestedType { + if let nestedField = field.type as? ArrowTypeStruct { var offsets = [Offset]() for field in nestedField.fields { switch writeField(&fbb, field: field) { @@ -169,7 +169,7 @@ public class ArrowWriter { // swiftlint:disable:this type_body_length org_apache_arrow_flatbuf_FieldNode(length: Int64(column.length), nullCount: Int64(column.nullCount)) offsets.append(fbb.create(struct: fieldNode)) - if let nestedType = column.type as? ArrowNestedType { + if let nestedType = column.type as? ArrowTypeStruct { let structArray = column.array as? StructArray writeFieldNodes(nestedType.fields, columns: structArray!.arrowFields!, offsets: &offsets, fbb: &fbb) } @@ -189,7 +189,7 @@ public class ArrowWriter { // swiftlint:disable:this type_body_length let buffer = org_apache_arrow_flatbuf_Buffer(offset: Int64(bufferOffset), length: Int64(bufferDataSize)) buffers.append(buffer) bufferOffset += bufferDataSize - if let nestedType = column.type as? ArrowNestedType { + if let nestedType = column.type as? ArrowTypeStruct { let structArray = column.array as? 
StructArray writeBufferInfo(nestedType.fields, columns: structArray!.arrowFields!, bufferOffset: &bufferOffset, buffers: &buffers, fbb: &fbb) @@ -246,7 +246,7 @@ public class ArrowWriter { // swiftlint:disable:this type_body_length for var bufferData in colBufferData { addPadForAlignment(&bufferData) writer.append(bufferData) - if let nestedType = column.type as? ArrowNestedType { + if let nestedType = column.type as? ArrowTypeStruct { guard let structArray = column.array as? StructArray else { return .failure(.invalid("Struct type array expected for nested type")) } diff --git a/Arrow/Sources/Arrow/ProtoUtil.swift b/Arrow/Sources/Arrow/ProtoUtil.swift index e91580e..e6c7910 100644 --- a/Arrow/Sources/Arrow/ProtoUtil.swift +++ b/Arrow/Sources/Arrow/ProtoUtil.swift @@ -87,7 +87,14 @@ func fromProto( // swiftlint:disable:this cyclomatic_complexity function_body_le children.append(fromProto(field: childField)) } - arrowType = ArrowNestedType(ArrowType.ArrowStruct, fields: children) + arrowType = ArrowTypeStruct(ArrowType.ArrowStruct, fields: children) + case .list: + guard field.childrenCount == 1, let childField = field.children(at: 0) else { + arrowType = ArrowType(ArrowType.ArrowUnknown) + break + } + let childArrowField = fromProto(field: childField) + arrowType = ArrowTypeList(childArrowField.type) default: arrowType = ArrowType(ArrowType.ArrowUnknown) } diff --git a/Arrow/Tests/ArrowTests/ArrayTests.swift b/Arrow/Tests/ArrowTests/ArrayTests.swift index c7142c5..27a0e1c 100644 --- a/Arrow/Tests/ArrowTests/ArrayTests.swift +++ b/Arrow/Tests/ArrowTests/ArrayTests.swift @@ -438,4 +438,86 @@ final class ArrayTests: XCTestCase { // swiftlint:disable:this type_body_length boolBuilder.append([true, false, true, false]) XCTAssertEqual(try boolBuilder.finish()[2], true) } + + func testListArrayPrimitive() throws { + let listBuilder = try ListArrayBuilder(ArrowType(ArrowType.ArrowInt32)) + + listBuilder.append([Int32(1), Int32(2), Int32(3)]) + listBuilder.append([Int32(4), 
Int32(5)]) + listBuilder.append(nil) + listBuilder.append([Int32(6), Int32(7), Int32(8), Int32(9)]) + + XCTAssertEqual(listBuilder.length, 4) + XCTAssertEqual(listBuilder.nullCount, 1) + + let listArray = try listBuilder.finish() + XCTAssertEqual(listArray.length, 4) + + let firstList = listArray[0] + XCTAssertNotNil(firstList, "First list should not be nil") + XCTAssertEqual(firstList!.count, 3, "First list should have 3 elements") + XCTAssertEqual(firstList![0] as? Int32, 1) + XCTAssertEqual(firstList![1] as? Int32, 2) + XCTAssertEqual(firstList![2] as? Int32, 3) + + let secondList = listArray[1] + XCTAssertEqual(secondList!.count, 2) + XCTAssertEqual(secondList![0] as? Int32, 4) + XCTAssertEqual(secondList![1] as? Int32, 5) + + XCTAssertNil(listArray[2]) + + let fourthList = listArray[3] + XCTAssertEqual(fourthList!.count, 4) + XCTAssertEqual(fourthList![0] as? Int32, 6) + XCTAssertEqual(fourthList![3] as? Int32, 9) + } + + func testNestedListArray() throws { + let innerListType = ArrowTypeList(ArrowType(ArrowType.ArrowInt32)) + let outerListBuilder = try ListArrayBuilder(innerListType) + + let innerListBuilder = outerListBuilder.valueBuilder as! ListArrayBuilder + + outerListBuilder.bufferBuilder.append([nil, nil]) + innerListBuilder.append([Int32(1), Int32(2)]) + innerListBuilder.append([Int32(3), Int32(4), Int32(5)]) + + outerListBuilder.bufferBuilder.append([nil]) + innerListBuilder.append([Int32(6)]) + + outerListBuilder.bufferBuilder.append(nil) + + outerListBuilder.bufferBuilder.append([]) + + let nestedArray = try outerListBuilder.finish() + XCTAssertEqual(nestedArray.length, 4) + XCTAssertEqual(nestedArray.nullCount, 1) + + let firstOuterList = nestedArray[0]! + XCTAssertEqual(firstOuterList.count, 2) + + let firstInnerList = firstOuterList[0] as! [Any?] + XCTAssertEqual(firstInnerList.count, 2) + XCTAssertEqual(firstInnerList[0] as? Int32, 1) + XCTAssertEqual(firstInnerList[1] as? Int32, 2) + + let secondInnerList = firstOuterList[1] as! [Any?] 
+ XCTAssertEqual(secondInnerList.count, 3) + XCTAssertEqual(secondInnerList[0] as? Int32, 3) + XCTAssertEqual(secondInnerList[1] as? Int32, 4) + XCTAssertEqual(secondInnerList[2] as? Int32, 5) + + let secondOuterList = nestedArray[1]! + XCTAssertEqual(secondOuterList.count, 1) + + let thirdInnerList = secondOuterList[0] as! [Any?] + XCTAssertEqual(thirdInnerList.count, 1) + XCTAssertEqual(thirdInnerList[0] as? Int32, 6) + + XCTAssertNil(nestedArray[2]) + + let emptyList = nestedArray[3]! + XCTAssertEqual(emptyList.count, 0) + } } diff --git a/Arrow/Tests/ArrowTests/IPCTests.swift b/Arrow/Tests/ArrowTests/IPCTests.swift index 26f38ce..735dc1b 100644 --- a/Arrow/Tests/ArrowTests/IPCTests.swift +++ b/Arrow/Tests/ArrowTests/IPCTests.swift @@ -121,14 +121,14 @@ func makeSchema() -> ArrowSchema { func makeStructSchema() -> ArrowSchema { let testObj = StructTest() var fields = [ArrowField]() - let buildStructType = {() -> ArrowNestedType in + let buildStructType = {() -> ArrowTypeStruct in let mirror = Mirror(reflecting: testObj) for (property, value) in mirror.children { let arrowType = ArrowType(ArrowType.infoForType(type(of: value))) fields.append(ArrowField(property!, type: arrowType, isNullable: true)) } - return ArrowNestedType(ArrowType.ArrowStruct, fields: fields) + return ArrowTypeStruct(ArrowType.ArrowStruct, fields: fields) } return ArrowSchema.Builder() @@ -515,8 +515,8 @@ final class IPCFileReaderTests: XCTestCase { // swiftlint:disable:this type_body XCTAssertEqual(recordBatch.schema.fields.count, 1) XCTAssertEqual(recordBatch.schema.fields[0].name, "struct1") XCTAssertEqual(recordBatch.schema.fields[0].type.id, .strct) - XCTAssertTrue(recordBatch.schema.fields[0].type is ArrowNestedType) - let nestedType = (recordBatch.schema.fields[0].type as? ArrowNestedType)! + XCTAssertTrue(recordBatch.schema.fields[0].type is ArrowTypeStruct) + let nestedType = (recordBatch.schema.fields[0].type as? ArrowTypeStruct)! 
XCTAssertEqual(nestedType.fields.count, 14) let columns = recordBatch.columns XCTAssertEqual(columns[0].nullCount, 1) diff --git a/Arrow/Tests/ArrowTests/TableTests.swift b/Arrow/Tests/ArrowTests/TableTests.swift index dc5cabc..6f5482e 100644 --- a/Arrow/Tests/ArrowTests/TableTests.swift +++ b/Arrow/Tests/ArrowTests/TableTests.swift @@ -53,14 +53,14 @@ final class TableTests: XCTestCase { let testObj = StructTest() var fields = [ArrowField]() - let buildStructType = {() -> ArrowNestedType in + let buildStructType = {() -> ArrowTypeStruct in let mirror = Mirror(reflecting: testObj) for (property, value) in mirror.children { let arrowType = ArrowType(ArrowType.infoForType(type(of: value))) fields.append(ArrowField(property!, type: arrowType, isNullable: true)) } - return ArrowNestedType(ArrowType.ArrowStruct, fields: fields) + return ArrowTypeStruct(ArrowType.ArrowStruct, fields: fields) } let structType = buildStructType() From a52b2d898bdae47ad62c6bf51db32b5b73e86493 Mon Sep 17 00:00:00 2001 From: Marco Date: Tue, 17 Jun 2025 15:26:28 -0300 Subject: [PATCH 2/2] refactor: Rename ArrowNestedType to ArrowTypeStruct for clarity --- Arrow/Sources/Arrow/ArrowArrayBuilder.swift | 1 + Arrow/Sources/Arrow/ArrowType.swift | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/Arrow/Sources/Arrow/ArrowArrayBuilder.swift b/Arrow/Sources/Arrow/ArrowArrayBuilder.swift index d9d08c2..0d662dd 100644 --- a/Arrow/Sources/Arrow/ArrowArrayBuilder.swift +++ b/Arrow/Sources/Arrow/ArrowArrayBuilder.swift @@ -387,6 +387,7 @@ public class ArrowArrayBuilders { public static func loadTimestampArrayBuilder(_ unit: ArrowTimestampUnit, timezone: String? 
= nil) throws -> TimestampArrayBuilder { return try TimestampArrayBuilder(unit, timezone: timezone) + } public static func loadStructArrayBuilder(_ fields: [ArrowField]) throws -> StructArrayBuilder { return try StructArrayBuilder(fields) diff --git a/Arrow/Sources/Arrow/ArrowType.swift b/Arrow/Sources/Arrow/ArrowType.swift index fa564ad..92913fe 100644 --- a/Arrow/Sources/Arrow/ArrowType.swift +++ b/Arrow/Sources/Arrow/ArrowType.swift @@ -165,7 +165,7 @@ public class ArrowTypeTimestamp: ArrowType { } } -public class ArrowNestedType: ArrowType { +public class ArrowTypeStruct: ArrowType { let fields: [ArrowField] public init(_ info: ArrowType.Info, fields: [ArrowField]) { self.fields = fields