From 60041b03894a4d57fd62a651196d5d77a466482a Mon Sep 17 00:00:00 2001 From: Marco Date: Tue, 10 Jun 2025 15:17:38 -0300 Subject: [PATCH 1/8] [FEATURE] Add support for Timestamp data type and related functionality in Arrow --- Arrow/Sources/Arrow/ArrowArray.swift | 67 +++++++++++++++ Arrow/Sources/Arrow/ArrowArrayBuilder.swift | 15 ++++ Arrow/Sources/Arrow/ArrowReaderHelper.swift | 40 ++++++++- Arrow/Sources/Arrow/ArrowType.swift | 71 ++++++++++++++++ Arrow/Sources/Arrow/ArrowWriterHelper.swift | 42 ++++++++-- Arrow/Sources/Arrow/ProtoUtil.swift | 16 ++++ Arrow/Tests/ArrowTests/ArrayTests.swift | 90 ++++++++++++++++++--- Arrow/Tests/ArrowTests/CDataTests.swift | 6 +- 8 files changed, 327 insertions(+), 20 deletions(-) diff --git a/Arrow/Sources/Arrow/ArrowArray.swift b/Arrow/Sources/Arrow/ArrowArray.swift index 4fc1b8b..1a4c7fa 100644 --- a/Arrow/Sources/Arrow/ArrowArray.swift +++ b/Arrow/Sources/Arrow/ArrowArray.swift @@ -105,6 +105,8 @@ public class ArrowArrayHolderImpl: ArrowArrayHolder { return try ArrowArrayHolderImpl(Time32Array(with)) case .time64: return try ArrowArrayHolderImpl(Time64Array(with)) + case .timestamp: + return try ArrowArrayHolderImpl(TimestampArray(with)) case .string: return try ArrowArrayHolderImpl(StringArray(with)) case .boolean: @@ -233,6 +235,71 @@ public class Date64Array: ArrowArray { public class Time32Array: FixedArray {} public class Time64Array: FixedArray {} +public class TimestampArray: FixedArray { + + public struct FormattingOptions { + public var dateFormat: String = "yyyy-MM-dd HH:mm:ss.SSS" + public var locale: Locale = .current + public var includeTimezone: Bool = true + public var fallbackToRaw: Bool = true + + public init(dateFormat: String = "yyyy-MM-dd HH:mm:ss.SSS", + locale: Locale = .current, + includeTimezone: Bool = true, + fallbackToRaw: Bool = true) { + self.dateFormat = dateFormat + self.locale = locale + self.includeTimezone = includeTimezone + self.fallbackToRaw = fallbackToRaw + } + } + + public func formattedDate(at index: UInt, options: FormattingOptions = FormattingOptions()) -> String? { + guard let timestamp = self[index] else { return nil } + + guard let timestampType = self.arrowData.type as? ArrowTypeTimestamp else { + return options.fallbackToRaw ? "\(timestamp)" : nil + } + + let date = dateFromTimestamp(timestamp, unit: timestampType.unit) + + let formatter = DateFormatter() + formatter.dateFormat = options.dateFormat + formatter.locale = options.locale + + if options.includeTimezone, let timezone = timestampType.timezone { + formatter.timeZone = TimeZone(identifier: timezone) + } + + return formatter.string(from: date) + } + + private func dateFromTimestamp(_ timestamp: Int64, unit: ArrowTimestampUnit) -> Date { + let timeInterval: TimeInterval + + switch unit { + case .seconds: + timeInterval = TimeInterval(timestamp) + case .milliseconds: + timeInterval = TimeInterval(timestamp) / 1_000 + case .microseconds: + timeInterval = TimeInterval(timestamp) / 1_000_000 + case .nanoseconds: + timeInterval = TimeInterval(timestamp) / 1_000_000_000 + } + + return Date(timeIntervalSince1970: timeInterval) + } + + public override func asString(_ index: UInt) -> String { + if let formatted = formattedDate(at: index) { + return formatted + } + + return super.asString(index) + } +} + public class BinaryArray: ArrowArray { public struct Options { public var printAsHex = false diff --git a/Arrow/Sources/Arrow/ArrowArrayBuilder.swift b/Arrow/Sources/Arrow/ArrowArrayBuilder.swift index 005cad7..dad8f81 100644 --- a/Arrow/Sources/Arrow/ArrowArrayBuilder.swift +++ b/Arrow/Sources/Arrow/ArrowArrayBuilder.swift @@ -119,6 +119,12 @@ public class Time64ArrayBuilder: ArrowArrayBuilder, T } } +public class TimestampArrayBuilder: ArrowArrayBuilder, TimestampArray> { + fileprivate convenience init(_ unit: ArrowTimestampUnit, timezone: String? = nil) throws { + try self.init(ArrowTypeTimestamp(unit, timezone: timezone)) + } +} + public class StructArrayBuilder: ArrowArrayBuilder { let builders: [any ArrowArrayHolderBuilder] let fields: [ArrowField] @@ -279,6 +285,11 @@ public class ArrowArrayBuilders { throw ArrowError.invalid("Expected arrow type for \(arrowType.id) not found") } return try Time64ArrayBuilder(timeType.unit) + case .timestamp: + guard let timestampType = arrowType as? ArrowTypeTimestamp else { + throw ArrowError.invalid("Expected arrow type for \(arrowType.id) not found") + } + return try TimestampArrayBuilder(timestampType.unit) default: throw ArrowError.unknownType("Builder not found for arrow type: \(arrowType.id)") } @@ -338,4 +349,8 @@ public class ArrowArrayBuilders { public static func loadTime64ArrayBuilder(_ unit: ArrowTime64Unit) throws -> Time64ArrayBuilder { return try Time64ArrayBuilder(unit) } + + public static func loadTimestampArrayBuilder(_ unit: ArrowTimestampUnit, timezone: String? = nil) throws -> TimestampArrayBuilder { + return try TimestampArrayBuilder(unit, timezone: timezone) + } } diff --git a/Arrow/Sources/Arrow/ArrowReaderHelper.swift b/Arrow/Sources/Arrow/ArrowReaderHelper.swift index 78ad280..3f69360 100644 --- a/Arrow/Sources/Arrow/ArrowReaderHelper.swift +++ b/Arrow/Sources/Arrow/ArrowReaderHelper.swift @@ -90,6 +90,24 @@ private func makeTimeHolder(_ field: ArrowField, } } +private func makeTimestampHolder(_ field: ArrowField, + buffers: [ArrowBuffer], + nullCount: UInt +) -> Result { + do { + if let arrowType = field.type as? ArrowTypeTimestamp { + let arrowData = try ArrowData(arrowType, buffers: buffers, nullCount: nullCount) + return .success(ArrowArrayHolderImpl(try TimestampArray(arrowData))) + } else { + return .failure(.invalid("Incorrect field type for timestamp: \(field.type)")) + } + } catch let error as ArrowError { + return .failure(error) + } catch { + return .failure(.unknownError("\(error)")) + } +} + private func makeBoolHolder(_ buffers: [ArrowBuffer], nullCount: UInt) -> Result { do { @@ -117,7 +135,7 @@ private func makeFixedHolder( } } -func makeStructHolder( + func makeStructHolder( _ field: ArrowField, buffers: [ArrowBuffer], nullCount: UInt, @@ -186,6 +204,8 @@ func makeArrayHolder( // swiftlint:disable:this cyclomatic_complexity return makeDateHolder(field, buffers: buffers, nullCount: nullCount) case .time32, .time64: return makeTimeHolder(field, buffers: buffers, nullCount: nullCount) + case .timestamp: + return makeTimestampHolder(field, buffers: buffers, nullCount: nullCount) case .strct: return makeStructHolder(field, buffers: buffers, nullCount: nullCount, children: children!, rbLength: rbLength) default: @@ -203,7 +223,7 @@ func makeBuffer(_ buffer: org_apache_arrow_flatbuf_Buffer, fileData: Data, func isFixedPrimitive(_ type: org_apache_arrow_flatbuf_Type_) -> Bool { switch type { - case .int, .bool, .floatingpoint, .date, .time: + case .int, .bool, .floatingpoint, .date, .time, .timestamp: return true default: return false @@ -261,6 +281,22 @@ func findArrowType( // swiftlint:disable:this cyclomatic_complexity function_bod } return ArrowTypeTime64(timeType.unit == .microsecond ? .microseconds : .nanoseconds) + case .timestamp: + let timestampType = field.type(type: org_apache_arrow_flatbuf_Timestamp.self)! + let arrowUnit: ArrowTimestampUnit + switch timestampType.unit { + case .second: + arrowUnit = .seconds + case .millisecond: + arrowUnit = .milliseconds + case .microsecond: + arrowUnit = .microseconds + case .nanosecond: + arrowUnit = .nanoseconds + } + + let timezone = timestampType.timezone + return ArrowTypeTimestamp(arrowUnit, timezone: timezone) case .struct_: _ = field.type(type: org_apache_arrow_flatbuf_Struct_.self)! var fields = [ArrowField]() diff --git a/Arrow/Sources/Arrow/ArrowType.swift b/Arrow/Sources/Arrow/ArrowType.swift index b44f859..8454b0e 100644 --- a/Arrow/Sources/Arrow/ArrowType.swift +++ b/Arrow/Sources/Arrow/ArrowType.swift @@ -21,6 +21,7 @@ public typealias Time32 = Int32 public typealias Time64 = Int64 public typealias Date32 = Int32 public typealias Date64 = Int64 +public typealias Timestamp = Int64 func FlatBuffersVersion_23_1_4() { // swiftlint:disable:this identifier_name } @@ -65,6 +66,7 @@ public enum ArrowTypeId { case strct case time32 case time64 + case timestamp case time case uint16 case uint32 @@ -122,6 +124,49 @@ public class ArrowTypeTime64: ArrowType { } } +public enum ArrowTimestampUnit { + case seconds + case milliseconds + case microseconds + case nanoseconds +} + +public class ArrowTypeTimestamp: ArrowType { + let unit: ArrowTimestampUnit + let timezone: String? + + public init(_ unit: ArrowTimestampUnit, timezone: String? = nil) { + self.unit = unit + self.timezone = timezone + + super.init(ArrowType.ArrowTimestamp) + } + + public convenience init(type: ArrowTypeId) { + self.init(.milliseconds, timezone: nil) + } + + public override var cDataFormatId: String { + get throws { + let unitChar: String + switch self.unit { + case .seconds: unitChar = "s" + case .milliseconds: unitChar = "m" + case .microseconds: unitChar = "u" + case .nanoseconds: unitChar = "n" + } + + if let timezone = self.timezone { + return "ts\(unitChar):\(timezone)" + } else { + return "ts\(unitChar)" + } + } + } + +} + + public class ArrowNestedType: ArrowType { let fields: [ArrowField] public init(_ info: ArrowType.Info, fields: [ArrowField]) { @@ -150,6 +195,7 @@ public class ArrowType { public static let ArrowBinary = Info.variableInfo(ArrowTypeId.binary) public static let ArrowTime32 = Info.timeInfo(ArrowTypeId.time32) public static let ArrowTime64 = Info.timeInfo(ArrowTypeId.time64) + public static let ArrowTimestamp = Info.timeInfo(ArrowTypeId.timestamp) public static let ArrowStruct = Info.complexInfo(ArrowTypeId.strct) public init(_ info: ArrowType.Info) { @@ -270,6 +316,8 @@ public class ArrowType { return MemoryLayout.stride case .time64: return MemoryLayout.stride + case .timestamp: + return MemoryLayout.stride case .binary: return MemoryLayout.stride case .string: @@ -320,6 +368,11 @@ public class ArrowType { return try time64.cDataFormatId } return "ttu" + case ArrowTypeId.timestamp: + if let timestamp = self as? ArrowTypeTimestamp { + return try timestamp.cDataFormatId + } + return "tsu" case ArrowTypeId.binary: return "z" case ArrowTypeId.string: @@ -366,6 +419,24 @@ public class ArrowType { return ArrowTypeTime64(.microseconds) } else if from == "ttn" { return ArrowTypeTime64(.nanoseconds) + } else if from.starts(with: "ts") { + let components = from.split(separator: ":", maxSplits: 1) + guard let unitPart = components.first, unitPart.count == 3 else { + throw ArrowError.notImplemented + } + + let unitChar = unitPart.suffix(1) + let unit: ArrowTimestampUnit + switch unitChar { + case "s": unit = .seconds + case "m": unit = .milliseconds + case "u": unit = .microseconds + case "n": unit = .nanoseconds + default: unit = .milliseconds + } + + let timezone = components.count > 1 ? String(components[1]) : nil + return ArrowTypeTimestamp(unit, timezone: timezone) } else if from == "z" { return ArrowType(ArrowType.ArrowBinary) } else if from == "u" { diff --git a/Arrow/Sources/Arrow/ArrowWriterHelper.swift b/Arrow/Sources/Arrow/ArrowWriterHelper.swift index b3fa2b4..4aa8269 100644 --- a/Arrow/Sources/Arrow/ArrowWriterHelper.swift +++ b/Arrow/Sources/Arrow/ArrowWriterHelper.swift @@ -41,6 +41,8 @@ func toFBTypeEnum(_ arrowType: ArrowType) -> Result Date: Wed, 11 Jun 2025 15:24:03 -0300 Subject: [PATCH 2/8] refactor(Arrow): :rotating_light: Attempt to fix linter errors on CI/CD --- Arrow/Sources/Arrow/ArrowReaderHelper.swift | 26 ++++++++++----------- Arrow/Sources/Arrow/ArrowType.swift | 21 ++++++++--------- Arrow/Sources/Arrow/ArrowWriterHelper.swift | 8 +++---- 3 files changed, 27 insertions(+), 28 deletions(-) diff --git a/Arrow/Sources/Arrow/ArrowReaderHelper.swift b/Arrow/Sources/Arrow/ArrowReaderHelper.swift index 3f69360..37f4680 100644 --- a/Arrow/Sources/Arrow/ArrowReaderHelper.swift +++ b/Arrow/Sources/Arrow/ArrowReaderHelper.swift @@ -91,21 +91,21 @@ private func makeTimeHolder(_ field: ArrowField, } private func makeTimestampHolder(_ field: ArrowField, - buffers: [ArrowBuffer], - nullCount: UInt + buffers: [ArrowBuffer], + nullCount: UInt ) -> Result { do { - if let arrowType = field.type as? ArrowTypeTimestamp { - let arrowData = try ArrowData(arrowType, buffers: buffers, nullCount: nullCount) - return .success(ArrowArrayHolderImpl(try TimestampArray(arrowData))) - } else { - return .failure(.invalid("Incorrect field type for timestamp: \(field.type)")) - } - } catch let error as ArrowError { - return .failure(error) - } catch { - return .failure(.unknownError("\(error)")) + if let arrowType = field.type as? ArrowTypeTimestamp { + let arrowData = try ArrowData(arrowType, buffers: buffers, nullCount: nullCount) + return .success(ArrowArrayHolderImpl(try TimestampArray(arrowData))) + } else { + return .failure(.invalid("Incorrect field type for timestamp: \(field.type)")) } + } catch let error as ArrowError { + return .failure(error) + } catch { + return .failure(.unknownError("\(error)")) + } } private func makeBoolHolder(_ buffers: [ArrowBuffer], @@ -135,7 +135,7 @@ private func makeFixedHolder( } } - func makeStructHolder( +func makeStructHolder( _ field: ArrowField, buffers: [ArrowBuffer], nullCount: UInt, diff --git a/Arrow/Sources/Arrow/ArrowType.swift b/Arrow/Sources/Arrow/ArrowType.swift index 8454b0e..27a39b8 100644 --- a/Arrow/Sources/Arrow/ArrowType.swift +++ b/Arrow/Sources/Arrow/ArrowType.swift @@ -134,28 +134,28 @@ public enum ArrowTimestampUnit { public class ArrowTypeTimestamp: ArrowType { let unit: ArrowTimestampUnit let timezone: String? - + public init(_ unit: ArrowTimestampUnit, timezone: String? = nil) { self.unit = unit self.timezone = timezone super.init(ArrowType.ArrowTimestamp) } - + public convenience init(type: ArrowTypeId) { self.init(.milliseconds, timezone: nil) } - + public override var cDataFormatId: String { get throws { let unitChar: String switch self.unit { - case .seconds: unitChar = "s" - case .milliseconds: unitChar = "m" - case .microseconds: unitChar = "u" - case .nanoseconds: unitChar = "n" + case .seconds: unitChar = "s" + case .milliseconds: unitChar = "m" + case .microseconds: unitChar = "u" + case .nanoseconds: unitChar = "n" } - + if let timezone = self.timezone { return "ts\(unitChar):\(timezone)" } else { @@ -163,7 +163,6 @@ public class ArrowTypeTimestamp: ArrowType { } } } - } @@ -424,7 +423,7 @@ public class ArrowType { guard let unitPart = components.first, unitPart.count == 3 else { throw ArrowError.notImplemented } - + let unitChar = unitPart.suffix(1) let unit: ArrowTimestampUnit switch unitChar { @@ -434,7 +433,7 @@ public class ArrowType { case "n": unit = .nanoseconds default: unit = .milliseconds } - + let timezone = components.count > 1 ? String(components[1]) : nil return ArrowTypeTimestamp(unit, timezone: timezone) } else if from == "z" { diff --git a/Arrow/Sources/Arrow/ArrowWriterHelper.swift b/Arrow/Sources/Arrow/ArrowWriterHelper.swift index 4aa8269..0d86be0 100644 --- a/Arrow/Sources/Arrow/ArrowWriterHelper.swift +++ b/Arrow/Sources/Arrow/ArrowWriterHelper.swift @@ -108,7 +108,7 @@ func toFBType( // swiftlint:disable:this cyclomatic_complexity function_body_len case .timestamp: if let timestampType = arrowType as? ArrowTypeTimestamp { let startOffset = org_apache_arrow_flatbuf_Timestamp.startTimestamp(&fbb) - + let fbUnit: org_apache_arrow_flatbuf_TimeUnit switch timestampType.unit { case .seconds: @@ -121,15 +121,15 @@ func toFBType( // swiftlint:disable:this cyclomatic_complexity function_body_len fbUnit = .nanosecond } org_apache_arrow_flatbuf_Timestamp.add(unit: fbUnit, &fbb) - + if let timezone = timestampType.timezone { let timezoneOffset = fbb.create(string: timezone) org_apache_arrow_flatbuf_Timestamp.add(timezone: timezoneOffset, &fbb) } - + return .success(org_apache_arrow_flatbuf_Timestamp.endTimestamp(&fbb, start: startOffset)) } - + return .failure(.invalid("Unable to cast to Timestamp")) case .strct: let startOffset = org_apache_arrow_flatbuf_Struct_.startStruct_(&fbb) From 17544004cd6902df7d2d1d9de4715a83384d95fd Mon Sep 17 00:00:00 2001 From: Marco Date: Fri, 13 Jun 2025 11:41:01 -0300 Subject: [PATCH 3/8] refactor(Arrow): :rotating_light: Attempt to fix linter errors on CI/CD --- Arrow/Sources/Arrow/ArrowArray.swift | 39 ++++++++++++++------- Arrow/Sources/Arrow/ArrowArrayBuilder.swift | 2 +- Arrow/Sources/Arrow/ArrowType.swift | 3 +- Arrow/Sources/Arrow/ArrowWriterHelper.swift | 14 ++++---- Arrow/Sources/Arrow/ProtoUtil.swift | 2 +- Arrow/Tests/ArrowTests/ArrayTests.swift | 22 ++++++------ 6 files changed, 47 insertions(+), 35 deletions(-) diff --git a/Arrow/Sources/Arrow/ArrowArray.swift b/Arrow/Sources/Arrow/ArrowArray.swift index 1a4c7fa..d4ee873 100644 --- a/Arrow/Sources/Arrow/ArrowArray.swift +++ b/Arrow/Sources/Arrow/ArrowArray.swift @@ -236,13 +236,13 @@ public class Time32Array: FixedArray {} public class Time64Array: FixedArray {} public class TimestampArray: FixedArray { - - public struct FormattingOptions { + + public struct FormattingOptions: Equatable { public var dateFormat: String = "yyyy-MM-dd HH:mm:ss.SSS" public var locale: Locale = .current public var includeTimezone: Bool = true public var fallbackToRaw: Bool = true - + public init(dateFormat: String = "yyyy-MM-dd HH:mm:ss.SSS", locale: Locale = .current, includeTimezone: Bool = true, @@ -252,8 +252,18 @@ public class TimestampArray: FixedArray { self.includeTimezone = includeTimezone self.fallbackToRaw = fallbackToRaw } + + public static func == (lhs: FormattingOptions, rhs: FormattingOptions) -> Bool { + return lhs.dateFormat == rhs.dateFormat && + lhs.locale.identifier == rhs.locale.identifier && + lhs.includeTimezone == rhs.includeTimezone && + lhs.fallbackToRaw == rhs.fallbackToRaw + } } - + + private var cachedFormatter: DateFormatter? + private var cachedOptions: FormattingOptions? + public func formattedDate(at index: UInt, options: FormattingOptions = FormattingOptions()) -> String? { guard let timestamp = self[index] else { return nil } @@ -263,17 +273,20 @@ public class TimestampArray: FixedArray { let date = dateFromTimestamp(timestamp, unit: timestampType.unit) - let formatter = DateFormatter() - formatter.dateFormat = options.dateFormat - formatter.locale = options.locale - - if options.includeTimezone, let timezone = timestampType.timezone { - formatter.timeZone = TimeZone(identifier: timezone) + if cachedFormatter == nil || cachedOptions != options { + let formatter = DateFormatter() + formatter.dateFormat = options.dateFormat + formatter.locale = options.locale + if options.includeTimezone, let timezone = timestampType.timezone { + formatter.timeZone = TimeZone(identifier: timezone) + } + cachedFormatter = formatter + cachedOptions = options } - return formatter.string(from: date) + return cachedFormatter?.string(from: date) } - + private func dateFromTimestamp(_ timestamp: Int64, unit: ArrowTimestampUnit) -> Date { let timeInterval: TimeInterval @@ -290,7 +303,7 @@ public class TimestampArray: FixedArray { return Date(timeIntervalSince1970: timeInterval) } - + public override func asString(_ index: UInt) -> String { if let formatted = formattedDate(at: index) { return formatted diff --git a/Arrow/Sources/Arrow/ArrowArrayBuilder.swift b/Arrow/Sources/Arrow/ArrowArrayBuilder.swift index dad8f81..493e43a 100644 --- a/Arrow/Sources/Arrow/ArrowArrayBuilder.swift +++ b/Arrow/Sources/Arrow/ArrowArrayBuilder.swift @@ -349,7 +349,7 @@ public class ArrowArrayBuilders { public static func loadTime64ArrayBuilder(_ unit: ArrowTime64Unit) throws -> Time64ArrayBuilder { return try Time64ArrayBuilder(unit) } - + public static func loadTimestampArrayBuilder(_ unit: ArrowTimestampUnit, timezone: String? = nil) throws -> TimestampArrayBuilder { return try TimestampArrayBuilder(unit, timezone: timezone) } diff --git a/Arrow/Sources/Arrow/ArrowType.swift b/Arrow/Sources/Arrow/ArrowType.swift index 27a39b8..988fa19 100644 --- a/Arrow/Sources/Arrow/ArrowType.swift +++ b/Arrow/Sources/Arrow/ArrowType.swift @@ -138,7 +138,7 @@ public class ArrowTypeTimestamp: ArrowType { public init(_ unit: ArrowTimestampUnit, timezone: String? = nil) { self.unit = unit self.timezone = timezone - + super.init(ArrowType.ArrowTimestamp) } @@ -165,7 +165,6 @@ public class ArrowTypeTimestamp: ArrowType { } } - public class ArrowNestedType: ArrowType { let fields: [ArrowField] public init(_ info: ArrowType.Info, fields: [ArrowField]) { diff --git a/Arrow/Sources/Arrow/ArrowWriterHelper.swift b/Arrow/Sources/Arrow/ArrowWriterHelper.swift index 0d86be0..7ecb3ab 100644 --- a/Arrow/Sources/Arrow/ArrowWriterHelper.swift +++ b/Arrow/Sources/Arrow/ArrowWriterHelper.swift @@ -58,29 +58,29 @@ func toFBType( // swiftlint:disable:this cyclomatic_complexity function_body_len switch arrowType.id { case .int8, .uint8: return .success(org_apache_arrow_flatbuf_Int.createInt( - &fbb, bitWidth: 8, isSigned: infoType == ArrowType.ArrowInt8)) + &fbb, bitWidth: 8, isSigned: infoType == ArrowType.ArrowInt8)) case .int16, .uint16: return .success(org_apache_arrow_flatbuf_Int.createInt( - &fbb, bitWidth: 16, isSigned: infoType == ArrowType.ArrowInt16)) + &fbb, bitWidth: 16, isSigned: infoType == ArrowType.ArrowInt16)) case .int32, .uint32: return .success(org_apache_arrow_flatbuf_Int.createInt( - &fbb, bitWidth: 32, isSigned: infoType == ArrowType.ArrowInt32)) + &fbb, bitWidth: 32, isSigned: infoType == ArrowType.ArrowInt32)) case .int64, .uint64: return .success(org_apache_arrow_flatbuf_Int.createInt( - &fbb, bitWidth: 64, isSigned: infoType == ArrowType.ArrowInt64)) + &fbb, bitWidth: 64, isSigned: infoType == ArrowType.ArrowInt64)) case .float: return .success(org_apache_arrow_flatbuf_FloatingPoint.createFloatingPoint(&fbb, precision: .single)) case .double: return .success(org_apache_arrow_flatbuf_FloatingPoint.createFloatingPoint(&fbb, precision: .double)) case .string: return .success(org_apache_arrow_flatbuf_Utf8.endUtf8( - &fbb, start: org_apache_arrow_flatbuf_Utf8.startUtf8(&fbb))) + &fbb, start: org_apache_arrow_flatbuf_Utf8.startUtf8(&fbb))) case .binary: return .success(org_apache_arrow_flatbuf_Binary.endBinary( - &fbb, start: org_apache_arrow_flatbuf_Binary.startBinary(&fbb))) + &fbb, start: org_apache_arrow_flatbuf_Binary.startBinary(&fbb))) case .boolean: return .success(org_apache_arrow_flatbuf_Bool.endBool( - &fbb, start: org_apache_arrow_flatbuf_Bool.startBool(&fbb))) + &fbb, start: org_apache_arrow_flatbuf_Bool.startBool(&fbb))) case .date32: let startOffset = org_apache_arrow_flatbuf_Date.startDate(&fbb) org_apache_arrow_flatbuf_Date.add(unit: .day, &fbb) diff --git a/Arrow/Sources/Arrow/ProtoUtil.swift b/Arrow/Sources/Arrow/ProtoUtil.swift index ebf4384..e91580e 100644 --- a/Arrow/Sources/Arrow/ProtoUtil.swift +++ b/Arrow/Sources/Arrow/ProtoUtil.swift @@ -77,7 +77,7 @@ func fromProto( // swiftlint:disable:this cyclomatic_complexity function_body_le case .nanosecond: arrowUnit = .nanoseconds } - + let timezone = timestampType.timezone arrowType = ArrowTypeTimestamp(arrowUnit, timezone: timezone?.isEmpty == true ? nil : timezone) case .struct_: diff --git a/Arrow/Tests/ArrowTests/ArrayTests.swift b/Arrow/Tests/ArrowTests/ArrayTests.swift index e59a14b..1a5c624 100644 --- a/Arrow/Tests/ArrowTests/ArrayTests.swift +++ b/Arrow/Tests/ArrowTests/ArrayTests.swift @@ -211,7 +211,7 @@ final class ArrayTests: XCTestCase { // swiftlint:disable:this type_body_length XCTAssertEqual(microArray[1], 20000) XCTAssertEqual(microArray[2], 987654321) } - + func testTimestampArray() throws { // Test timestamp with seconds unit let secBuilder = try ArrowArrayBuilders.loadTimestampArrayBuilder(.seconds, timezone: nil) @@ -340,14 +340,14 @@ final class ArrayTests: XCTestCase { // swiftlint:disable:this type_body_length dateFormatter.timeStyle = .full XCTAssertTrue( dateFormatter.string(from: (structArray[0]![STIndex.date.rawValue] as? Date)!) == - dateFormatter.string(from: dateNow)) + dateFormatter.string(from: dateNow)) } func checkHolderForType(_ checkType: ArrowType) throws { let buffers = [ArrowBuffer(length: 0, capacity: 0, - rawPointer: UnsafeMutableRawPointer.allocate(byteCount: 0, alignment: .zero)), + rawPointer: UnsafeMutableRawPointer.allocate(byteCount: 0, alignment: .zero)), ArrowBuffer(length: 0, capacity: 0, - rawPointer: UnsafeMutableRawPointer.allocate(byteCount: 0, alignment: .zero))] + rawPointer: UnsafeMutableRawPointer.allocate(byteCount: 0, alignment: .zero))] let field = ArrowField("", type: checkType, isNullable: true) switch makeArrayHolder(field, buffers: buffers, nullCount: 0, children: nil, rbLength: 0) { case .success(let holder): @@ -390,13 +390,13 @@ final class ArrayTests: XCTestCase { // swiftlint:disable:this type_body_length let stringHBuilder: ArrowArrayHolderBuilder = (try ArrowArrayBuilders.loadStringArrayBuilder()) - for index in 0..<100 { - if index % 10 == 9 { - stringHBuilder.appendAny(nil) - } else { - stringHBuilder.appendAny("test" + String(index)) - } - } + for index in 0..<100 { + if index % 10 == 9 { + stringHBuilder.appendAny(nil) + } else { + stringHBuilder.appendAny("test" + String(index)) + } + } let stringHolder = try stringHBuilder.toHolder() XCTAssertEqual(stringHolder.nullCount, 10) From 559bbe3ff8e016a8631b25d4affe8f65a15f84f4 Mon Sep 17 00:00:00 2001 From: Marco Antonio Date: Mon, 16 Jun 2025 09:57:27 -0300 Subject: [PATCH 4/8] Better column's name and ordering for schema tests Co-authored-by: Sutou Kouhei --- Arrow/Tests/ArrowTests/CDataTests.swift | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Arrow/Tests/ArrowTests/CDataTests.swift b/Arrow/Tests/ArrowTests/CDataTests.swift index 1c7f979..e48ebd0 100644 --- a/Arrow/Tests/ArrowTests/CDataTests.swift +++ b/Arrow/Tests/ArrowTests/CDataTests.swift @@ -44,10 +44,10 @@ final class CDataTests: XCTestCase { .addField("colTime64u", type: ArrowTypeTime64(.microseconds), isNullable: false) .addField("colTime64n", type: ArrowTypeTime64(.nanoseconds), isNullable: false) .addField("colTimestamp", type: ArrowType(ArrowType.ArrowTimestamp), isNullable: false) - .addField("colTimestamptsn", type: ArrowTypeTimestamp(.nanoseconds), isNullable: false) - .addField("colTimestamptsu", type: ArrowTypeTimestamp(.microseconds), isNullable: false) - .addField("colTimestamptsm", type: ArrowTypeTimestamp(.milliseconds), isNullable: false) - .addField("colTimestamptss", type: ArrowTypeTimestamp(.seconds), isNullable: false) + .addField("colTimestampts", type: ArrowTypeTimestamp(.seconds), isNullable: false) + .addField("colTimestamptm", type: ArrowTypeTimestamp(.milliseconds), isNullable: false) + .addField("colTimestamptu", type: ArrowTypeTimestamp(.microseconds), isNullable: false) + .addField("colTimestamptn", type: ArrowTypeTimestamp(.nanoseconds), isNullable: false) .addField("colFloat", type: ArrowType(ArrowType.ArrowFloat), isNullable: false) .addField("colDouble", type: ArrowType(ArrowType.ArrowDouble), isNullable: false) .finish() From d84ccd0b81a3fea5c3f1378aa6bf57b7d792c006 Mon Sep 17 00:00:00 2001 From: Marco Antonio Date: Mon, 16 Jun 2025 09:59:04 -0300 Subject: [PATCH 5/8] Fixing typo in comments Co-authored-by: Sutou Kouhei --- Arrow/Tests/ArrowTests/ArrayTests.swift | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Arrow/Tests/ArrowTests/ArrayTests.swift b/Arrow/Tests/ArrowTests/ArrayTests.swift index 1a5c624..c7142c5 100644 --- a/Arrow/Tests/ArrowTests/ArrayTests.swift +++ b/Arrow/Tests/ArrowTests/ArrayTests.swift @@ -247,7 +247,7 @@ final class ArrayTests: XCTestCase { // swiftlint:disable:this type_body_length XCTAssertNil(msArray[1]) XCTAssertEqual(msArray[2], 1609545600000) - // Test timestamp with microseconds unit and timezone America/New_York + // Test timestamp with microseconds unit and timezone UTC let usBuilder = try ArrowArrayBuilders.loadTimestampArrayBuilder(.microseconds, timezone: "UTC") usBuilder.append(1609459200000000) // 2021-01-01 00:00:00.000000 usBuilder.append(1609545600000000) // 2021-01-02 00:00:00.000000 From b55f3301bb99951cb7e3e69f6c58aa9031679a16 Mon Sep 17 00:00:00 2001 From: Marco Antonio Date: Mon, 16 Jun 2025 10:04:07 -0300 Subject: [PATCH 6/8] Removing redundant boolean comparison Co-authored-by: Sutou Kouhei --- Arrow/Sources/Arrow/ProtoUtil.swift | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Arrow/Sources/Arrow/ProtoUtil.swift b/Arrow/Sources/Arrow/ProtoUtil.swift index e91580e..00cbb19 100644 --- a/Arrow/Sources/Arrow/ProtoUtil.swift +++ b/Arrow/Sources/Arrow/ProtoUtil.swift @@ -79,7 +79,7 @@ func fromProto( // swiftlint:disable:this cyclomatic_complexity function_body_le } let timezone = timestampType.timezone - arrowType = ArrowTypeTimestamp(arrowUnit, timezone: timezone?.isEmpty == true ? nil : timezone) + arrowType = ArrowTypeTimestamp(arrowUnit, timezone: timezone?.isEmpty ? nil : timezone) case .struct_: var children = [ArrowField]() for index in 0.. Date: Mon, 16 Jun 2025 10:22:14 -0300 Subject: [PATCH 7/8] fix(Arrow): improve error handling for invalid timestamp formats --- Arrow/Sources/Arrow/ArrowType.swift | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Arrow/Sources/Arrow/ArrowType.swift b/Arrow/Sources/Arrow/ArrowType.swift index 988fa19..9b6091f 100644 --- a/Arrow/Sources/Arrow/ArrowType.swift +++ b/Arrow/Sources/Arrow/ArrowType.swift @@ -420,7 +420,7 @@ public class ArrowType { } else if from.starts(with: "ts") { let components = from.split(separator: ":", maxSplits: 1) guard let unitPart = components.first, unitPart.count == 3 else { - throw ArrowError.notImplemented + throw ArrowError.invalid("Invalid timestamp format '\(from)'. Expected format 'ts[s|m|u|n][:timezone]'") } let unitChar = unitPart.suffix(1) @@ -430,7 +430,7 @@ public class ArrowType { case "m": unit = .milliseconds case "u": unit = .microseconds case "n": unit = .nanoseconds - default: unit = .milliseconds + default: throw ArrowError.invalid("Unrecognized timestamp unit '\(unitChar)'. Expected 's', 'm', 'u', or 'n'.") } let timezone = components.count > 1 ? String(components[1]) : nil From 352de686d078d331fb6e48abc15df2496c52d011 Mon Sep 17 00:00:00 2001 From: Marco Date: Mon, 16 Jun 2025 12:41:32 -0300 Subject: [PATCH 8/8] fix(ProtoUtil): correct timezone nil check for ArrowTypeTimestamp --- Arrow/Sources/Arrow/ProtoUtil.swift | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Arrow/Sources/Arrow/ProtoUtil.swift b/Arrow/Sources/Arrow/ProtoUtil.swift index 00cbb19..e91580e 100644 --- a/Arrow/Sources/Arrow/ProtoUtil.swift +++ b/Arrow/Sources/Arrow/ProtoUtil.swift @@ -79,7 +79,7 @@ func fromProto( // swiftlint:disable:this cyclomatic_complexity function_body_le } let timezone = timestampType.timezone - arrowType = ArrowTypeTimestamp(arrowUnit, timezone: timezone?.isEmpty ? nil : timezone) + arrowType = ArrowTypeTimestamp(arrowUnit, timezone: timezone?.isEmpty == true ? nil : timezone) case .struct_: var children = [ArrowField]() for index in 0..