diff --git a/Arrow/Sources/Arrow/ArrowArray.swift b/Arrow/Sources/Arrow/ArrowArray.swift index 4fc1b8b..d4ee873 100644 --- a/Arrow/Sources/Arrow/ArrowArray.swift +++ b/Arrow/Sources/Arrow/ArrowArray.swift @@ -105,6 +105,8 @@ public class ArrowArrayHolderImpl: ArrowArrayHolder { return try ArrowArrayHolderImpl(Time32Array(with)) case .time64: return try ArrowArrayHolderImpl(Time64Array(with)) + case .timestamp: + return try ArrowArrayHolderImpl(TimestampArray(with)) case .string: return try ArrowArrayHolderImpl(StringArray(with)) case .boolean: @@ -233,6 +235,84 @@ public class Date64Array: ArrowArray { public class Time32Array: FixedArray {} public class Time64Array: FixedArray {} +public class TimestampArray: FixedArray { + + public struct FormattingOptions: Equatable { + public var dateFormat: String = "yyyy-MM-dd HH:mm:ss.SSS" + public var locale: Locale = .current + public var includeTimezone: Bool = true + public var fallbackToRaw: Bool = true + + public init(dateFormat: String = "yyyy-MM-dd HH:mm:ss.SSS", + locale: Locale = .current, + includeTimezone: Bool = true, + fallbackToRaw: Bool = true) { + self.dateFormat = dateFormat + self.locale = locale + self.includeTimezone = includeTimezone + self.fallbackToRaw = fallbackToRaw + } + + public static func == (lhs: FormattingOptions, rhs: FormattingOptions) -> Bool { + return lhs.dateFormat == rhs.dateFormat && + lhs.locale.identifier == rhs.locale.identifier && + lhs.includeTimezone == rhs.includeTimezone && + lhs.fallbackToRaw == rhs.fallbackToRaw + } + } + + private var cachedFormatter: DateFormatter? + private var cachedOptions: FormattingOptions? + + public func formattedDate(at index: UInt, options: FormattingOptions = FormattingOptions()) -> String? { + guard let timestamp = self[index] else { return nil } + + guard let timestampType = self.arrowData.type as? ArrowTypeTimestamp else { + return options.fallbackToRaw ? "\(timestamp)" : nil + } + + let date = dateFromTimestamp(timestamp, unit: timestampType.unit) + + if cachedFormatter == nil || cachedOptions != options { + let formatter = DateFormatter() + formatter.dateFormat = options.dateFormat + formatter.locale = options.locale + if options.includeTimezone, let timezone = timestampType.timezone { + formatter.timeZone = TimeZone(identifier: timezone) + } + cachedFormatter = formatter + cachedOptions = options + } + + return cachedFormatter?.string(from: date) + } + + private func dateFromTimestamp(_ timestamp: Int64, unit: ArrowTimestampUnit) -> Date { + let timeInterval: TimeInterval + + switch unit { + case .seconds: + timeInterval = TimeInterval(timestamp) + case .milliseconds: + timeInterval = TimeInterval(timestamp) / 1_000 + case .microseconds: + timeInterval = TimeInterval(timestamp) / 1_000_000 + case .nanoseconds: + timeInterval = TimeInterval(timestamp) / 1_000_000_000 + } + + return Date(timeIntervalSince1970: timeInterval) + } + + public override func asString(_ index: UInt) -> String { + if let formatted = formattedDate(at: index) { + return formatted + } + + return super.asString(index) + } +} + public class BinaryArray: ArrowArray { public struct Options { public var printAsHex = false diff --git a/Arrow/Sources/Arrow/ArrowArrayBuilder.swift b/Arrow/Sources/Arrow/ArrowArrayBuilder.swift index 005cad7..493e43a 100644 --- a/Arrow/Sources/Arrow/ArrowArrayBuilder.swift +++ b/Arrow/Sources/Arrow/ArrowArrayBuilder.swift @@ -119,6 +119,12 @@ public class Time64ArrayBuilder: ArrowArrayBuilder, T } } +public class TimestampArrayBuilder: ArrowArrayBuilder, TimestampArray> { + fileprivate convenience init(_ unit: ArrowTimestampUnit, timezone: String? = nil) throws { + try self.init(ArrowTypeTimestamp(unit, timezone: timezone)) + } +} + public class StructArrayBuilder: ArrowArrayBuilder { let builders: [any ArrowArrayHolderBuilder] let fields: [ArrowField] @@ -279,6 +285,11 @@ public class ArrowArrayBuilders { throw ArrowError.invalid("Expected arrow type for \(arrowType.id) not found") } return try Time64ArrayBuilder(timeType.unit) + case .timestamp: + guard let timestampType = arrowType as? ArrowTypeTimestamp else { + throw ArrowError.invalid("Expected arrow type for \(arrowType.id) not found") + } + return try TimestampArrayBuilder(timestampType.unit) default: throw ArrowError.unknownType("Builder not found for arrow type: \(arrowType.id)") } @@ -338,4 +349,8 @@ public class ArrowArrayBuilders { public static func loadTime64ArrayBuilder(_ unit: ArrowTime64Unit) throws -> Time64ArrayBuilder { return try Time64ArrayBuilder(unit) } + + public static func loadTimestampArrayBuilder(_ unit: ArrowTimestampUnit, timezone: String? = nil) throws -> TimestampArrayBuilder { + return try TimestampArrayBuilder(unit, timezone: timezone) + } } diff --git a/Arrow/Sources/Arrow/ArrowReaderHelper.swift b/Arrow/Sources/Arrow/ArrowReaderHelper.swift index 78ad280..37f4680 100644 --- a/Arrow/Sources/Arrow/ArrowReaderHelper.swift +++ b/Arrow/Sources/Arrow/ArrowReaderHelper.swift @@ -90,6 +90,24 @@ private func makeTimeHolder(_ field: ArrowField, } } +private func makeTimestampHolder(_ field: ArrowField, + buffers: [ArrowBuffer], + nullCount: UInt +) -> Result { + do { + if let arrowType = field.type as? ArrowTypeTimestamp { + let arrowData = try ArrowData(arrowType, buffers: buffers, nullCount: nullCount) + return .success(ArrowArrayHolderImpl(try TimestampArray(arrowData))) + } else { + return .failure(.invalid("Incorrect field type for timestamp: \(field.type)")) + } + } catch let error as ArrowError { + return .failure(error) + } catch { + return .failure(.unknownError("\(error)")) + } +} + private func makeBoolHolder(_ buffers: [ArrowBuffer], nullCount: UInt) -> Result { do { @@ -186,6 +204,8 @@ func makeArrayHolder( // swiftlint:disable:this cyclomatic_complexity return makeDateHolder(field, buffers: buffers, nullCount: nullCount) case .time32, .time64: return makeTimeHolder(field, buffers: buffers, nullCount: nullCount) + case .timestamp: + return makeTimestampHolder(field, buffers: buffers, nullCount: nullCount) case .strct: return makeStructHolder(field, buffers: buffers, nullCount: nullCount, children: children!, rbLength: rbLength) default: @@ -203,7 +223,7 @@ func makeBuffer(_ buffer: org_apache_arrow_flatbuf_Buffer, fileData: Data, func isFixedPrimitive(_ type: org_apache_arrow_flatbuf_Type_) -> Bool { switch type { - case .int, .bool, .floatingpoint, .date, .time: + case .int, .bool, .floatingpoint, .date, .time, .timestamp: return true default: return false @@ -261,6 +281,22 @@ func findArrowType( // swiftlint:disable:this cyclomatic_complexity function_bod } return ArrowTypeTime64(timeType.unit == .microsecond ? .microseconds : .nanoseconds) + case .timestamp: + let timestampType = field.type(type: org_apache_arrow_flatbuf_Timestamp.self)! + let arrowUnit: ArrowTimestampUnit + switch timestampType.unit { + case .second: + arrowUnit = .seconds + case .millisecond: + arrowUnit = .milliseconds + case .microsecond: + arrowUnit = .microseconds + case .nanosecond: + arrowUnit = .nanoseconds + } + + let timezone = timestampType.timezone + return ArrowTypeTimestamp(arrowUnit, timezone: timezone) case .struct_: _ = field.type(type: org_apache_arrow_flatbuf_Struct_.self)! var fields = [ArrowField]() diff --git a/Arrow/Sources/Arrow/ArrowType.swift b/Arrow/Sources/Arrow/ArrowType.swift index b44f859..9b6091f 100644 --- a/Arrow/Sources/Arrow/ArrowType.swift +++ b/Arrow/Sources/Arrow/ArrowType.swift @@ -21,6 +21,7 @@ public typealias Time32 = Int32 public typealias Time64 = Int64 public typealias Date32 = Int32 public typealias Date64 = Int64 +public typealias Timestamp = Int64 func FlatBuffersVersion_23_1_4() { // swiftlint:disable:this identifier_name } @@ -65,6 +66,7 @@ public enum ArrowTypeId { case strct case time32 case time64 + case timestamp case time case uint16 case uint32 @@ -122,6 +124,47 @@ public class ArrowTypeTime64: ArrowType { } } +public enum ArrowTimestampUnit { + case seconds + case milliseconds + case microseconds + case nanoseconds +} + +public class ArrowTypeTimestamp: ArrowType { + let unit: ArrowTimestampUnit + let timezone: String? + + public init(_ unit: ArrowTimestampUnit, timezone: String? = nil) { + self.unit = unit + self.timezone = timezone + + super.init(ArrowType.ArrowTimestamp) + } + + public convenience init(type: ArrowTypeId) { + self.init(.milliseconds, timezone: nil) + } + + public override var cDataFormatId: String { + get throws { + let unitChar: String + switch self.unit { + case .seconds: unitChar = "s" + case .milliseconds: unitChar = "m" + case .microseconds: unitChar = "u" + case .nanoseconds: unitChar = "n" + } + + if let timezone = self.timezone { + return "ts\(unitChar):\(timezone)" + } else { + return "ts\(unitChar)" + } + } + } +} + public class ArrowNestedType: ArrowType { let fields: [ArrowField] public init(_ info: ArrowType.Info, fields: [ArrowField]) { @@ -150,6 +193,7 @@ public class ArrowType { public static let ArrowBinary = Info.variableInfo(ArrowTypeId.binary) public static let ArrowTime32 = Info.timeInfo(ArrowTypeId.time32) public static let ArrowTime64 = Info.timeInfo(ArrowTypeId.time64) + public static let ArrowTimestamp = Info.timeInfo(ArrowTypeId.timestamp) public static let ArrowStruct = Info.complexInfo(ArrowTypeId.strct) public init(_ info: ArrowType.Info) { @@ -270,6 +314,8 @@ public class ArrowType { return MemoryLayout.stride case .time64: return MemoryLayout.stride + case .timestamp: + return MemoryLayout.stride case .binary: return MemoryLayout.stride case .string: @@ -320,6 +366,11 @@ public class ArrowType { return try time64.cDataFormatId } return "ttu" + case ArrowTypeId.timestamp: + if let timestamp = self as? ArrowTypeTimestamp { + return try timestamp.cDataFormatId + } + return "tsu" case ArrowTypeId.binary: return "z" case ArrowTypeId.string: @@ -366,6 +417,24 @@ public class ArrowType { return ArrowTypeTime64(.microseconds) } else if from == "ttn" { return ArrowTypeTime64(.nanoseconds) + } else if from.starts(with: "ts") { + let components = from.split(separator: ":", maxSplits: 1) + guard let unitPart = components.first, unitPart.count == 3 else { + throw ArrowError.invalid("Invalid timestamp format '\(from)'. Expected format 'ts[s|m|u|n][:timezone]'") + } + + let unitChar = unitPart.suffix(1) + let unit: ArrowTimestampUnit + switch unitChar { + case "s": unit = .seconds + case "m": unit = .milliseconds + case "u": unit = .microseconds + case "n": unit = .nanoseconds + default: throw ArrowError.invalid("Unrecognized timestamp unit '\(unitChar)'. Expected 's', 'm', 'u', or 'n'.") + } + + let timezone = components.count > 1 ? String(components[1]) : nil + return ArrowTypeTimestamp(unit, timezone: timezone) } else if from == "z" { return ArrowType(ArrowType.ArrowBinary) } else if from == "u" { diff --git a/Arrow/Sources/Arrow/ArrowWriterHelper.swift b/Arrow/Sources/Arrow/ArrowWriterHelper.swift index b3fa2b4..7ecb3ab 100644 --- a/Arrow/Sources/Arrow/ArrowWriterHelper.swift +++ b/Arrow/Sources/Arrow/ArrowWriterHelper.swift @@ -41,6 +41,8 @@ func toFBTypeEnum(_ arrowType: ArrowType) -> Result