Skip to content

Discard delimiters inside double quote #3

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 2 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
89 changes: 76 additions & 13 deletions src/iterators.zig
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
//! [Released under GNU LGPLv3]
const std = @import("std");
const TableError = @import("zig-csv.zig").TableError;
const Allocator = std.mem.Allocator;

/// A struct for iterating over or fetching rows from a parsed table
pub const TableIterator = struct {
Expand All @@ -10,6 +11,8 @@ pub const TableIterator = struct {
delimiter: []const u8,
header: []const []const u8,
body: []const []const u8,
allocator: Allocator,
check_quote: bool,

/// Reset the iterator for the function TableIterator.next
pub fn reset(self: *TableIterator) void {
Expand All @@ -23,6 +26,8 @@ pub const TableIterator = struct {
const row = RowIterator{
.header = self.header,
.row = std.mem.splitSequence(u8, self.body[self.iterator_index], self.delimiter),
.allocator = self.allocator,
.check_quote = self.check_quote,
};

self.iterator_index += 1;
Expand All @@ -37,6 +42,8 @@ pub const TableIterator = struct {
return RowIterator{
.header = self.header,
.row = std.mem.splitSequence(u8, self.body[row_index], self.delimiter),
.allocator = self.allocator,
.check_quote = self.check_quote,
};
}
};
Expand All @@ -57,6 +64,8 @@ pub const RowIterator = struct {
iterator_index: usize = 0,
header: []const []const u8,
row: std.mem.SplitIterator(u8, .sequence),
allocator: Allocator,
check_quote: bool,

/// Reset the iterator for the function RowIterator.next
pub fn reset(self: *RowIterator) void {
Expand All @@ -69,12 +78,18 @@ pub const RowIterator = struct {
const value = self.row.next();
if (value == null) return null;

const item = RowItem{
var item = RowItem{
.column_index = self.iterator_index,
.key = self.header[self.iterator_index],
.value = value.?,
};

if (self.check_quote and item.value.len > 0 and item.value[0] == '"' and item.value[item.value.len - 1] != '"') {
while (item.value[item.value.len - 1] != '"') {
item.value = std.mem.concat(self.allocator, u8, &[_][]const u8{ item.value, self.row.delimiter, self.row.next().? }) catch item.value;
}
}

self.iterator_index += 1;

return item;
Expand All @@ -85,16 +100,23 @@ pub const RowIterator = struct {
var iterator = std.mem.splitSequence(u8, self.row.buffer, self.row.delimiter);
var current_column_index: usize = 0;

while (iterator.next()) |value| : (current_column_index += 1) {
if (current_column_index == target_column_index) {
return RowItem{
.column_index = current_column_index,
.key = self.header[current_column_index],
.value = value,
};
if (self.check_quote) {
return RowItem{
.column_index = target_column_index,
.key = self.header[target_column_index],
.value = try getColumnItemInQuote(u8, &iterator, target_column_index, self.allocator),
};
} else {
while (iterator.next()) |value| : (current_column_index += 1) {
if (current_column_index == target_column_index) {
return RowItem{
.column_index = current_column_index,
.key = self.header[current_column_index],
.value = value,
};
}
}
}

return TableError.IndexNotFound;
}
};
Expand All @@ -114,20 +136,32 @@ pub const ColumnIterator = struct {
column_index: usize,
delimiter: []const u8,
body: []const []const u8,
allocator: Allocator,
check_quote: bool,

// Create a ColumnItem from a row
fn rowToColumnItem(self: ColumnIterator, row: []const u8) ColumnItem {
var item: ColumnItem = undefined;
var values = std.mem.splitSequence(u8, row, self.delimiter);

var current_index: usize = 0;
while (values.next()) |value| : (current_index += 1) {
if (current_index == self.column_index) {
if (self.check_quote) {
const value: ?[]const u8 = getColumnItemInQuote(u8, &values, self.column_index, self.allocator) catch null;
if (value != null) {
item = ColumnItem{
.row_index = self.iterator_index,
.value = value,
.value = value.?,
};
}
} else {
var current_index: usize = 0;
while (values.next()) |value| : (current_index += 1) {
if (current_index == self.column_index) {
item = ColumnItem{
.row_index = self.iterator_index,
.value = value,
};
}
}
}

return item;
Expand Down Expand Up @@ -160,3 +194,32 @@ pub const ColumnIterator = struct {
return item;
}
};

/// Return the value of the column at `target_index`, treating delimiters that
/// appear inside a "double quoted" value as part of that value.
///
/// A piece that starts with `"` but does not end with `"` (or that is a lone
/// `"`) opens a quoted value; the value runs until the next piece that ends
/// with `"`. The surrounding quotes are kept in the returned slice.
///
/// The returned slice is a view into `split_iterator.buffer`; it is never
/// allocated, so the caller must not free it and it is valid for as long as
/// the buffer is. `allocator` is unused and is kept only so that existing
/// callers keep compiling.
///
/// The iterator is advanced past the returned value. Returns
/// `TableError.IndexNotFound` when the row has fewer than `target_index + 1`
/// columns, or when a quoted value is still open at the end of the row.
pub fn getColumnItemInQuote(comptime T: type, split_iterator: *std.mem.SplitIterator(T, .sequence), target_index: usize, allocator: std.mem.Allocator) TableError![]const T {
    // Every piece produced by the iterator is a sub-slice of its buffer, so a
    // quoted value spanning several pieces is already contiguous in memory.
    _ = allocator;

    var index: usize = 0;
    // Buffer offset where the currently open quoted value started, if any.
    var quote_start: ?usize = null;

    // `index` holds the buffer offset of the next piece, or null when exhausted.
    while (split_iterator.index) |item_start| {
        const item = split_iterator.next() orelse break;
        var value_start = item_start;

        if (quote_start) |open_at| {
            // Inside a quoted value: only a piece ending in `"` closes it.
            if (item.len == 0 or item[item.len - 1] != '"') continue;
            value_start = open_at;
            quote_start = null;
        } else {
            // A lone `"` opens a quote too: the value starts with a delimiter.
            const opens_quote = item.len > 0 and item[0] == '"' and
                (item.len == 1 or item[item.len - 1] != '"');
            if (opens_quote) {
                quote_start = item_start;
                continue;
            }
        }

        // A complete column value now spans value_start .. end of this piece.
        if (index == target_index) {
            return split_iterator.buffer[value_start .. item_start + item.len];
        }
        index += 1;
    }

    return TableError.IndexNotFound;
}
39 changes: 27 additions & 12 deletions src/zig-csv.zig
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,16 @@ const ArrayList = std.ArrayList;
const TableIterator = @import("iterators.zig").TableIterator;
const RowIterator = @import("iterators.zig").RowIterator;
const ColumnIterator = @import("iterators.zig").ColumnIterator;
const getColumnItemInQuote = @import("iterators.zig").getColumnItemInQuote;

/// A structure for storing settings for use with struct Table
pub const Settings = struct {
/// The delimiter that separates the values (aka. separator)
delimiter: []const u8,
/// The terminator that defines when a row of delimiter-separated values is terminated
terminator: []const u8,
/// When true, delimiters inside "double quotes" are treated as part of the value instead of as separators
check_quote: bool = false,

/// A function that returns the default settings that are most commonly used for CSV data
/// { .delimiter = ",", .terminator = "\n" }
Expand Down Expand Up @@ -59,16 +62,22 @@ pub const Table = struct {
body: std.ArrayListAligned([]const u8, null),

// Return the item with the matching index from an iterator struct std.mem.SplitIterator(T)
fn splitIteratorGetIndex(comptime T: type, split_iterator: *std.mem.SplitIterator(T, .sequence), target_index: usize) TableError![]const T {
var index: usize = 0;
fn splitIteratorGetIndex(self: *Table, comptime T: type, split_iterator: *std.mem.SplitIterator(T, .sequence), target_index: usize) TableError![]const T {
if (self.settings.check_quote) {
return getColumnItemInQuote(u8, split_iterator, target_index, self.arena_allocator.allocator());
} else {
var index: usize = 0;

while (split_iterator.next()) |item| : (index += 1) {
if (index == target_index) {
return item;
}
}
if (!self.settings.check_quote) {
while (split_iterator.next()) |item| : (index += 1) {
if (index == target_index) {
return item;
}
}
} else {}

return TableError.IndexNotFound;
return TableError.IndexNotFound;
}
}

/// Initialize struct Table
Expand Down Expand Up @@ -120,11 +129,13 @@ pub const Table = struct {
}

/// Returns a struct TableIterator containing all rows inside struct Table
pub fn getAllRows(self: Table) TableIterator {
pub fn getAllRows(self: *Table) TableIterator {
return TableIterator{
.delimiter = self.settings.delimiter,
.header = self.header.items,
.body = self.body.items,
.allocator = self.arena_allocator.allocator(),
.check_quote = self.settings.check_quote,
};
}

Expand All @@ -144,7 +155,7 @@ pub const Table = struct {
}

/// Return a slice of row indexes by a provided column index and searched value
pub fn findRowIndexesByValue(self: Table, allocator: Allocator, column_index: usize, searched_value: []const u8) TableError![]usize {
pub fn findRowIndexesByValue(self: *Table, allocator: Allocator, column_index: usize, searched_value: []const u8) TableError![]usize {
var row_indexes = ArrayList(usize).init(allocator);

if (column_index >= self.header.items.len) return TableError.IndexNotFound;
Expand All @@ -153,7 +164,7 @@ pub const Table = struct {
const row_count = std.mem.count(u8, row, self.settings.delimiter) + 1;
var row_values = std.mem.splitSequence(u8, row, self.settings.delimiter);
if (column_index >= row_count) return TableError.MissingValue;
const value = try Table.splitIteratorGetIndex(u8, &row_values, column_index);
const value = try self.splitIteratorGetIndex(u8, &row_values, column_index);

if (std.mem.eql(u8, value, searched_value)) {
try row_indexes.append(row_index);
Expand All @@ -166,11 +177,13 @@ pub const Table = struct {
}

/// Returns a struct ColumnIterator, containing all elements of a given column by its index
pub fn getColumnByIndex(self: Table, column_index: usize) ColumnIterator {
pub fn getColumnByIndex(self: *Table, column_index: usize) ColumnIterator {
return ColumnIterator{
.body = self.body.items,
.delimiter = self.settings.delimiter,
.column_index = column_index,
.allocator = self.arena_allocator.allocator(),
.check_quote = self.settings.check_quote,
};
}

Expand All @@ -181,6 +194,8 @@ pub const Table = struct {
return RowIterator{
.header = self.header.items,
.row = std.mem.splitSequence(u8, self.body.items[row_index], self.settings.delimiter),
.allocator = self.arena_allocator.allocator(),
.check_quote = self.settings.check_quote,
};
}

Expand Down
Loading