From 628bf6dcb6ef652c5b11875d3fa255a1add63ddc Mon Sep 17 00:00:00 2001 From: Tilo Sloboda Date: Sat, 29 Jun 2024 14:09:37 +0800 Subject: [PATCH 1/7] adding feature to output CSV files --- .rubocop.yml | 3 + CHANGELOG.md | 3 + lib/smarter_csv.rb | 1 + lib/smarter_csv/generator.rb | 56 ++++++++++++++++++ spec/smarter_csv/generator_spec.rb | 93 ++++++++++++++++++++++++++++++ 5 files changed, 156 insertions(+) create mode 100644 lib/smarter_csv/generator.rb create mode 100644 spec/smarter_csv/generator_spec.rb diff --git a/.rubocop.yml b/.rubocop.yml index 38f58068..a3d8ddfe 100644 --- a/.rubocop.yml +++ b/.rubocop.yml @@ -112,6 +112,9 @@ Style/SlicingWithRange: Style/SpecialGlobalVars: # DANGER: unsafe rule!! Enabled: false +Style/StringConcatenation: + Enabled: false + Style/StringLiterals: Enabled: false EnforcedStyle: double_quotes diff --git a/CHANGELOG.md b/CHANGELOG.md index 0103584a..55eec4d3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,9 @@ # SmarterCSV 1.x Change Log +## 1.11.0 + * added feature to output CSV files ([issue #44](https://github.com/tilo/smarter_csv/issues/44)) + ## 1.10.3 (2024-03-10) * fixed issue when frozen options are handed in (thanks to Daniel Pepper) * cleaned-up rspec tests (thanks to Daniel Pepper) diff --git a/lib/smarter_csv.rb b/lib/smarter_csv.rb index 26b8914d..72c2464a 100644 --- a/lib/smarter_csv.rb +++ b/lib/smarter_csv.rb @@ -10,6 +10,7 @@ require "smarter_csv/headers" require "smarter_csv/hash_transformations" require "smarter_csv/parse" +require "smarter_csv/generator" # load the C-extension: case RUBY_ENGINE diff --git a/lib/smarter_csv/generator.rb b/lib/smarter_csv/generator.rb new file mode 100644 index 00000000..0a4735d5 --- /dev/null +++ b/lib/smarter_csv/generator.rb @@ -0,0 +1,56 @@ +# frozen_string_literal: true + +module SmarterCSV + # + # Generate CSV files from batches of array_of_hashes data + # - automatically generates the header on-the-fly + # - automatically quotes fields containing the col_sep + 
# + # Optionally headers can be passed-in via the options, + # If any new headers are fund in the data, they will be appended to the headers. + # + class Generator + def initialize(file_path, options = {}) + @options = options + @headers = options[:headers] || [] + @col_sep = options[:col_sep] || ',' + @force_quotes = options[:force_quotes] + @map_headers = options[:map_headers] || {} + @file = File.open(file_path, 'w+') + end + + def append(array_of_hashes) + array_of_hashes.each do |hash| + hash_keys = hash.keys + new_keys = hash_keys - @headers + @headers.concat(new_keys) + + # Reorder the hash to match the current headers order and fill missing fields + ordered_row = @headers.map { |header| hash[header] || '' } + + @file.puts ordered_row.map { |value| escape_csv_field(value) }.join(@col_sep) + end + end + + def finalize + # Map headers if :map_headers option is provided + mapped_headers = @headers.map { |header| @map_headers[header] || header } + + # Rewind to the beginning of the file to write the headers + @file.rewind + @file.write(mapped_headers.join(@col_sep) + "\n") + @file.flush # Ensure all data is written to the file + @file.close + end + + private + + def escape_csv_field(field) + if @force_quotes || field.to_s.include?(@col_sep) + "\"#{field}\"" + else + field.to_s + end + end + end +end diff --git a/spec/smarter_csv/generator_spec.rb b/spec/smarter_csv/generator_spec.rb new file mode 100644 index 00000000..63d84134 --- /dev/null +++ b/spec/smarter_csv/generator_spec.rb @@ -0,0 +1,93 @@ +# frozen_string_literal: true + +RSpec.describe SmarterCSV::Generator do + let(:file_path) { 'test_output.csv' } + + after(:each) do + File.delete(file_path) if File.exist?(file_path) + end + + context 'when headers are given in advance' do + let(:options) { { headers: %w[name age city] } } + let(:data_batches) do + [ + [ + { name: 'John', age: 30, city: 'New York' }, + { name: 'Jane', age: 25, country: 'USA' } + ], + [ + { name: 'Mike', age: 35, city: 'Chicago', 
state: 'IL' } + ] + ] + end + + it 'writes the given headers and data correctly' do + generator = SmarterCSV::Generator.new(file_path, options) + data_batches.each { |batch| generator.append(batch) } + generator.finalize + + output = File.read(file_path) + expect(output).to include("name,age,city\n") + expect(output).to include("John,30,New York\n") + expect(output).to include("Jane,25,\n") + expect(output).to include("Mike,35,Chicago\n") + end + end + + context 'when headers are automatically discovered' do + let(:data_batches) do + [ + [ + { name: 'John', age: 30, city: 'New York' }, + { name: 'Jane', age: 25, country: 'USA' } + ], + [ + { name: 'Mike', age: 35, city: 'Chicago', state: 'IL' } + ] + ] + end + + it 'writes the discovered headers and data correctly' do + generator = SmarterCSV::Generator.new(file_path) + data_batches.each { |batch| generator.append(batch) } + generator.finalize + + output = File.read(file_path) + expect(output).to include("name,age,city,country,state\n") + expect(output).to include("John,30,New York,,\n") + expect(output).to include("Jane,25,,USA,\n") + expect(output).to include("Mike,35,Chicago,,IL\n") + end + end + + context 'when headers are mapped' do + let(:options) do + { + map_headers: { name: 'Full Name', age: 'Age', city: 'City', country: 'Country', state: 'State' } + } + end + let(:data_batches) do + [ + [ + { name: 'John', age: 30, city: 'New York' }, + { name: 'Jane', age: 25, country: 'USA' } + ], + [ + { name: 'Mike', age: 35, city: 'Chicago', state: 'IL' } + ] + ] + end + + it 'writes the mapped headers and data correctly' do + generator = SmarterCSV::Generator.new(file_path, options) + data_batches.each { |batch| generator.append(batch) } + generator.finalize + + output = File.read(file_path) + expect(output).to include("Full Name,Age,City,Country,State\n") + expect(output).to include("John,30,New York,,\n") + expect(output).to include("Jane,25,,USA,\n") + expect(output).to include("Mike,35,Chicago,,IL\n") + end + 
end +end From d542daa3a9e63cbca18012e3877d894c846e4206 Mon Sep 17 00:00:00 2001 From: Tilo Sloboda Date: Sat, 29 Jun 2024 16:06:38 +0800 Subject: [PATCH 2/7] update --- README.md | 1 + lib/smarter_csv/generator.rb | 1 + smarter_csv.gemspec | 4 +- spec/smarter_csv/generator_spec.rb | 62 +++++++++++------------------- 4 files changed, 27 insertions(+), 41 deletions(-) diff --git a/README.md b/README.md index 4b25a98b..8544d279 100644 --- a/README.md +++ b/README.md @@ -3,6 +3,7 @@ [![codecov](https://codecov.io/gh/tilo/smarter_csv/branch/main/graph/badge.svg?token=1L7OD80182)](https://codecov.io/gh/tilo/smarter_csv) [![Gem Version](https://badge.fury.io/rb/smarter_csv.svg)](http://badge.fury.io/rb/smarter_csv) +This library provides a complete interface to CSV files and data. It offers tools to enable you to read and write to and from Strings or IO objects, as needed. #### LATEST CHANGES diff --git a/lib/smarter_csv/generator.rb b/lib/smarter_csv/generator.rb index 0a4735d5..bd2fc182 100644 --- a/lib/smarter_csv/generator.rb +++ b/lib/smarter_csv/generator.rb @@ -12,6 +12,7 @@ module SmarterCSV class Generator def initialize(file_path, options = {}) @options = options + @discover_headers = options.has_key?(:discover_headers) ? (options[:discover_headers] == true) : true @headers = options[:headers] || [] @col_sep = options[:col_sep] || ',' @force_quotes = options[:force_quotes] diff --git a/smarter_csv.gemspec b/smarter_csv.gemspec index dea77138..909919eb 100644 --- a/smarter_csv.gemspec +++ b/smarter_csv.gemspec @@ -9,8 +9,8 @@ Gem::Specification.new do |spec| spec.authors = ["Tilo Sloboda"] spec.email = ["tilo.sloboda@gmail.com"] - spec.summary = "Ruby Gem for smarter importing of CSV Files (and CSV-like files), with lots of optional features, e.g. 
chunked processing for huge CSV files" - spec.description = "Ruby Gem for smarter importing of CSV Files as Array(s) of Hashes, with optional features for processing large files in parallel, embedded comments, unusual field- and record-separators, flexible mapping of CSV-headers to Hash-keys" + spec.summary = "CSV Reading and Writing" + spec.description = "Ruby Gem for smarter importing of CSV Files as Array(s) of Hashes, with lots of features for processing large files in parallel, embedded comments, unusual field- and record-separators, flexible mapping of CSV-headers to Hash-keys" spec.homepage = "https://github.com/tilo/smarter_csv" spec.license = 'MIT' diff --git a/spec/smarter_csv/generator_spec.rb b/spec/smarter_csv/generator_spec.rb index 63d84134..d2ba7ff0 100644 --- a/spec/smarter_csv/generator_spec.rb +++ b/spec/smarter_csv/generator_spec.rb @@ -7,19 +7,20 @@ File.delete(file_path) if File.exist?(file_path) end - context 'when headers are given in advance' do - let(:options) { { headers: %w[name age city] } } - let(:data_batches) do + let(:data_batches) do + [ + [ + { name: 'John', age: 30, city: 'New York' }, + { name: 'Jane', age: 25, country: 'USA' } + ], [ - [ - { name: 'John', age: 30, city: 'New York' }, - { name: 'Jane', age: 25, country: 'USA' } - ], - [ - { name: 'Mike', age: 35, city: 'Chicago', state: 'IL' } - ] + { name: 'Mike', age: 35, city: 'Chicago', state: 'IL' } ] - end + ] + end + + context 'when headers are given in advance' do + let(:options) { { headers: %w[name age city] } } it 'writes the given headers and data correctly' do generator = SmarterCSV::Generator.new(file_path, options) @@ -27,26 +28,14 @@ generator.finalize output = File.read(file_path) - expect(output).to include("name,age,city\n") - expect(output).to include("John,30,New York\n") - expect(output).to include("Jane,25,\n") - expect(output).to include("Mike,35,Chicago\n") + expect(output).to include("name,age,city,country,state\n") + expect(output).to 
include("John,30,New York,,\n") + expect(output).to include("Jane,25,,USA,\n") + expect(output).to include("Mike,35,Chicago,,IL\n") end end context 'when headers are automatically discovered' do - let(:data_batches) do - [ - [ - { name: 'John', age: 30, city: 'New York' }, - { name: 'Jane', age: 25, country: 'USA' } - ], - [ - { name: 'Mike', age: 35, city: 'Chicago', state: 'IL' } - ] - ] - end - it 'writes the discovered headers and data correctly' do generator = SmarterCSV::Generator.new(file_path) data_batches.each { |batch| generator.append(batch) } @@ -63,20 +52,15 @@ context 'when headers are mapped' do let(:options) do { - map_headers: { name: 'Full Name', age: 'Age', city: 'City', country: 'Country', state: 'State' } + map_headers: { + name: 'Full Name', + age: 'Age', + city: 'City', + country: 'Country', + state: 'State', + } } end - let(:data_batches) do - [ - [ - { name: 'John', age: 30, city: 'New York' }, - { name: 'Jane', age: 25, country: 'USA' } - ], - [ - { name: 'Mike', age: 35, city: 'Chicago', state: 'IL' } - ] - ] - end it 'writes the mapped headers and data correctly' do generator = SmarterCSV::Generator.new(file_path, options) From bd77cb5b1d6fe2976953f7c1381dddb273734e2b Mon Sep 17 00:00:00 2001 From: Tilo Sloboda Date: Tue, 2 Jul 2024 09:15:08 +0800 Subject: [PATCH 3/7] adding more tests to Writer class --- CHANGELOG.md | 3 +- lib/smarter_csv.rb | 2 +- lib/smarter_csv/{generator.rb => writer.rb} | 20 +- spec/smarter_csv/generator_spec.rb | 77 ------- spec/smarter_csv/writer_spec.rb | 229 ++++++++++++++++++++ 5 files changed, 243 insertions(+), 88 deletions(-) rename lib/smarter_csv/{generator.rb => writer.rb} (74%) delete mode 100644 spec/smarter_csv/generator_spec.rb create mode 100644 spec/smarter_csv/writer_spec.rb diff --git a/CHANGELOG.md b/CHANGELOG.md index 55eec4d3..76afad3e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,7 +2,8 @@ # SmarterCSV 1.x Change Log ## 1.11.0 - * added feature to output CSV files ([issue 
#44](https://github.com/tilo/smarter_csv/issues/44)) + * added SmarterCSV::Writer to output CSV files ([issue #44](https://github.com/tilo/smarter_csv/issues/44)) + * added SmarterCSV::Reader to isolate parsing of CSV files ## 1.10.3 (2024-03-10) * fixed issue when frozen options are handed in (thanks to Daniel Pepper) diff --git a/lib/smarter_csv.rb b/lib/smarter_csv.rb index 72c2464a..f0ef1b78 100644 --- a/lib/smarter_csv.rb +++ b/lib/smarter_csv.rb @@ -10,7 +10,7 @@ require "smarter_csv/headers" require "smarter_csv/hash_transformations" require "smarter_csv/parse" -require "smarter_csv/generator" +require "smarter_csv/writer" # load the C-extension: case RUBY_ENGINE diff --git a/lib/smarter_csv/generator.rb b/lib/smarter_csv/writer.rb similarity index 74% rename from lib/smarter_csv/generator.rb rename to lib/smarter_csv/writer.rb index bd2fc182..81cb69e5 100644 --- a/lib/smarter_csv/generator.rb +++ b/lib/smarter_csv/writer.rb @@ -9,7 +9,7 @@ module SmarterCSV # Optionally headers can be passed-in via the options, # If any new headers are fund in the data, they will be appended to the headers. # - class Generator + class Writer def initialize(file_path, options = {}) @options = options @discover_headers = options.has_key?(:discover_headers) ? 
(options[:discover_headers] == true) : true @@ -17,7 +17,8 @@ def initialize(file_path, options = {}) @col_sep = options[:col_sep] || ',' @force_quotes = options[:force_quotes] @map_headers = options[:map_headers] || {} - @file = File.open(file_path, 'w+') + @temp_file = Tempfile.new('tempfile', '/tmp') + @output_file = File.open(file_path, 'w+') end def append(array_of_hashes) @@ -29,7 +30,7 @@ def append(array_of_hashes) # Reorder the hash to match the current headers order and fill missing fields ordered_row = @headers.map { |header| hash[header] || '' } - @file.puts ordered_row.map { |value| escape_csv_field(value) }.join(@col_sep) + @temp_file.puts ordered_row.map { |value| escape_csv_field(value) }.join(@col_sep) end end @@ -37,17 +38,18 @@ def finalize # Map headers if :map_headers option is provided mapped_headers = @headers.map { |header| @map_headers[header] || header } - # Rewind to the beginning of the file to write the headers - @file.rewind - @file.write(mapped_headers.join(@col_sep) + "\n") - @file.flush # Ensure all data is written to the file - @file.close + @temp_file.rewind + @output_file.write(mapped_headers.join(@col_sep) + "\n") + @output_file.write(@temp_file.read) + @output_file.flush + @output_file.close end private + SPECIAL_CHARS = /[,\"\n]/ def escape_csv_field(field) - if @force_quotes || field.to_s.include?(@col_sep) + if @force_quotes || field.to_s.match(SPECIAL_CHARS) "\"#{field}\"" else field.to_s diff --git a/spec/smarter_csv/generator_spec.rb b/spec/smarter_csv/generator_spec.rb deleted file mode 100644 index d2ba7ff0..00000000 --- a/spec/smarter_csv/generator_spec.rb +++ /dev/null @@ -1,77 +0,0 @@ -# frozen_string_literal: true - -RSpec.describe SmarterCSV::Generator do - let(:file_path) { 'test_output.csv' } - - after(:each) do - File.delete(file_path) if File.exist?(file_path) - end - - let(:data_batches) do - [ - [ - { name: 'John', age: 30, city: 'New York' }, - { name: 'Jane', age: 25, country: 'USA' } - ], - [ - { name: 
'Mike', age: 35, city: 'Chicago', state: 'IL' } - ] - ] - end - - context 'when headers are given in advance' do - let(:options) { { headers: %w[name age city] } } - - it 'writes the given headers and data correctly' do - generator = SmarterCSV::Generator.new(file_path, options) - data_batches.each { |batch| generator.append(batch) } - generator.finalize - - output = File.read(file_path) - expect(output).to include("name,age,city,country,state\n") - expect(output).to include("John,30,New York,,\n") - expect(output).to include("Jane,25,,USA,\n") - expect(output).to include("Mike,35,Chicago,,IL\n") - end - end - - context 'when headers are automatically discovered' do - it 'writes the discovered headers and data correctly' do - generator = SmarterCSV::Generator.new(file_path) - data_batches.each { |batch| generator.append(batch) } - generator.finalize - - output = File.read(file_path) - expect(output).to include("name,age,city,country,state\n") - expect(output).to include("John,30,New York,,\n") - expect(output).to include("Jane,25,,USA,\n") - expect(output).to include("Mike,35,Chicago,,IL\n") - end - end - - context 'when headers are mapped' do - let(:options) do - { - map_headers: { - name: 'Full Name', - age: 'Age', - city: 'City', - country: 'Country', - state: 'State', - } - } - end - - it 'writes the mapped headers and data correctly' do - generator = SmarterCSV::Generator.new(file_path, options) - data_batches.each { |batch| generator.append(batch) } - generator.finalize - - output = File.read(file_path) - expect(output).to include("Full Name,Age,City,Country,State\n") - expect(output).to include("John,30,New York,,\n") - expect(output).to include("Jane,25,,USA,\n") - expect(output).to include("Mike,35,Chicago,,IL\n") - end - end -end diff --git a/spec/smarter_csv/writer_spec.rb b/spec/smarter_csv/writer_spec.rb new file mode 100644 index 00000000..62b0ff33 --- /dev/null +++ b/spec/smarter_csv/writer_spec.rb @@ -0,0 +1,229 @@ +# frozen_string_literal: true + 
+RSpec.describe SmarterCSV::Writer do + subject(:create_csv_file) do + writer = SmarterCSV::Writer.new(file_path, options) + data_batches.each { |batch| writer.append(batch) } + writer.finalize + end + let(:file_path) { '/tmp/test_output.csv' } + + after(:each) do + File.delete(file_path) if File.exist?(file_path) + end + + let(:data_batches) do + [ + [ + { name: 'John', age: 30, city: 'New York' }, + { name: 'Jane', age: 25, country: 'USA' } + ], + [ + { name: 'Mike', age: 35, city: 'Chicago', state: 'IL' } + ] + ] + end + + context 'when headers are given in advance' do + let(:options) { { headers: %i[name age city] } } + + it 'writes the given headers and data correctly' do + create_csv_file + output = File.read(file_path) + + expect(output).to include("name,age,city,country,state\n") + expect(output).to include("John,30,New York\n") + expect(output).to include("Jane,25,,USA\n") + expect(output).to include("Mike,35,Chicago,,IL\n") + end + end + + context 'when headers are automatically discovered' do + let(:options) { {} } + + it 'writes the discovered headers and data correctly' do + create_csv_file + output = File.read(file_path) + + expect(output).to include("name,age,city,country,state\n") + expect(output).to include("John,30,New York\n") + expect(output).to include("Jane,25,,USA\n") + expect(output).to include("Mike,35,Chicago,,IL\n") + end + end + + context 'when headers are mapped' do + let(:options) do + { + map_headers: { + name: 'Full Name', + age: 'Age', + city: 'City', + country: 'Country', + state: 'State', + } + } + end + + it 'writes the mapped headers and data correctly' do + create_csv_file + output = File.read(file_path) + + expect(output).to include("Full Name,Age,City,Country,State\n") + expect(output).to include("John,30,New York\n") + expect(output).to include("Jane,25,,USA\n") + expect(output).to include("Mike,35,Chicago,,IL\n") + end + end + + context 'Initialization with Default Options' do + it 'initializes with default options' do + 
writer = SmarterCSV::Writer.new(file_path) + expect(writer.instance_variable_get(:@discover_headers)).to be true + expect(writer.instance_variable_get(:@headers)).to eq([]) + expect(writer.instance_variable_get(:@col_sep)).to eq(',') + end + end + + context 'Initialization with Custom Options' do + it 'initializes with custom options' do + options = { discover_headers: false, headers: ['a', 'b'], col_sep: ';', force_quotes: true, map_headers: { 'a' => 'A' } } + writer = SmarterCSV::Writer.new(file_path, options) + expect(writer.instance_variable_get(:@discover_headers)).to be false + expect(writer.instance_variable_get(:@headers)).to eq(['a', 'b']) + expect(writer.instance_variable_get(:@col_sep)).to eq(';') + expect(writer.instance_variable_get(:@force_quotes)).to be true + expect(writer.instance_variable_get(:@map_headers)).to eq({ 'a' => 'A' }) + end + end + + context 'Appending Data' do + it 'appends multiple hashes over multiple calls' do + writer = SmarterCSV::Writer.new(file_path) + writer.append([{ a: 1, b: 2 }, {c: 3}]) + writer.append([{ d: 4, a: 5 }]) + writer.finalize + output = File.read(file_path) + + expect(output).to include("a,b,c,d\n") + expect(output).to include("1,2\n") + expect(output).to include(",,3\n") + expect(output).to include("5,,,4\n") + end + + it 'appends with existing headers' do + options = { headers: [:a] } + writer = SmarterCSV::Writer.new(file_path, options) + writer.append([{ a: 1, b: 2 }]) + writer.finalize + + expect(File.read(file_path)).to eq("a,b\n1,2\n") + end + + it 'appends with missing fields' do + writer = SmarterCSV::Writer.new(file_path) + writer.append([{ a: 1, b: 2 }, { a: 3 }]) + writer.finalize + + expect(File.read(file_path)).to eq("a,b\n1,2\n3,\n") + end + end + + context 'Finalizing the Output File' do + it 'maps headers' do + options = { map_headers: { a: 'A', b: 'B' } } + writer = SmarterCSV::Writer.new(file_path, options) + writer.append([{ a: 1, b: 2 }]) + writer.finalize + + 
expect(File.read(file_path)).to eq("A,B\n1,2\n") + end + + it 'writes header and appends content to output file' do + writer = SmarterCSV::Writer.new(file_path) + writer.append([{ a: 1, b: 2 }]) + writer.finalize + + expect(File.read(file_path)).to eq("a,b\n1,2\n") + end + + it 'properly closes the output file' do + writer = SmarterCSV::Writer.new(file_path) + writer.append([{ a: 1, b: 2 }]) + writer.finalize + + expect(File).to be_exist(file_path) + end + end + + context 'CSV Field Escaping' do + it 'does not quote fields without commas unless force_quotes is enabled' do + writer = SmarterCSV::Writer.new(file_path) + writer.append([{ a: 'hello', b: 'world' }]) + writer.finalize + + expect(File.read(file_path)).to eq("a,b\nhello,world\n") + end + + it 'quotes fields with column separator' do + writer = SmarterCSV::Writer.new(file_path) + writer.append([{ a: 'hello, world', b: 'test' }]) + writer.finalize + + expect(File.read(file_path)).to eq("a,b\n\"hello, world\",test\n") + end + + it 'quotes all fields when force_quotes is enabled' do + options = { force_quotes: true } + writer = SmarterCSV::Writer.new(file_path, options) + writer.append([{ a: 'hello', b: 'world' }]) + writer.finalize + + expect(File.read(file_path)).to eq("a,b\n\"hello\",\"world\"\n") + end + end + + context 'Edge Cases' do + it 'handles empty hash' do + writer = SmarterCSV::Writer.new(file_path) + writer.append([{}]) + writer.finalize + + expect(File.read(file_path)).to eq("\n\n") + end + + it 'handles empty array' do + writer = SmarterCSV::Writer.new(file_path) + writer.append([]) + writer.finalize + + expect(File.read(file_path)).to eq("\n") + end + + it 'handles special characters in data' do + writer = SmarterCSV::Writer.new(file_path) + writer.append([{ a: "hello\nworld", b: 'quote"test' }]) + writer.finalize + + expect(File.read(file_path)).to eq("a,b\n\"hello\nworld\",\"quote\"test\"\n") + end + end + + context 'Error Handling' do + it 'handles file access issues' do + allow(File).to 
receive(:open).and_raise(Errno::EACCES) + + expect { + SmarterCSV::Writer.new(file_path) + }.to raise_error(Errno::EACCES) + end + + it 'handles tempfile issues' do + allow(Tempfile).to receive(:new).and_raise(Errno::ENOENT) + + expect { + SmarterCSV::Writer.new(file_path) + }.to raise_error(Errno::ENOENT) + end + end +end From 813c1f4e5f2f99082b1ef86ad37e158303482a9f Mon Sep 17 00:00:00 2001 From: Tilo Sloboda Date: Tue, 2 Jul 2024 16:15:15 +0800 Subject: [PATCH 4/7] update --- lib/smarter_csv/writer.rb | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/lib/smarter_csv/writer.rb b/lib/smarter_csv/writer.rb index 81cb69e5..a24035c8 100644 --- a/lib/smarter_csv/writer.rb +++ b/lib/smarter_csv/writer.rb @@ -14,11 +14,14 @@ def initialize(file_path, options = {}) @options = options @discover_headers = options.has_key?(:discover_headers) ? (options[:discover_headers] == true) : true @headers = options[:headers] || [] + @row_sep = options[:row_sep] || "\n" # RFC4180 "\r\n" @col_sep = options[:col_sep] || ',' + @quote_char = '"' @force_quotes = options[:force_quotes] @map_headers = options[:map_headers] || {} @temp_file = Tempfile.new('tempfile', '/tmp') @output_file = File.open(file_path, 'w+') + @quote_regex = Regexp.union(@col_sep, @row_sep, @quote_char) end def append(array_of_hashes) @@ -30,7 +33,7 @@ def append(array_of_hashes) # Reorder the hash to match the current headers order and fill missing fields ordered_row = @headers.map { |header| hash[header] || '' } - @temp_file.puts ordered_row.map { |value| escape_csv_field(value) }.join(@col_sep) + @temp_file.write ordered_row.map { |value| escape_csv_field(value) }.join(@col_sep) + @row_sep end end @@ -39,7 +42,7 @@ def finalize mapped_headers = @headers.map { |header| @map_headers[header] || header } @temp_file.rewind - @output_file.write(mapped_headers.join(@col_sep) + "\n") + @output_file.write(mapped_headers.join(@col_sep) + @row_sep) @output_file.write(@temp_file.read) 
@output_file.flush @output_file.close @@ -47,9 +50,8 @@ def finalize private - SPECIAL_CHARS = /[,\"\n]/ def escape_csv_field(field) - if @force_quotes || field.to_s.match(SPECIAL_CHARS) + if @force_quotes || field.to_s.match(@quote_regex) "\"#{field}\"" else field.to_s From 8acbcfd2909c8b359957b1c03995db710aa65d79 Mon Sep 17 00:00:00 2001 From: Tilo Sloboda Date: Tue, 2 Jul 2024 17:37:13 +0800 Subject: [PATCH 5/7] refactor --- lib/smarter_csv/writer.rb | 43 ++++++++++--- spec/smarter_csv/writer_spec.rb | 107 ++++++++++++++++++++++++++------ 2 files changed, 121 insertions(+), 29 deletions(-) diff --git a/lib/smarter_csv/writer.rb b/lib/smarter_csv/writer.rb index a24035c8..3a02a61d 100644 --- a/lib/smarter_csv/writer.rb +++ b/lib/smarter_csv/writer.rb @@ -9,6 +9,18 @@ module SmarterCSV # Optionally headers can be passed-in via the options, # If any new headers are fund in the data, they will be appended to the headers. # + # col_sep : defaults to , but can be set to any other character + # row_sep : defaults to LF \n , but can be set to \r\n or \r or anything else + # quote_char : defaults to " + # discover_headers : defaults to true + # headers : defaults to [] + + # IMPORTANT NOTES: + # 1) Data hashes could contain strings or symbols as keys. 
+ # Make sure to use the correct form when specifying headers manually, + # in combination with the :discover_headers option + # 2) + class Writer def initialize(file_path, options = {}) @options = options @@ -24,16 +36,16 @@ def initialize(file_path, options = {}) @quote_regex = Regexp.union(@col_sep, @row_sep, @quote_char) end - def append(array_of_hashes) - array_of_hashes.each do |hash| - hash_keys = hash.keys - new_keys = hash_keys - @headers - @headers.concat(new_keys) - - # Reorder the hash to match the current headers order and fill missing fields - ordered_row = @headers.map { |header| hash[header] || '' } - - @temp_file.write ordered_row.map { |value| escape_csv_field(value) }.join(@col_sep) + @row_sep + def <<(data) + case data + when Hash + process_hash(data) + when Array + data.each { |item| self << item } + when NilClass + # ignore + else + raise ArgumentError, "Invalid data type: #{data.class}. Must be a Hash or an Array." end end @@ -50,6 +62,17 @@ def finalize private + def process_hash(hash) + hash_keys = hash.keys + new_keys = hash_keys - @headers + @headers.concat(new_keys) + + # Reorder the hash to match the current headers order and fill missing fields + ordered_row = @headers.map { |header| hash[header] || '' } + + @temp_file.write ordered_row.map { |value| escape_csv_field(value) }.join(@col_sep) + @row_sep + end + def escape_csv_field(field) if @force_quotes || field.to_s.match(@quote_regex) "\"#{field}\"" diff --git a/spec/smarter_csv/writer_spec.rb b/spec/smarter_csv/writer_spec.rb index 62b0ff33..b30586f9 100644 --- a/spec/smarter_csv/writer_spec.rb +++ b/spec/smarter_csv/writer_spec.rb @@ -1,9 +1,10 @@ # frozen_string_literal: true +# rubocop:disable Style/WordArray RSpec.describe SmarterCSV::Writer do subject(:create_csv_file) do writer = SmarterCSV::Writer.new(file_path, options) - data_batches.each { |batch| writer.append(batch) } + data_batches.each { |batch| writer << batch } writer.finalize end let(:file_path) { 
'/tmp/test_output.csv' } @@ -20,10 +21,76 @@ ], [ { name: 'Mike', age: 35, city: 'Chicago', state: 'IL' } - ] + ], + {name: 'Alex', country: 'USA'} ] end + context 'simplest case: one hash given' do + let(:options) { {} } + let(:data) do + { name: 'John', age: 30, city: 'New York' } + end + + it 'writes the given headers and data correctly' do + writer = SmarterCSV::Writer.new(file_path, options) + writer << data + writer.finalize + output = File.read(file_path) + + expect(output).to include("name,age,city\n") + expect(output).to include("John,30,New York\n") + end + end + + context 'case: array of hashes given' do + let(:options) { {} } + let(:data) do + { name: 'John', age: 30, city: 'New York' } + end + + it 'writes the given headers and data correctly' do + writer = SmarterCSV::Writer.new(file_path, options) + writer << data_batches[0] + writer << data_batches[1] + writer.finalize + output = File.read(file_path) + + expect(output).to include("name,age,city,country,state\n") + expect(output).to include("John,30,New York\n") + expect(output).to include("Jane,25,,USA\n") + expect(output).to include("Mike,35,Chicago,,IL\n") + end + end + + context "when deeply nested data" do + let(:options) { {} } + let(:data_batches) do + [[[ + [ + { name: 'John', age: 30, city: 'New York' }, + [{ name: 'Jane', age: 25, country: 'USA' }, nil], + [] + ], + [ + { name: 'Mike', age: 35, city: 'Chicago', state: 'IL' } + ] + ]], + {name: 'Alex', country: 'USA'}] + end + + it 'writes the given headers and data correctly' do + create_csv_file + output = File.read(file_path) + + expect(output).to include("name,age,city,country,state\n") + expect(output).to include("John,30,New York\n") + expect(output).to include("Jane,25,,USA\n") + expect(output).to include("Mike,35,Chicago,,IL\n") + expect(output).to include("Alex,,,USA,\n") + end + end + context 'when headers are given in advance' do let(:options) { { headers: %i[name age city] } } @@ -35,6 +102,7 @@ expect(output).to 
include("John,30,New York\n") expect(output).to include("Jane,25,,USA\n") expect(output).to include("Mike,35,Chicago,,IL\n") + expect(output).to include("Alex,,,USA,\n") end end @@ -100,8 +168,8 @@ context 'Appending Data' do it 'appends multiple hashes over multiple calls' do writer = SmarterCSV::Writer.new(file_path) - writer.append([{ a: 1, b: 2 }, {c: 3}]) - writer.append([{ d: 4, a: 5 }]) + writer << [{ a: 1, b: 2 }, {c: 3}] + writer << [{ d: 4, a: 5 }] writer.finalize output = File.read(file_path) @@ -114,7 +182,7 @@ it 'appends with existing headers' do options = { headers: [:a] } writer = SmarterCSV::Writer.new(file_path, options) - writer.append([{ a: 1, b: 2 }]) + writer << [{ a: 1, b: 2 }] writer.finalize expect(File.read(file_path)).to eq("a,b\n1,2\n") @@ -122,7 +190,7 @@ it 'appends with missing fields' do writer = SmarterCSV::Writer.new(file_path) - writer.append([{ a: 1, b: 2 }, { a: 3 }]) + writer << [{ a: 1, b: 2 }, { a: 3 }] writer.finalize expect(File.read(file_path)).to eq("a,b\n1,2\n3,\n") @@ -133,7 +201,7 @@ it 'maps headers' do options = { map_headers: { a: 'A', b: 'B' } } writer = SmarterCSV::Writer.new(file_path, options) - writer.append([{ a: 1, b: 2 }]) + writer << [{ a: 1, b: 2 }] writer.finalize expect(File.read(file_path)).to eq("A,B\n1,2\n") @@ -141,7 +209,7 @@ it 'writes header and appends content to output file' do writer = SmarterCSV::Writer.new(file_path) - writer.append([{ a: 1, b: 2 }]) + writer << [{ a: 1, b: 2 }] writer.finalize expect(File.read(file_path)).to eq("a,b\n1,2\n") @@ -149,7 +217,7 @@ it 'properly closes the output file' do writer = SmarterCSV::Writer.new(file_path) - writer.append([{ a: 1, b: 2 }]) + writer << [{ a: 1, b: 2 }] writer.finalize expect(File).to be_exist(file_path) @@ -159,7 +227,7 @@ context 'CSV Field Escaping' do it 'does not quote fields without commas unless force_quotes is enabled' do writer = SmarterCSV::Writer.new(file_path) - writer.append([{ a: 'hello', b: 'world' }]) + writer << [{ a: 
'hello', b: 'world' }] writer.finalize expect(File.read(file_path)).to eq("a,b\nhello,world\n") @@ -167,7 +235,7 @@ it 'quotes fields with column separator' do writer = SmarterCSV::Writer.new(file_path) - writer.append([{ a: 'hello, world', b: 'test' }]) + writer << [{ a: 'hello, world', b: 'test' }] writer.finalize expect(File.read(file_path)).to eq("a,b\n\"hello, world\",test\n") @@ -176,7 +244,7 @@ it 'quotes all fields when force_quotes is enabled' do options = { force_quotes: true } writer = SmarterCSV::Writer.new(file_path, options) - writer.append([{ a: 'hello', b: 'world' }]) + writer << [{ a: 'hello', b: 'world' }] writer.finalize expect(File.read(file_path)).to eq("a,b\n\"hello\",\"world\"\n") @@ -186,7 +254,7 @@ context 'Edge Cases' do it 'handles empty hash' do writer = SmarterCSV::Writer.new(file_path) - writer.append([{}]) + writer << [{}] writer.finalize expect(File.read(file_path)).to eq("\n\n") @@ -194,7 +262,7 @@ it 'handles empty array' do writer = SmarterCSV::Writer.new(file_path) - writer.append([]) + writer << [] writer.finalize expect(File.read(file_path)).to eq("\n") @@ -202,7 +270,7 @@ it 'handles special characters in data' do writer = SmarterCSV::Writer.new(file_path) - writer.append([{ a: "hello\nworld", b: 'quote"test' }]) + writer << [{ a: "hello\nworld", b: 'quote"test' }] writer.finalize expect(File.read(file_path)).to eq("a,b\n\"hello\nworld\",\"quote\"test\"\n") @@ -213,17 +281,18 @@ it 'handles file access issues' do allow(File).to receive(:open).and_raise(Errno::EACCES) - expect { + expect do SmarterCSV::Writer.new(file_path) - }.to raise_error(Errno::EACCES) + end.to raise_error(Errno::EACCES) end it 'handles tempfile issues' do allow(Tempfile).to receive(:new).and_raise(Errno::ENOENT) - expect { + expect do SmarterCSV::Writer.new(file_path) - }.to raise_error(Errno::ENOENT) + end.to raise_error(Errno::ENOENT) end end end +# rubocop:enable Style/WordArray From 312b5406f9c15b471d1425d4fc17d37cfb0fd736 Mon Sep 17 00:00:00 2001 
From: Tilo Sloboda Date: Tue, 2 Jul 2024 17:56:31 +0800 Subject: [PATCH 6/7] update --- lib/smarter_csv/writer.rb | 15 ++++++++------- spec/smarter_csv/writer_spec.rb | 15 +++++++++++++++ 2 files changed, 23 insertions(+), 7 deletions(-) diff --git a/lib/smarter_csv/writer.rb b/lib/smarter_csv/writer.rb index 3a02a61d..cef0e1ce 100644 --- a/lib/smarter_csv/writer.rb +++ b/lib/smarter_csv/writer.rb @@ -16,10 +16,9 @@ module SmarterCSV # headers : defaults to [] # IMPORTANT NOTES: - # 1) Data hashes could contain strings or symbols as keys. - # Make sure to use the correct form when specifying headers manually, - # in combination with the :discover_headers option - # 2) + # * Data hashes could contain strings or symbols as keys. + # Make sure to use the correct form when specifying headers manually, + # in combination with the :discover_headers option class Writer def initialize(file_path, options = {}) @@ -63,9 +62,11 @@ def finalize private def process_hash(hash) - hash_keys = hash.keys - new_keys = hash_keys - @headers - @headers.concat(new_keys) + if @discover_headers + hash_keys = hash.keys + new_keys = hash_keys - @headers + @headers.concat(new_keys) + end # Reorder the hash to match the current headers order and fill missing fields ordered_row = @headers.map { |header| hash[header] || '' } diff --git a/spec/smarter_csv/writer_spec.rb b/spec/smarter_csv/writer_spec.rb index b30586f9..41f59b22 100644 --- a/spec/smarter_csv/writer_spec.rb +++ b/spec/smarter_csv/writer_spec.rb @@ -89,6 +89,21 @@ expect(output).to include("Mike,35,Chicago,,IL\n") expect(output).to include("Alex,,,USA,\n") end + + context "when discover_headers is turned off" do + let(:options) { {discover_headers: false, headers: [:name, :country]} } + + it 'writes the given headers and data correctly' do + create_csv_file + output = File.read(file_path) + + expect(output).to include("name,country\n") + expect(output).to include("John,\n") + expect(output).to include("Jane,USA\n") + 
expect(output).to include("Mike,\n") + expect(output).to include("Alex,USA\n") + end + end end context 'when headers are given in advance' do From c1b65acc2f3a5b3209a090681b3ad4bad8487899 Mon Sep 17 00:00:00 2001 From: Tilo Sloboda Date: Tue, 2 Jul 2024 21:06:04 +0800 Subject: [PATCH 7/7] update version, README, CHANGELOG --- CHANGELOG.md | 3 +-- README.md | 4 ++-- lib/smarter_csv/version.rb | 2 +- lib/smarter_csv/writer.rb | 41 +++++++++++++++++++++++++++----------- 4 files changed, 33 insertions(+), 17 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 76afad3e..b28c5c71 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,9 +1,8 @@ # SmarterCSV 1.x Change Log -## 1.11.0 +## 1.11.0 (2024-07-02) * added SmarterCSV::Writer to output CSV files ([issue #44](https://github.com/tilo/smarter_csv/issues/44)) - * added SmarterCSV::Reader to isolate parsing of CSV files ## 1.10.3 (2024-03-10) * fixed issue when frozen options are handed in (thanks to Daniel Pepper) diff --git a/README.md b/README.md index 8544d279..0f817c5a 100644 --- a/README.md +++ b/README.md @@ -5,9 +5,9 @@ This library provides a complete interface to CSV files and data. It offers tools to enable you to read and write to and from Strings or IO objects, as needed. 
-#### LATEST CHANGES +#### BREAKING CHANGES -* Version 1.10.0 has BREAKING CHANGES: +* Version 1.10.0 had BREAKING CHANGES: Changed behavior: + when `user_provided_headers` are provided: diff --git a/lib/smarter_csv/version.rb b/lib/smarter_csv/version.rb index 7373e596..255a0b91 100644 --- a/lib/smarter_csv/version.rb +++ b/lib/smarter_csv/version.rb @@ -1,5 +1,5 @@ # frozen_string_literal: true module SmarterCSV - VERSION = "1.10.3" + VERSION = "1.11.0" end diff --git a/lib/smarter_csv/writer.rb b/lib/smarter_csv/writer.rb index cef0e1ce..a92a13e6 100644 --- a/lib/smarter_csv/writer.rb +++ b/lib/smarter_csv/writer.rb @@ -2,18 +2,33 @@ module SmarterCSV # - # Generate CSV files from batches of array_of_hashes data - # - automatically generates the header on-the-fly - # - automatically quotes fields containing the col_sep + # Generate CSV files # - # Optionally headers can be passed-in via the options, - # If any new headers are fund in the data, they will be appended to the headers. + # Create an instance of the Writer class with the filename and options. + # call `<<` one or multiple times to append data to the file. + # call `finalize` to save the file. # - # col_sep : defaults to , but can be set to any other character - # row_sep : defaults to LF \n , but can be set to \r\n or \r or anything else - # quote_char : defaults to " - # discover_headers : defaults to true - # headers : defaults to [] + # The `<<` method can take different arguments: + # * a single Hash + # * an array of Hashes + # * nested arrays of arrays of Hashes + # + # By default SmarterCSV::Writer automatically discovers all headers that are present + # in the data on-the-fly. This can be disabled, then only given headers are used. + # Disabling can be useful when you want to select attributes from hashes, or ActiveRecord instances. + # + # If `discover_headers` is enabled, and headers are given, any new headers that are found in the data will still be appended. 
+ # + # The Writer automatically quotes fields containing the col_sep, row_sep, or the quote_char. + # + # Options: + # col_sep : defaults to , but can be set to any other character + # row_sep : defaults to LF \n , but can be set to \r\n or \r or anything else + # quote_char : defaults to " + # discover_headers : defaults to true + # headers : defaults to [] + # force_quotes: defaults to false + # map_headers: defaults to {}, can be a hash of key -> value mappings # IMPORTANT NOTES: # * Data hashes could contain strings or symbols as keys. @@ -28,10 +43,11 @@ def initialize(file_path, options = {}) @row_sep = options[:row_sep] || "\n" # RFC4180 "\r\n" @col_sep = options[:col_sep] || ',' @quote_char = '"' - @force_quotes = options[:force_quotes] + @force_quotes = options[:force_quotes] == true @map_headers = options[:map_headers] || {} - @temp_file = Tempfile.new('tempfile', '/tmp') @output_file = File.open(file_path, 'w+') + # hidden state: + @temp_file = Tempfile.new('tempfile', '/tmp') @quote_regex = Regexp.union(@col_sep, @row_sep, @quote_char) end @@ -57,6 +73,7 @@ def finalize @output_file.write(@temp_file.read) @output_file.flush @output_file.close + @temp_file.delete end private