diff --git a/.rubocop.yml b/.rubocop.yml index 38f5806..a3d8ddf 100644 --- a/.rubocop.yml +++ b/.rubocop.yml @@ -112,6 +112,9 @@ Style/SlicingWithRange: Style/SpecialGlobalVars: # DANGER: unsafe rule!! Enabled: false +Style/StringConcatenation: + Enabled: false + Style/StringLiterals: Enabled: false EnforcedStyle: double_quotes diff --git a/CHANGELOG.md b/CHANGELOG.md index 0103584..b28c5c7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,9 @@ # SmarterCSV 1.x Change Log +## 1.11.0 (2024-07-02) + * added SmarterCSV::Writer to output CSV files ([issue #44](https://github.com/tilo/smarter_csv/issues/44)) + ## 1.10.3 (2024-03-10) * fixed issue when frozen options are handed in (thanks to Daniel Pepper) * cleaned-up rspec tests (thanks to Daniel Pepper) diff --git a/README.md b/README.md index 4b25a98..0f817c5 100644 --- a/README.md +++ b/README.md @@ -3,10 +3,11 @@ [![codecov](https://codecov.io/gh/tilo/smarter_csv/branch/main/graph/badge.svg?token=1L7OD80182)](https://codecov.io/gh/tilo/smarter_csv) [![Gem Version](https://badge.fury.io/rb/smarter_csv.svg)](http://badge.fury.io/rb/smarter_csv) +This library provides a complete interface to CSV files and data. It offers tools to enable you to read and write to and from Strings or IO objects, as needed. 
-#### LATEST CHANGES +#### BREAKING CHANGES -* Version 1.10.0 has BREAKING CHANGES: +* Version 1.10.0 had BREAKING CHANGES: Changed behavior: + when `user_provided_headers` are provided: diff --git a/lib/smarter_csv.rb b/lib/smarter_csv.rb index 26b8914..f0ef1b7 100644 --- a/lib/smarter_csv.rb +++ b/lib/smarter_csv.rb @@ -10,6 +10,7 @@ require "smarter_csv/headers" require "smarter_csv/hash_transformations" require "smarter_csv/parse" +require "smarter_csv/writer" # load the C-extension: case RUBY_ENGINE diff --git a/lib/smarter_csv/version.rb b/lib/smarter_csv/version.rb index 7373e59..255a0b9 100644 --- a/lib/smarter_csv/version.rb +++ b/lib/smarter_csv/version.rb @@ -1,5 +1,5 @@ # frozen_string_literal: true module SmarterCSV - VERSION = "1.10.3" + VERSION = "1.11.0" end diff --git a/lib/smarter_csv/writer.rb b/lib/smarter_csv/writer.rb new file mode 100644 index 0000000..a92a13e --- /dev/null +++ b/lib/smarter_csv/writer.rb @@ -0,0 +1,102 @@ +# frozen_string_literal: true + +module SmarterCSV + # + # Generate CSV files + # + # Create an instance of the Writer class with the filename and options. + # call `<<` one or multiple times to append data to the file. + # call `finalize` to save the file. + # + # The `<<` method can take different arguments: + # * a single Hash + # * an array of Hashes + # * nested arrays of arrays of Hashes + # + # By default SmarterCSV::Writer automatically discovers all headers that are present + # in the data on-the-fly. This can be disabled, then only given headers are used. + # Disabling can be useful when you want to select attributes from hashes, or ActiveRecord instances. + # + # If `discover_headers` is enabled, and headers are given, any new headers that are found in the data will still be appended. + # + # The Writer automatically quotes fields containing the col_sep, row_sep, or the quote_char. 
require 'tempfile'

module SmarterCSV
  #
  # Generates CSV files from Hashes.
  #
  # Usage:
  #   writer = SmarterCSV::Writer.new(file_path, options)
  #   writer << data    # a Hash, an Array of Hashes, or nested Arrays of Hashes
  #   writer.finalize   # writes the header line and all rows, then closes the file
  #
  # Rows are buffered in a temporary file until `finalize` is called, because
  # with header discovery the complete header line is only known after all
  # data has been seen.
  #
  # Options:
  #   col_sep          : defaults to , but can be set to any other character
  #   row_sep          : defaults to LF \n , but can be set to \r\n or \r or anything else
  #   quote_char       : defaults to "
  #   discover_headers : defaults to true
  #   headers          : defaults to []
  #   force_quotes     : defaults to false
  #   map_headers      : defaults to {}, can be a hash of key -> value mappings
  #
  # Quoting:
  #   Data fields are quoted when they contain the col_sep, the row_sep, a CR,
  #   a LF, or the quote_char - or always, when force_quotes is true.
  #   Header names are quoted only when they contain one of those characters.
  #   Inside a quoted field every quote_char is doubled.
  #
  # IMPORTANT NOTES:
  #  * Data hashes could contain strings or symbols as keys.
  #    Make sure to use the correct form when specifying headers manually,
  #    in combination with the :discover_headers option
  #
  class Writer
    # Opens the output file (truncating it) and the temporary row buffer.
    #
    # @param file_path [String] path of the CSV file to create
    # @param options [Hash] see the list of options on the class
    # @raise [SystemCallError] when the output file or the temporary file cannot be created
    def initialize(file_path, options = {})
      @options = options
      # only a literal `true` (or an absent key) enables header discovery
      @discover_headers = options.key?(:discover_headers) ? (options[:discover_headers] == true) : true
      # copy, so that header discovery never mutates (or chokes on a frozen) caller-owned array
      @headers = (options[:headers] || []).dup
      @row_sep = options[:row_sep] || "\n" # RFC4180 "\r\n"
      @col_sep = options[:col_sep] || ','
      @quote_char = options[:quote_char] || '"'
      @force_quotes = options[:force_quotes] == true
      @map_headers = options[:map_headers] || {}
      # CR and LF always force quoting, even when row_sep is a multi-character string like "\r\n"
      @quote_regex = Regexp.union(@col_sep, @row_sep, @quote_char, "\r", "\n")
      @finalized = false

      # hidden state: the row buffer lives in the platform's default temp directory
      @temp_file = Tempfile.new('smarter_csv_writer')
      begin
        @output_file = File.open(file_path, 'w+')
      rescue StandardError
        @temp_file.close! # do not leave the buffer behind when the output cannot be opened
        raise
      end
    end

    # Appends data to the row buffer.
    #
    # @param data [Hash, Array, nil] one row, a (possibly nested) list of rows, or nil (ignored)
    # @return [SmarterCSV::Writer] self, so that calls can be chained
    # @raise [ArgumentError] when data (or a nested element) is neither Hash, Array nor nil
    def <<(data)
      case data
      when Hash
        process_hash(data)
      when Array
        data.each { |item| self << item }
      when NilClass
        # ignore
      else
        raise ArgumentError, "Invalid data type: #{data.class}. Must be a Hash or an Array."
      end
      self
    end

    # Writes the header line followed by all buffered rows to the output file,
    # then closes the output file and removes the temporary buffer.
    # Calling it again after the first call is a no-op.
    #
    # @return [nil]
    def finalize
      return if @finalized

      @finalized = true
      begin
        # Map headers if :map_headers option is provided
        mapped_headers = @headers.map { |header| @map_headers[header] || header }
        header_line = mapped_headers.map { |header| escape_csv_field(header, force: false) }.join(@col_sep)

        @temp_file.rewind
        @output_file.write(header_line + @row_sep)
        # stream the buffered rows instead of loading the whole buffer into memory
        IO.copy_stream(@temp_file, @output_file)
        @output_file.flush
      ensure
        @output_file.close unless @output_file.closed?
        @temp_file.close! # closes the handle AND unlinks the temporary file
      end
      nil
    end

    private

    # Buffers one row; with header discovery, previously unseen keys are appended to the headers first.
    def process_hash(hash)
      @headers.concat(hash.keys - @headers) if @discover_headers

      # Order the values by the current headers; missing keys become empty fields (nil.to_s),
      # while `false` is kept and written as "false".
      row = @headers.map { |header| escape_csv_field(hash[header]) }

      @temp_file.write(row.join(@col_sep) + @row_sep)
    end

    # Converts a value to its CSV representation, quoting it when required (or forced).
    def escape_csv_field(field, force: @force_quotes)
      text = field.to_s
      return text unless force || text.match?(@quote_regex)

      doubled = @quote_char * 2
      # block form: the replacement is taken literally, whatever the quote_char is
      "#{@quote_char}#{text.gsub(@quote_char) { doubled }}#{@quote_char}"
    end
  end
end
# frozen_string_literal: true

require 'tmpdir'

# Specs for SmarterCSV::Writer: header discovery, header mapping, quoting and error propagation.
# rubocop:disable Style/WordArray
RSpec.describe SmarterCSV::Writer do
  # Writes all `data_batches` with the given `options` and finalizes the file.
  subject(:create_csv_file) do
    writer = SmarterCSV::Writer.new(file_path, options)
    data_batches.each { |batch| writer << batch }
    writer.finalize
  end

  # Per-process path below the platform's temp directory: portable, and safe for parallel runs.
  let(:file_path) { File.join(Dir.tmpdir, "smarter_csv_writer_spec_#{Process.pid}.csv") }

  after(:each) do
    File.delete(file_path) if File.exist?(file_path)
  end

  let(:data_batches) do
    [
      [
        { name: 'John', age: 30, city: 'New York' },
        { name: 'Jane', age: 25, country: 'USA' }
      ],
      [
        { name: 'Mike', age: 35, city: 'Chicago', state: 'IL' }
      ],
      {name: 'Alex', country: 'USA'}
    ]
  end

  context 'simplest case: one hash given' do
    let(:options) { {} }
    let(:data) do
      { name: 'John', age: 30, city: 'New York' }
    end

    it 'writes the given headers and data correctly' do
      writer = SmarterCSV::Writer.new(file_path, options)
      writer << data
      writer.finalize
      output = File.read(file_path)

      expect(output).to include("name,age,city\n")
      expect(output).to include("John,30,New York\n")
    end
  end

  context 'case: array of hashes given' do
    let(:options) { {} }

    it 'writes the given headers and data correctly' do
      writer = SmarterCSV::Writer.new(file_path, options)
      writer << data_batches[0]
      writer << data_batches[1]
      writer.finalize
      output = File.read(file_path)

      expect(output).to include("name,age,city,country,state\n")
      expect(output).to include("John,30,New York\n")
      expect(output).to include("Jane,25,,USA\n")
      expect(output).to include("Mike,35,Chicago,,IL\n")
    end
  end

  context "when deeply nested data" do
    let(:options) { {} }
    let(:data_batches) do
      [[[
        [
          { name: 'John', age: 30, city: 'New York' },
          [{ name: 'Jane', age: 25, country: 'USA' }, nil],
          []
        ],
        [
          { name: 'Mike', age: 35, city: 'Chicago', state: 'IL' }
        ]
      ]],
       {name: 'Alex', country: 'USA'}]
    end

    it 'writes the given headers and data correctly' do
      create_csv_file
      output = File.read(file_path)

      expect(output).to include("name,age,city,country,state\n")
      expect(output).to include("John,30,New York\n")
      expect(output).to include("Jane,25,,USA\n")
      expect(output).to include("Mike,35,Chicago,,IL\n")
      expect(output).to include("Alex,,,USA,\n")
    end

    context "when discover_headers is turned off" do
      let(:options) { {discover_headers: false, headers: [:name, :country]} }

      it 'writes the given headers and data correctly' do
        create_csv_file
        output = File.read(file_path)

        expect(output).to include("name,country\n")
        expect(output).to include("John,\n")
        expect(output).to include("Jane,USA\n")
        expect(output).to include("Mike,\n")
        expect(output).to include("Alex,USA\n")
      end
    end
  end

  context 'when headers are given in advance' do
    let(:options) { { headers: %i[name age city] } }

    it 'writes the given headers and data correctly' do
      create_csv_file
      output = File.read(file_path)

      expect(output).to include("name,age,city,country,state\n")
      expect(output).to include("John,30,New York\n")
      expect(output).to include("Jane,25,,USA\n")
      expect(output).to include("Mike,35,Chicago,,IL\n")
      expect(output).to include("Alex,,,USA,\n")
    end
  end

  context 'when headers are automatically discovered' do
    let(:options) { {} }

    it 'writes the discovered headers and data correctly' do
      create_csv_file
      output = File.read(file_path)

      expect(output).to include("name,age,city,country,state\n")
      expect(output).to include("John,30,New York\n")
      expect(output).to include("Jane,25,,USA\n")
      expect(output).to include("Mike,35,Chicago,,IL\n")
    end
  end

  context 'when headers are mapped' do
    let(:options) do
      {
        map_headers: {
          name: 'Full Name',
          age: 'Age',
          city: 'City',
          country: 'Country',
          state: 'State',
        }
      }
    end

    it 'writes the mapped headers and data correctly' do
      create_csv_file
      output = File.read(file_path)

      expect(output).to include("Full Name,Age,City,Country,State\n")
      expect(output).to include("John,30,New York\n")
      expect(output).to include("Jane,25,,USA\n")
      expect(output).to include("Mike,35,Chicago,,IL\n")
    end
  end

  context 'Initialization with Default Options' do
    it 'initializes with default options' do
      writer = SmarterCSV::Writer.new(file_path)
      expect(writer.instance_variable_get(:@discover_headers)).to be true
      expect(writer.instance_variable_get(:@headers)).to eq([])
      expect(writer.instance_variable_get(:@col_sep)).to eq(',')
      writer.finalize # release the file handles opened by the constructor
    end
  end

  context 'Initialization with Custom Options' do
    it 'initializes with custom options' do
      options = { discover_headers: false, headers: ['a', 'b'], col_sep: ';', force_quotes: true, map_headers: { 'a' => 'A' } }
      writer = SmarterCSV::Writer.new(file_path, options)
      expect(writer.instance_variable_get(:@discover_headers)).to be false
      expect(writer.instance_variable_get(:@headers)).to eq(['a', 'b'])
      expect(writer.instance_variable_get(:@col_sep)).to eq(';')
      expect(writer.instance_variable_get(:@force_quotes)).to be true
      expect(writer.instance_variable_get(:@map_headers)).to eq({ 'a' => 'A' })
      writer.finalize # release the file handles opened by the constructor
    end
  end

  context 'Appending Data' do
    it 'appends multiple hashes over multiple calls' do
      writer = SmarterCSV::Writer.new(file_path)
      writer << [{ a: 1, b: 2 }, {c: 3}]
      writer << [{ d: 4, a: 5 }]
      writer.finalize
      output = File.read(file_path)

      expect(output).to include("a,b,c,d\n")
      expect(output).to include("1,2\n")
      expect(output).to include(",,3\n")
      expect(output).to include("5,,,4\n")
    end

    it 'appends with existing headers' do
      options = { headers: [:a] }
      writer = SmarterCSV::Writer.new(file_path, options)
      writer << [{ a: 1, b: 2 }]
      writer.finalize

      expect(File.read(file_path)).to eq("a,b\n1,2\n")
    end

    it 'appends with missing fields' do
      writer = SmarterCSV::Writer.new(file_path)
      writer << [{ a: 1, b: 2 }, { a: 3 }]
      writer.finalize

      expect(File.read(file_path)).to eq("a,b\n1,2\n3,\n")
    end
  end

  context 'Finalizing the Output File' do
    it 'maps headers' do
      options = { map_headers: { a: 'A', b: 'B' } }
      writer = SmarterCSV::Writer.new(file_path, options)
      writer << [{ a: 1, b: 2 }]
      writer.finalize

      expect(File.read(file_path)).to eq("A,B\n1,2\n")
    end

    it 'writes header and appends content to output file' do
      writer = SmarterCSV::Writer.new(file_path)
      writer << [{ a: 1, b: 2 }]
      writer.finalize

      expect(File.read(file_path)).to eq("a,b\n1,2\n")
    end

    it 'properly closes the output file' do
      writer = SmarterCSV::Writer.new(file_path)
      writer << [{ a: 1, b: 2 }]
      writer.finalize

      expect(File).to be_exist(file_path)
      # the existence of the file alone does not prove that the handle was released
      expect(writer.instance_variable_get(:@output_file)).to be_closed
    end
  end

  context 'CSV Field Escaping' do
    it 'does not quote fields without commas unless force_quotes is enabled' do
      writer = SmarterCSV::Writer.new(file_path)
      writer << [{ a: 'hello', b: 'world' }]
      writer.finalize

      expect(File.read(file_path)).to eq("a,b\nhello,world\n")
    end

    it 'quotes fields with column separator' do
      writer = SmarterCSV::Writer.new(file_path)
      writer << [{ a: 'hello, world', b: 'test' }]
      writer.finalize

      expect(File.read(file_path)).to eq("a,b\n\"hello, world\",test\n")
    end

    it 'quotes all fields when force_quotes is enabled' do
      options = { force_quotes: true }
      writer = SmarterCSV::Writer.new(file_path, options)
      writer << [{ a: 'hello', b: 'world' }]
      writer.finalize

      expect(File.read(file_path)).to eq("a,b\n\"hello\",\"world\"\n")
    end
  end

  context 'Edge Cases' do
    it 'handles empty hash' do
      writer = SmarterCSV::Writer.new(file_path)
      writer << [{}]
      writer.finalize

      expect(File.read(file_path)).to eq("\n\n")
    end

    it 'handles empty array' do
      writer = SmarterCSV::Writer.new(file_path)
      writer << []
      writer.finalize

      expect(File.read(file_path)).to eq("\n")
    end

    it 'quotes fields containing line breaks' do
      writer = SmarterCSV::Writer.new(file_path)
      writer << [{ a: "hello\nworld", b: 'test' }]
      writer.finalize

      expect(File.read(file_path)).to eq("a,b\n\"hello\nworld\",test\n")
    end

    it 'doubles quote characters inside quoted fields' do
      writer = SmarterCSV::Writer.new(file_path)
      writer << [{ a: 'plain', b: 'quote"test' }]
      writer.finalize

      # RFC 4180: a quote inside a quoted field is escaped by a second quote;
      # a single embedded quote would make the line unparseable
      expect(File.read(file_path)).to eq("a,b\nplain,\"quote\"\"test\"\n")
    end
  end

  context 'Error Handling' do
    it 'handles file access issues' do
      allow(File).to receive(:open).and_raise(Errno::EACCES)

      expect do
        SmarterCSV::Writer.new(file_path)
      end.to raise_error(Errno::EACCES)
    end

    it 'handles tempfile issues' do
      allow(Tempfile).to receive(:new).and_raise(Errno::ENOENT)

      expect do
        SmarterCSV::Writer.new(file_path)
      end.to raise_error(Errno::ENOENT)
    end
  end
end
# rubocop:enable Style/WordArray