Skip to content

Commit 1914630

Browse files
authored
Merge pull request #185 from mcode/allow-empty-csv
Allowing CSVs with no rows + adding trimming to CSV parsing
2 parents 9093795 + 90dc90d commit 1914630

File tree

5 files changed

+29
-13
lines changed

5 files changed

+29
-13
lines changed

src/helpers/csvParsingUtils.js

Lines changed: 10 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -44,6 +44,8 @@ const DEFAULT_OPTIONS = {
4444
columns: (header) => header.map((column) => stringNormalizer(column)),
4545
// https://csv.js.org/parse/options/bom/
4646
bom: true,
47+
// https://csv.js.org/parse/options/trim/
48+
trim: true,
4749
// https://csv.js.org/parse/options/skip_empty_lines/
4850
skip_empty_lines: true,
4951
// NOTE: This will skip any records with empty values, not just skip the empty values themselves
@@ -57,9 +59,17 @@ function csvParse(csvData, options = {}) {
5759
return parse(csvData, { ...DEFAULT_OPTIONS, ...options });
5860
}
5961

62+
/**
 * Extracts the header row of a CSV document as lower-cased column names.
 *
 * Only the first record is requested from the parser (`to: 1`), so the header
 * can be read even when the CSV contains no data rows.
 *
 * @param {string|Buffer} csvData - Raw CSV content.
 * @returns {string[]} Lower-cased header names, or an empty array when the
 *   input contains no header row at all (e.g. an empty file).
 */
function getCSVHeader(csvData) {
  // `parse` yields an empty array when there is no record to read; default the
  // header row to [] so callers receive "no headers" instead of a TypeError.
  const [headerRow = []] = parse(csvData, {
    bom: true,
    trim: true,
    to: 1,
  });
  return headerRow.map((h) => h.toLowerCase());
}
6069

6170
module.exports = {
6271
stringNormalizer,
6372
normalizeEmptyValues,
6473
csvParse,
74+
getCSVHeader,
6575
};

src/helpers/csvValidator.js

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -3,11 +3,10 @@ const logger = require('./logger');
33

44
// Validates csvData against the csvSchema
55
// Uses the csvFileIdentifier in logs for readability
6-
function validateCSV(csvFileIdentifier, csvSchema, csvData) {
6+
function validateCSV(csvFileIdentifier, csvSchema, csvData, headers) {
77
let isValid = true;
88

99
// Check headers
10-
const headers = Object.keys(csvData[0]).map((h) => h.toLowerCase());
1110
const schemaDiff = _.difference(csvSchema.headers.map((h) => h.name.toLowerCase()), headers);
1211
const fileDiff = _.difference(headers, csvSchema.headers.map((h) => h.name.toLowerCase()));
1312

src/modules/CSVFileModule.js

Lines changed: 5 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -2,14 +2,16 @@ const fs = require('fs');
22
const moment = require('moment');
33
const logger = require('../helpers/logger');
44
const { validateCSV } = require('../helpers/csvValidator');
5-
const { csvParse, stringNormalizer, normalizeEmptyValues } = require('../helpers/csvParsingUtils');
5+
const { csvParse, stringNormalizer, normalizeEmptyValues, getCSVHeader } = require('../helpers/csvParsingUtils');
66

77
class CSVFileModule {
88
constructor(csvFilePath, unalterableColumns, parserOptions) {
99
// Parse then normalize the data
10-
const parsedData = csvParse(fs.readFileSync(csvFilePath), parserOptions);
10+
const csvData = fs.readFileSync(csvFilePath);
11+
const parsedData = csvParse(csvData, parserOptions);
1112
this.filePath = csvFilePath;
1213
this.data = normalizeEmptyValues(parsedData, unalterableColumns);
14+
this.header = getCSVHeader(csvData);
1315
}
1416

1517
async get(key, value, fromDate, toDate) {
@@ -32,7 +34,7 @@ class CSVFileModule {
3234
async validate(csvSchema) {
3335
if (csvSchema) {
3436
logger.info(`Validating CSV file for ${this.filePath}`);
35-
return validateCSV(this.filePath, csvSchema, this.data);
37+
return validateCSV(this.filePath, csvSchema, this.data, this.header);
3638
}
3739
logger.warn(`No CSV schema provided for ${this.filePath}`);
3840
return true;

src/modules/CSVURLModule.js

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -2,7 +2,7 @@ const axios = require('axios');
22
const moment = require('moment');
33
const logger = require('../helpers/logger');
44
const { validateCSV } = require('../helpers/csvValidator');
5-
const { csvParse, stringNormalizer, normalizeEmptyValues } = require('../helpers/csvParsingUtils');
5+
const { csvParse, stringNormalizer, normalizeEmptyValues, getCSVHeader } = require('../helpers/csvParsingUtils');
66

77
class CSVURLModule {
88
constructor(url, unalterableColumns, parserOptions) {
@@ -28,6 +28,7 @@ class CSVURLModule {
2828
const parsedData = csvParse(csvData, this.parserOptions);
2929
logger.debug('CSV Data parsing successful');
3030
this.data = normalizeEmptyValues(parsedData, this.unalterableColumns);
31+
this.header = getCSVHeader(csvData);
3132
}
3233
}
3334

@@ -55,7 +56,7 @@ class CSVURLModule {
5556

5657
if (csvSchema) {
5758
this.data = normalizeEmptyValues(this.data, this.unalterableColumns);
58-
return validateCSV(this.url, csvSchema, this.data);
59+
return validateCSV(this.url, csvSchema, this.data, this.header);
5960
}
6061
logger.warn(`No CSV schema provided for data found at ${this.url}`);
6162
return true;

test/helpers/csvValidator.test.js

Lines changed: 10 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -83,26 +83,30 @@ const schema = {
8383

8484
describe('csvValidator', () => {
8585
test('simple data validates', () => {
86-
expect(validateCSV('', schema, SIMPLE_DATA)).toBe(true);
86+
expect(validateCSV('', schema, SIMPLE_DATA, Object.keys(SIMPLE_DATA[0]))).toBe(true);
8787
});
8888

8989
test('data missing required value does not validate', () => {
90-
expect(validateCSV('', schema, SIMPLE_DATA_MISSING_REQUIRED_VALUE)).toBe(false);
90+
expect(validateCSV('', schema, SIMPLE_DATA_MISSING_REQUIRED_VALUE, Object.keys(SIMPLE_DATA_MISSING_REQUIRED_VALUE[0]).map((h) => h.toLowerCase()))).toBe(false);
9191
});
9292

9393
test('data missing required header does not validate', () => {
94-
expect(validateCSV('', schema, SIMPLE_DATA_MISSING_HEADER)).toBe(false);
94+
expect(validateCSV('', schema, SIMPLE_DATA_MISSING_HEADER, Object.keys(SIMPLE_DATA_MISSING_HEADER[0]).map((h) => h.toLowerCase()))).toBe(false);
9595
});
9696

9797
test('data with erroneous column should still validate', () => {
98-
expect(validateCSV('', schema, SIMPLE_DATA_EXTRA_COLUMNS)).toBe(true);
98+
expect(validateCSV('', schema, SIMPLE_DATA_EXTRA_COLUMNS, Object.keys(SIMPLE_DATA_EXTRA_COLUMNS[0]).map((h) => h.toLowerCase()))).toBe(true);
9999
});
100100

101101
test('data missing an optional column should still validate', () => {
102-
expect(validateCSV('', schema, SIMPLE_DATA_MISSING_OPTIONAL_COLUMN)).toBe(true);
102+
expect(validateCSV('', schema, SIMPLE_DATA_MISSING_OPTIONAL_COLUMN, Object.keys(SIMPLE_DATA_MISSING_OPTIONAL_COLUMN[0]).map((h) => h.toLowerCase()))).toBe(true);
103103
});
104104

105105
test('data with different casing in the column header should still validate', () => {
106-
expect(validateCSV('', schema, SIMPLE_DATA_DIFFERENT_CASING)).toBe(true);
106+
expect(validateCSV('', schema, SIMPLE_DATA_DIFFERENT_CASING, Object.keys(SIMPLE_DATA_DIFFERENT_CASING[0]).map((h) => h.toLowerCase()))).toBe(true);
107+
});
108+
109+
test('data with only the header but no rows should still validate', () => {
110+
expect(validateCSV('', schema, [], ['header1', 'header2', 'header3'])).toBe(true);
107111
});
108112
});

0 commit comments

Comments
 (0)