Skip to content

Commit 0bd315d

Browse files
authored
Merge pull request #169 from mcode/support-blank-lines
Support CSVs with blank lines and empty values
2 parents 7533e92 + cbdaaa9 commit 0bd315d

File tree

7 files changed

+63
-19
lines changed

7 files changed

+63
-19
lines changed

src/helpers/appUtils.js

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
const fs = require('fs');
22
const path = require('path');
3-
const parse = require('csv-parse/lib/sync');
3+
const { csvParse } = require('./csvParsingUtils');
44

55
/**
66
* Parses a provided CSV with MRN column into string array of IDs
@@ -11,10 +11,7 @@ const parse = require('csv-parse/lib/sync');
1111
function parsePatientIds(pathToCSV) {
1212
// Parse CSV for list of patient IDs
1313
const patientIdsCsvPath = path.resolve(pathToCSV);
14-
const patientIds = parse(fs.readFileSync(patientIdsCsvPath, 'utf8'), {
15-
columns: (header) => header.map((column) => column.toLowerCase()),
16-
bom: true,
17-
}).map((row) => {
14+
const patientIds = csvParse(fs.readFileSync(patientIdsCsvPath, 'utf8')).map((row) => {
1815
if (!row.mrn) {
1916
throw new Error(`${pathToCSV} has no "mrn" column`);
2017
}

src/helpers/csvParsingUtils.js

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
const parse = require('csv-parse/lib/sync');
12
const logger = require('./logger');
23

34
// The standard string normalizer function
@@ -38,7 +39,27 @@ function normalizeEmptyValues(data, unalterableColumns = []) {
3839
return newData;
3940
}
4041

42+
// Default options for CSV parsing
43+
const DEFAULT_OPTIONS = {
44+
columns: (header) => header.map((column) => stringNormalizer(column)),
45+
// https://csv.js.org/parse/options/bom/
46+
bom: true,
47+
// https://csv.js.org/parse/options/skip_empty_lines/
48+
skip_empty_lines: true,
49+
// NOTE: This skips an entire record only when all of its values are empty (or whitespace-only); records that have just some empty values are kept
50+
// NOTE-2: The name of the flag changed from v4 (what we use) to v5 (what is documented)
51+
// https://csv.js.org/parse/options/skip_records_with_empty_values/
52+
skip_lines_with_empty_values: true,
53+
};
54+
55+
// Common utility for parsing CSV files
56+
function csvParse(csvData, options = {}) {
57+
return parse(csvData, { ...DEFAULT_OPTIONS, ...options });
58+
}
59+
60+
4161
module.exports = {
4262
stringNormalizer,
4363
normalizeEmptyValues,
64+
csvParse,
4465
};

src/modules/CSVFileModule.js

Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,14 @@
11
const fs = require('fs');
22
const moment = require('moment');
3-
const parse = require('csv-parse/lib/sync');
43
const logger = require('../helpers/logger');
54
const { validateCSV } = require('../helpers/csvValidator');
6-
const { stringNormalizer, normalizeEmptyValues } = require('../helpers/csvParsingUtils');
5+
const { csvParse, stringNormalizer, normalizeEmptyValues } = require('../helpers/csvParsingUtils');
76

87
class CSVFileModule {
98
constructor(csvFilePath, unalterableColumns) {
109
// Parse then normalize the data
11-
const parsedData = parse(fs.readFileSync(csvFilePath), {
12-
columns: (header) => header.map((column) => stringNormalizer(column)),
13-
bom: true,
14-
});
10+
const parsedData = csvParse(fs.readFileSync(csvFilePath));
1511
this.filePath = csvFilePath;
16-
1712
this.data = normalizeEmptyValues(parsedData, unalterableColumns);
1813
}
1914

src/modules/CSVURLModule.js

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,8 @@
11
const axios = require('axios');
22
const moment = require('moment');
3-
const parse = require('csv-parse/lib/sync');
43
const logger = require('../helpers/logger');
54
const { validateCSV } = require('../helpers/csvValidator');
6-
const { stringNormalizer, normalizeEmptyValues } = require('../helpers/csvParsingUtils');
5+
const { csvParse, stringNormalizer, normalizeEmptyValues } = require('../helpers/csvParsingUtils');
76

87
class CSVURLModule {
98
constructor(url, unalterableColumns) {
@@ -25,10 +24,7 @@ class CSVURLModule {
2524
});
2625
logger.debug('Web request successful');
2726
// Parse then normalize the data
28-
const parsedData = parse(csvData, {
29-
columns: (header) => header.map((column) => stringNormalizer(column)),
30-
bom: true,
31-
});
27+
const parsedData = csvParse(csvData);
3228
logger.debug('CSV Data parsing successful');
3329
this.data = normalizeEmptyValues(parsedData, this.unalterableColumns);
3430
}

test/modules/CSVFileModule.test.js

Lines changed: 26 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@ const exampleResponse = require('./fixtures/csv-response.json');
44

55
const INVALID_MRN = 'INVALID MRN';
66
const csvFileModule = new CSVFileModule(path.join(__dirname, './fixtures/example-csv.csv'));
7-
const csvFileModuleWithBOMs = new CSVFileModule(path.join(__dirname, './fixtures/example-csv-bom.csv'));
87

98

109
describe('CSVFileModule', () => {
@@ -15,10 +14,36 @@ describe('CSVFileModule', () => {
1514
});
1615

1716
test('Reads data from CSV with a Byte Order Mark', async () => {
17+
const csvFileModuleWithBOMs = new CSVFileModule(
18+
path.join(__dirname, './fixtures/example-csv-bom.csv'),
19+
);
20+
1821
const data = await csvFileModuleWithBOMs.get('mrn', 'example-mrn-1');
1922
expect(data).toEqual(exampleResponse);
2023
});
2124

25+
test('Reads data from CSV with Empty Values', async () => {
26+
// Five row file, with three rows of empty values
27+
// Should be just two rows of data after ingestion
28+
const csvFileModuleWithEmptyValues = new CSVFileModule(
29+
path.join(__dirname, './fixtures/example-csv-empty-values.csv'),
30+
);
31+
const data = await csvFileModuleWithEmptyValues.get('mrn', 'example-mrn-1');
32+
expect(data).toEqual(exampleResponse);
33+
const data2 = await csvFileModuleWithEmptyValues.get('mrn', 'example-mrn-not-ignored');
34+
expect(data2).toHaveLength(1);
35+
// Should be just two rows of data after ingestion
36+
expect(csvFileModuleWithEmptyValues.data).toHaveLength(2);
37+
});
38+
39+
test('Reads data from CSV with Empty Lines', async () => {
40+
const csvFileModuleWithEmptyLines = new CSVFileModule(
41+
path.join(__dirname, './fixtures/example-csv-empty-line.csv'),
42+
);
43+
const data = await csvFileModuleWithEmptyLines.get('mrn', 'example-mrn-1');
44+
expect(data).toEqual(exampleResponse);
45+
});
46+
2247
test('Returns multiple rows', async () => {
2348
const data = await csvFileModule.get('mrn', 'example-mrn-2');
2449
expect(data).toHaveLength(2);

test/modules/fixtures/example-csv-empty-line.csv

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
mrn,trialSubjectID,enrollmentStatus,trialResearchID,trialStatus,dateRecorded
2+
example-mrn-1,subjectId-1,status-1,researchId-1,trialStatus-1,2020-01-10
3+
4+
example-mrn-2,subjectId-3,status-3,researchId-3,trialStatus-3,2020-06-10

test/modules/fixtures/example-csv-empty-values.csv

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
mrn,trialSubjectID,enrollmentStatus,trialResearchID,trialStatus,dateRecorded
2+
example-mrn-1,subjectId-1,status-1,researchId-1,trialStatus-1,2020-01-10
3+
, , , , ,
4+
, , , ,,
5+
,,, , ,
6+
example-mrn-not-ignored,,,,,

0 commit comments

Comments
 (0)