Skip to content

Commit 0aedf4b

Browse files
authored
Merge pull request #122 from mcode/develop
Merge for v1.0.1
2 parents 9258212 + 8f79a1b commit 0aedf4b

File tree

8 files changed

+142
-52
lines changed

8 files changed

+142
-52
lines changed

README.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ A Node.js framework for extracting mCODE FHIR resources. All resources are profi
2020
- [Extraction Date Range](#extraction-date-range)
2121
- [CLI From-Date and To-Date (NOT recommended use)](#cli-from-date-and-to-date-not-recommended-use)
2222
- [Troubleshooting](#troubleshooting)
23+
- [NULL/NIL values found and replaced with empty-strings](#nullnil-values-found-and-replaced-with-empty-strings)
2324
- [Byte Order Markers in CSV Files](#byte-order-markers-in-csv-files)
2425
- [Terminology and Architecture](#terminology-and-architecture)
2526
- [Glossary](#glossary)
@@ -165,6 +166,10 @@ npm start -- --entries-filter --from-date <YYYY-MM-DD> --to-date <YYYY-MM-DD> --
165166

166167
### Troubleshooting
167168

169+
#### NULL/NIL values found and replaced with empty-strings
170+
171+
When a provided CSV file contains NULL/NIL values (matched case-insensitively), those values are treated as empty and are replaced with empty strings (`''`). Each Extractor, however, can define a set of `unalterableColumns` whose values are exempt from this NULL/NIL correction. Every corrected value produces a `debug`-level message, which can be seen by running the extractor with the debug flag set.
172+
168173
#### Byte Order Markers in CSV Files
169174

170175
The extraction client has built-in handling of byte order markers for CSV files in UTF-8 and UTF-16LE encodings. When using CSV files in other encodings, if you experience unexpected errors, be sure to check for a byte order marker at the beginning of the file. One way to check is to run the following command from the command line:

package-lock.json

Lines changed: 10 additions & 10 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

package.json

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,14 @@
11
{
22
"name": "mcode-extraction-framework",
3-
"version": "1.0.0",
3+
"version": "1.0.1",
44
"description": "",
55
"contributors": [
66
"Julia Afeltra <jafeltra@mitre.org>",
77
"Julian Carter <jacarter@mitre.org>",
88
"Matthew Gramigna <mgramigna@mitre.org>",
99
"Daniel Lee <daniellee@mitre.org>",
1010
"Dylan Mahalingam <kmahalingam@mitre.org>",
11+
"Dylan Mendelowitz <dmendelowitz@mitre.org>",
1112
"Dylan Phelan <dphelan@mitre.org>"
1213
],
1314
"main": "src/",
@@ -26,9 +27,9 @@
2627
"csv-parse": "^4.8.8",
2728
"fhir-crud-client": "^1.2.2",
2829
"fhirpath": "2.1.5",
29-
"lodash": "^4.17.19",
30+
"lodash": "^4.17.21",
3031
"moment": "^2.26.0",
31-
"nodemailer": "^6.4.14",
32+
"nodemailer": "^6.4.16",
3233
"sha.js": "^2.4.9",
3334
"winston": "^3.2.1"
3435
},

src/cli/app.js

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -84,7 +84,7 @@ async function mcodeApp(Client, fromDate, toDate, pathToConfig, pathToRunLogs, d
8484

8585
// Parse CSV for list of patient mrns
8686
const patientIdsCsvPath = path.resolve(config.patientIdCsvPath);
87-
const patientIds = parse(fs.readFileSync(patientIdsCsvPath, 'utf8'), { columns: true }).map((row) => row.mrn);
87+
const patientIds = parse(fs.readFileSync(patientIdsCsvPath, 'utf8'), { columns: true, bom: true }).map((row) => row.mrn);
8888

8989
// Get RunInstanceLogger for recording new runs and inferring dates from previous runs
9090
const runLogger = allEntries ? null : new RunInstanceLogger(pathToRunLogs);

src/extractors/BaseCSVExtractor.js

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,11 +5,12 @@ const { validateCSV } = require('../helpers/csvValidator');
55
const logger = require('../helpers/logger');
66

77
class BaseCSVExtractor extends Extractor {
8-
constructor({ filePath, csvSchema }) {
8+
constructor({ filePath, csvSchema, unalterableColumns }) {
99
super();
10+
this.unalterableColumns = unalterableColumns || [];
1011
this.csvSchema = csvSchema;
1112
this.filePath = path.resolve(filePath);
12-
this.csvModule = new CSVModule(this.filePath);
13+
this.csvModule = new CSVModule(this.filePath, this.unalterableColumns);
1314
}
1415

1516
validate() {

src/extractors/CSVPatientExtractor.js

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,9 @@ function joinAndReformatData(patientData) {
5555

5656
class CSVPatientExtractor extends BaseCSVExtractor {
5757
constructor({ filePath, mask = [] }) {
58-
super({ filePath, csvSchema: CSVPatientSchema });
58+
// Define CSV Columns whose values should never be altered
59+
const unalterableColumns = ['familyName', 'givenName'];
60+
super({ filePath, csvSchema: CSVPatientSchema, unalterableColumns });
5961
this.mask = mask;
6062
}
6163

src/modules/CSVModule.js

Lines changed: 41 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,16 +3,54 @@ const moment = require('moment');
33
const parse = require('csv-parse/lib/sync');
44
const logger = require('../helpers/logger');
55

6+
// The standard string normalizer function
7+
function stringNormalizer(str) {
8+
return str.toLowerCase();
9+
}
10+
11+
// For translating null/nil-like values into empty strings
12+
function normalizeEmptyValues(data, unalterableColumns = []) {
13+
const EMPTY_VALUES = ['null', 'nil'].map(stringNormalizer);
14+
const normalizedUnalterableColumns = unalterableColumns.map(stringNormalizer);
15+
// Flag tracking if empty values were normalized or not.
16+
let wasEmptyNormalized = false;
17+
const newData = data.map((row, i) => {
18+
const newRow = { ...row };
19+
// Filter out unalterable columns
20+
const columnsToNormalize = Object.keys(row).filter((col) => !normalizedUnalterableColumns.includes(stringNormalizer(col)));
21+
columnsToNormalize.forEach((col) => {
22+
const value = newRow[col];
23+
// If the value for this row-col combo is a value that should be empty, replace it
24+
if (EMPTY_VALUES.includes(stringNormalizer(value))) {
25+
logger.debug(`NULL/NIL values '${value}' found in row-${i}, col-${col}`);
26+
wasEmptyNormalized = true;
27+
newRow[col] = '';
28+
}
29+
});
30+
return newRow;
31+
});
32+
33+
if (wasEmptyNormalized) {
34+
logger.warn('NULL/NIL values found and replaced with empty-strings');
35+
}
36+
return newData;
37+
}
38+
639
class CSVModule {
7-
constructor(csvFilePath) {
8-
this.data = parse(fs.readFileSync(csvFilePath), { columns: (header) => header.map((column) => column.toLowerCase()), bom: true });
40+
constructor(csvFilePath, unalterableColumns) {
41+
// Parse then normalize the data
42+
const parsedData = parse(fs.readFileSync(csvFilePath), {
43+
columns: (header) => header.map((column) => stringNormalizer(column)),
44+
bom: true,
45+
});
46+
this.data = normalizeEmptyValues(parsedData, unalterableColumns);
947
}
1048

1149
async get(key, value, fromDate, toDate) {
1250
logger.debug(`Get csvModule info by key '${key}'`);
1351
// return all rows if key and value aren't provided
1452
if (!key && !value) return this.data;
15-
let result = this.data.filter((d) => d[key.toLowerCase()] === value);
53+
let result = this.data.filter((d) => d[stringNormalizer(key)] === value);
1654
if (result.length === 0) {
1755
logger.warn(`CSV Record with provided key '${key}' and value was not found`);
1856
return result;

test/modules/CSVModule.test.js

Lines changed: 75 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -1,48 +1,91 @@
11
const path = require('path');
2+
const rewire = require('rewire');
23
const { CSVModule } = require('../../src/modules');
34
const exampleResponse = require('./fixtures/csv-response.json');
45

6+
const CSVModuleRewired = rewire('../../src/modules/CSVModule.js');
7+
const normalizeEmptyValues = CSVModuleRewired.__get__('normalizeEmptyValues');
8+
59
const INVALID_MRN = 'INVALID MRN';
610
const csvModule = new CSVModule(path.join(__dirname, './fixtures/example-csv.csv'));
711
const csvModuleWithBOMs = new CSVModule(path.join(__dirname, './fixtures/example-csv-bom.csv'));
812

9-
test('Reads data from CSV', async () => {
10-
const data = await csvModule.get('mrn', 'example-mrn-1');
11-
expect(data).toEqual(exampleResponse);
12-
});
1313

14-
test('Reads data from CSV with a Byte Order Mark', async () => {
15-
const data = await csvModuleWithBOMs.get('mrn', 'example-mrn-1');
16-
expect(data).toEqual(exampleResponse);
17-
});
14+
describe('CSVModule', () => {
15+
describe('get', () => {
16+
test('Reads data from CSV', async () => {
17+
const data = await csvModule.get('mrn', 'example-mrn-1');
18+
expect(data).toEqual(exampleResponse);
19+
});
1820

19-
test('Returns multiple rows', async () => {
20-
const data = await csvModule.get('mrn', 'example-mrn-2');
21-
expect(data).toHaveLength(2);
22-
});
21+
test('Reads data from CSV with a Byte Order Mark', async () => {
22+
const data = await csvModuleWithBOMs.get('mrn', 'example-mrn-1');
23+
expect(data).toEqual(exampleResponse);
24+
});
2325

24-
test('Returns all rows when both key and value are undefined', async () => {
25-
const data = await csvModule.get();
26-
expect(data).toHaveLength(csvModule.data.length);
27-
expect(data).toEqual(csvModule.data);
28-
});
26+
test('Returns multiple rows', async () => {
27+
const data = await csvModule.get('mrn', 'example-mrn-2');
28+
expect(data).toHaveLength(2);
29+
});
2930

30-
test('Returns data with recordedDate after specified from date', async () => {
31-
const data = await csvModule.get('mrn', 'example-mrn-2', '2020-05-01');
32-
expect(data).toHaveLength(1);
33-
});
31+
test('Returns all rows when both key and value are undefined', async () => {
32+
const data = await csvModule.get();
33+
expect(data).toHaveLength(csvModule.data.length);
34+
expect(data).toEqual(csvModule.data);
35+
});
3436

35-
test('Returns data with recordedDate before specified to date', async () => {
36-
const data = await csvModule.get('mrn', 'example-mrn-2', null, '2020-05-01');
37-
expect(data).toHaveLength(1);
38-
});
37+
test('Returns data with recordedDate after specified from date', async () => {
38+
const data = await csvModule.get('mrn', 'example-mrn-2', '2020-05-01');
39+
expect(data).toHaveLength(1);
40+
});
3941

40-
test('Should return an empty array when key-value pair does not exist', async () => {
41-
const data = await csvModule.get('mrn', INVALID_MRN);
42-
expect(data).toEqual([]);
43-
});
42+
test('Returns data with recordedDate before specified to date', async () => {
43+
const data = await csvModule.get('mrn', 'example-mrn-2', null, '2020-05-01');
44+
expect(data).toHaveLength(1);
45+
});
46+
47+
test('Should return an empty array when key-value pair does not exist', async () => {
48+
const data = await csvModule.get('mrn', INVALID_MRN);
49+
expect(data).toEqual([]);
50+
});
51+
52+
test('Should return proper value regardless of key casing', async () => {
53+
const data = await csvModule.get('mRN', 'example-mrn-1');
54+
expect(data).toEqual(exampleResponse);
55+
});
56+
});
57+
58+
describe('normalizeEmptyValues', () => {
59+
it('Should turn "null" values into empty strings, regardless of case', () => {
60+
const data = [{ key: 'null' }, { key: 'NULL' }, { key: 'nuLL' }];
61+
const normalizedData = normalizeEmptyValues(data);
62+
normalizedData.forEach((d) => {
63+
expect(d.key).toBe('');
64+
});
65+
});
66+
67+
it('Should turn "nil" values into empty strings, regardless of case', () => {
68+
const data = [{ key: 'nil' }, { key: 'NIL' }, { key: 'NIl' }];
69+
const normalizedData = normalizeEmptyValues(data);
70+
normalizedData.forEach((d) => {
71+
expect(d.key).toBe('');
72+
});
73+
});
74+
75+
it('Should not modify unalterableColumns, regardless of their value', () => {
76+
const data = [{ key: 'null' }, { key: 'NULL' }, { key: 'nuLL' }, { key: 'nil' }, { key: 'NIL' }, { key: 'NIl' }];
77+
const normalizedData = normalizeEmptyValues(data, ['key']);
78+
normalizedData.forEach((d) => {
79+
expect(d.key).not.toBe('');
80+
});
81+
});
4482

45-
test('Should return proper value regardless of key casing', async () => {
46-
const data = await csvModule.get('mRN', 'example-mrn-1');
47-
expect(data).toEqual(exampleResponse);
83+
it('Should leave all other values uneffected, regardless of case', () => {
84+
const data = [{ key: 'anything' }, { key: 'any' }, { key: 'thing' }];
85+
const normalizedData = normalizeEmptyValues(data);
86+
normalizedData.forEach((d) => {
87+
expect(d.key).not.toBe('');
88+
});
89+
});
90+
});
4891
});

0 commit comments

Comments
 (0)