Skip to content

Commit 7155039

Browse files
authored
Merge pull request #175 from mcode/config-csv-options
Passthrough csv-parse options supported in the config
2 parents 199cf8e + 816e766 commit 7155039

19 files changed

+84
-34
lines changed

README.md

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -199,6 +199,22 @@ cat -v <file.csv>
199199

200200
If there is an unexpected symbol at the beginning of the file, then there may be a byte order mark (BOM) that needs to be removed.
201201

202+
#### Troubleshooting Additional Errors
203+
The mCODE Extraction Framework uses the Node.js `csv-parse` library to parse specified CSV files. [Parsing options for the `csv-parse` library](https://csv.js.org/parse/options/) can be included in the configuration file within the `commonExtractorArgs.csvParse.options` section. For example, the following configuration will pass the `to` option to the `csv-parse` module, causing the mCODE Extraction Framework to only read CSV files up to the specified line number:
204+
205+
```
206+
"commonExtractorArgs": {
207+
"dataDirectory": "/Users/*****/Documents/dataDirectory",
208+
"csvParse": {
209+
"options": {
210+
"to": 3
211+
}
212+
}
213+
},
214+
```
215+
216+
**Note:** The mCODE Extraction Framework enables the `bom`, `skip_empty_lines`, and `skip_lines_with_empty_values` options by default. Including any of these options in the configuration file will override the corresponding default value.
217+
202218
## Terminology and Architecture
203219

204220
This framework consists of three key components: Extractors, Modules and Templates. Below is, in order:

src/application/app.js

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,8 @@ async function mcodeApp(Client, fromDate, toDate, config, pathToRunLogs, debug,
3131

3232
// Parse CSV for list of patient mrns
3333
const dataDirectory = config.commonExtractorArgs && config.commonExtractorArgs.dataDirectory;
34-
const patientIds = parsePatientIds(config.patientIdCsvPath, dataDirectory);
34+
const parserOptions = config.commonExtractorArgs && config.commonExtractorArgs.csvParse && config.commonExtractorArgs.csvParse.options ? config.commonExtractorArgs.csvParse.options : {};
35+
const patientIds = parsePatientIds(config.patientIdCsvPath, dataDirectory, parserOptions);
3536

3637
// Get RunInstanceLogger for recording new runs and inferring dates from previous runs
3738
const runLogger = allEntries ? null : new RunInstanceLogger(pathToRunLogs);

src/extractors/BaseCSVExtractor.js

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,26 +6,27 @@ const logger = require('../helpers/logger');
66

77
class BaseCSVExtractor extends Extractor {
88
constructor({
9-
filePath, url, fileName, dataDirectory, csvSchema, unalterableColumns,
9+
filePath, url, fileName, dataDirectory, csvSchema, unalterableColumns, csvParse,
1010
}) {
1111
super();
1212
this.unalterableColumns = unalterableColumns || [];
1313
this.csvSchema = csvSchema;
14+
this.parserOptions = csvParse && csvParse.options ? csvParse.options : {};
1415
if (url) {
1516
logger.debug('Found url argument; creating a CSVURLModule with the provided url');
1617
this.url = url;
17-
this.csvModule = new CSVURLModule(this.url, this.unalterableColumns);
18+
this.csvModule = new CSVURLModule(this.url, this.unalterableColumns, this.parserOptions);
1819
} else if (fileName && dataDirectory) {
1920
if (!path.isAbsolute(dataDirectory)) throw new Error('dataDirectory is not an absolutePath, it needs to be.');
2021
this.filePath = path.join(dataDirectory, fileName);
2122
logger.debug(
2223
'Found fileName and dataDirectory arguments; creating a CSVFileModule with the provided dataDirectory and fileName',
2324
);
24-
this.csvModule = new CSVFileModule(this.filePath, this.unalterableColumns);
25+
this.csvModule = new CSVFileModule(this.filePath, this.unalterableColumns, this.parserOptions);
2526
} else if (filePath) {
2627
logger.debug('Found filePath argument; creating a CSVFileModule with the provided filePath');
2728
this.filePath = filePath;
28-
this.csvModule = new CSVFileModule(this.filePath, this.unalterableColumns);
29+
this.csvModule = new CSVFileModule(this.filePath, this.unalterableColumns, this.parserOptions);
2930
} else {
3031
logger.debug(
3132
'Could not instantiate a CSVExtractor with the provided constructor args',

src/extractors/CSVAdverseEventExtractor.js

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -69,8 +69,10 @@ function formatData(adverseEventData, patientId) {
6969
}
7070

7171
class CSVAdverseEventExtractor extends BaseCSVExtractor {
72-
constructor({ filePath, url, fileName, dataDirectory }) {
73-
super({ filePath, url, fileName, dataDirectory });
72+
constructor({
73+
filePath, url, fileName, dataDirectory, csvParse,
74+
}) {
75+
super({ filePath, url, fileName, dataDirectory, csvParse });
7476
}
7577

7678
async getAdverseEventData(mrn) {

src/extractors/CSVCTCAdverseEventExtractor.js

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -109,8 +109,10 @@ function formatData(adverseEventData, patientId) {
109109
}
110110

111111
class CSVCTCAdverseEventExtractor extends BaseCSVExtractor {
112-
constructor({ filePath, url, fileName, dataDirectory }) {
113-
super({ filePath, url, fileName, dataDirectory });
112+
constructor({
113+
filePath, url, fileName, dataDirectory, csvParse,
114+
}) {
115+
super({ filePath, url, fileName, dataDirectory, csvParse });
114116
}
115117

116118
async getAdverseEventData(mrn) {

src/extractors/CSVCancerDiseaseStatusExtractor.js

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,9 +9,9 @@ const { CSVCancerDiseaseStatusSchema } = require('../helpers/schemas/csv');
99

1010
class CSVCancerDiseaseStatusExtractor extends BaseCSVExtractor {
1111
constructor({
12-
filePath, url, fileName, dataDirectory, implementation,
12+
filePath, url, fileName, dataDirectory, csvParse, implementation,
1313
}) {
14-
super({ filePath, url, fileName, dataDirectory, csvSchema: CSVCancerDiseaseStatusSchema });
14+
super({ filePath, url, fileName, dataDirectory, csvSchema: CSVCancerDiseaseStatusSchema, csvParse });
1515
this.implementation = implementation;
1616
}
1717

src/extractors/CSVCancerRelatedMedicationAdministrationExtractor.js

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -46,8 +46,10 @@ function formatData(medicationData, patientId) {
4646
}
4747

4848
class CSVCancerRelatedMedicationAdministrationExtractor extends BaseCSVExtractor {
49-
constructor({ filePath, url, fileName, dataDirectory }) {
50-
super({ filePath, url, fileName, dataDirectory });
49+
constructor({
50+
filePath, url, fileName, dataDirectory, csvParse,
51+
}) {
52+
super({ filePath, url, fileName, dataDirectory, csvParse });
5153
}
5254

5355
async getMedicationData(mrn) {

src/extractors/CSVCancerRelatedMedicationRequestExtractor.js

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -48,8 +48,10 @@ function formatData(medicationData, patientId) {
4848
}
4949

5050
class CSVCancerRelatedMedicationRequestExtractor extends BaseCSVExtractor {
51-
constructor({ filePath, url, fileName, dataDirectory }) {
52-
super({ filePath, url, fileName, dataDirectory });
51+
constructor({
52+
filePath, url, fileName, dataDirectory, csvParse,
53+
}) {
54+
super({ filePath, url, fileName, dataDirectory, csvParse });
5355
}
5456

5557
async getMedicationData(mrn) {

src/extractors/CSVClinicalTrialInformationExtractor.js

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,9 +9,9 @@ const { CSVClinicalTrialInformationSchema } = require('../helpers/schemas/csv');
99

1010
class CSVClinicalTrialInformationExtractor extends BaseCSVExtractor {
1111
constructor({
12-
filePath, url, fileName, dataDirectory, clinicalSiteID, clinicalSiteSystem,
12+
filePath, url, fileName, dataDirectory, csvParse, clinicalSiteID, clinicalSiteSystem,
1313
}) {
14-
super({ filePath, url, fileName, dataDirectory, csvSchema: CSVClinicalTrialInformationSchema });
14+
super({ filePath, url, fileName, dataDirectory, csvSchema: CSVClinicalTrialInformationSchema, csvParse });
1515
if (!clinicalSiteID) logger.warn(`${this.constructor.name} expects a value for clinicalSiteID but got ${clinicalSiteID}`);
1616
this.clinicalSiteID = clinicalSiteID;
1717
this.clinicalSiteSystem = clinicalSiteSystem;

src/extractors/CSVConditionExtractor.js

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -49,8 +49,10 @@ function formatData(conditionData, patientId) {
4949
}
5050

5151
class CSVConditionExtractor extends BaseCSVExtractor {
52-
constructor({ filePath, url, fileName, dataDirectory }) {
53-
super({ filePath, url, fileName, dataDirectory, csvSchema: CSVConditionSchema });
52+
constructor({
53+
filePath, url, fileName, dataDirectory, csvParse,
54+
}) {
55+
super({ filePath, url, fileName, dataDirectory, csvSchema: CSVConditionSchema, csvParse });
5456
}
5557

5658
async getConditionData(mrn) {

0 commit comments

Comments
 (0)