44import java .io .FileOutputStream ;
55import java .io .IOException ;
66import java .io .OutputStreamWriter ;
7+ import java .lang .NumberFormatException ;
78import java .nio .charset .Charset ;
9+ import java .nio .file .Files ;
810import java .nio .file .Path ;
911import java .util .Arrays ;
1012import java .util .Collections ;
1618import org .apache .commons .io .output .NullOutputStream ;
1719import org .mitre .synthea .export .CSVConstants ;
1820import org .mitre .synthea .helpers .Config ;
21+ import org .mitre .synthea .helpers .SimpleCSV ;
1922
2023public class CSVFileManager {
2124 /**
@@ -26,16 +29,11 @@ public class CSVFileManager {
2629 private Path outputDirectory ;
2730 private List <String > includedFiles ;
2831 private List <String > excludedFiles ;
29- private Map <String , String > filenameMap = initializeFilenameMap ();
32+ private Map <String , String > filenameMap = new HashMap <> ();
3033 private Map <String , OutputStreamWriter > writerMap = new HashMap <>();
31-
32- private Map <String , String > initializeFilenameMap () {
33- HashMap <String , String > map = new HashMap <>();
34-
35- map .putAll (CSVConstants .BASE_FILENAME_MAP );
36-
37- return map ;
38- }
34+ private Map <String , Integer > resourceCountMap = new HashMap <>();
35+ private int maxLinesPerFile ;
36+ private int fileNumberDigits ;
3937
4038 /**
4139 * "No-op" writer to use to prevent writing to excluded files.
@@ -50,6 +48,8 @@ private Map<String, String> initializeFilenameMap() {
5048 */
public CSVFileManager() {
  // Settings read from Config: append mode, per-file line limit, file-number padding.
  this.initializeAppend();
  this.initializeMaxLinesPerFile();
  this.initializeFileNumberDigits();

  // Output location and the lists of included / excluded files.
  this.initializeOutputDirectory();
  this.initializeIncludedAndExcludedFiles();
}
@@ -58,6 +58,24 @@ private void initializeAppend() {
5858 append = Config .getAsBoolean ("exporter.csv.append_mode" );
5959 }
6060
61+ private void initializeMaxLinesPerFile () {
62+ try {
63+ maxLinesPerFile = Config .getAsInteger ("exporter.csv.max_lines_per_file" , 0 );
64+ } catch (NumberFormatException ex ) {
65+ // if the property is present but not a numeric string
66+ maxLinesPerFile = 0 ;
67+ }
68+ }
69+
70+ private void initializeFileNumberDigits () {
71+ try {
72+ fileNumberDigits = Config .getAsInteger ("exporter.csv.file_number_digits" , 1 );
73+ } catch (NumberFormatException ex ) {
74+ // if the property is present but not a numeric string
75+ fileNumberDigits = 1 ;
76+ }
77+ }
78+
6179 private void initializeOutputDirectory () {
6280 File output = Exporter .getOutputFolder ("csv" , null );
6381 output .mkdirs ();
@@ -72,6 +90,24 @@ private void initializeOutputDirectory() {
7290 }
7391 }
7492
93+ private boolean multipleFilesPerResource () {
94+ return maxLinesPerFile > 0 ;
95+ }
96+
97+ private String filename (String resourceKey ) {
98+ return resourceKey + ".csv" ;
99+ }
100+
101+ private String filename (String resourceKey , int fileNumber ) {
102+ String formattedNumber = String .valueOf (fileNumber );
103+
104+ if (fileNumberDigits > 1 ) {
105+ formattedNumber = String .format ("%0" + fileNumberDigits + "d" , fileNumber );
106+ }
107+
108+ return resourceKey + "-" + formattedNumber + ".csv" ;
109+ }
110+
75111 private void initializeIncludedAndExcludedFiles () {
76112 String includedFilesStr = Config .get ("exporter.csv.included_files" , "" ).trim ();
77113 String excludedFilesStr = Config .get ("exporter.csv.excluded_files" , "" ).trim ();
@@ -126,6 +162,25 @@ private static List<String> propStringToList(String fileListString) {
126162 return files ;
127163 }
128164
165+ private int incrementResourceCount (String resourceKey ) {
166+ Integer resourceCount = resourceCountMap .get (resourceKey );
167+
168+ if (resourceCount == null ) {
169+ resourceCount = 0 ;
170+ }
171+
172+ resourceCount ++;
173+ resourceCountMap .put (resourceKey , resourceCount );
174+
175+ return resourceCount ;
176+ }
177+
178+ private boolean resourceIsExcluded (String resourceKey ) {
179+ String baseFilename = filename (resourceKey );
180+ return (!includedFiles .isEmpty () && !includedFiles .contains (baseFilename ))
181+ || excludedFiles .contains (baseFilename );
182+ }
183+
129184 /**
130185 * Helper method to instantiate, if necessary, and return the writer for the
131186 * resource type's CSV file. Returns a "no-op" writer for any excluded files.
@@ -134,20 +189,87 @@ private static List<String> propStringToList(String fileListString) {
134189 *
135190 * @return OutputStreamWriter for the given resource type's CSV file
136191 */
137- private OutputStreamWriter getResourceWriter (String resourceKey ) throws IOException {
138- String baseFilename = CSVConstants .BASE_FILENAME_MAP .get (resourceKey );
139- boolean excluded = (!includedFiles .isEmpty () && !includedFiles .contains (baseFilename ))
140- || excludedFiles .contains (baseFilename );
141- if (excluded ) {
192+ private OutputStreamWriter initializeResourceWriter (String resourceKey ) throws IOException {
193+ if (resourceIsExcluded (resourceKey )) {
142194 return NO_OP ;
143195 }
144196
145- String filename = filenameMap . get (resourceKey );
197+ String filename = filename (resourceKey );
146198 File file = outputDirectory .resolve (filename ).toFile ();
147199 // file writing may fail if we tell it to append to a file that doesn't already exist
148200 boolean appendToThisFile = append && file .exists ();
149201
150- return new OutputStreamWriter (new FileOutputStream (file , appendToThisFile ), charset );
202+ OutputStreamWriter writer =
203+ new OutputStreamWriter (new FileOutputStream (file , appendToThisFile ), charset );
204+ if (!append ) {
205+ writer .write (CSVConstants .HEADER_LINE_MAP .get (resourceKey ));
206+ }
207+
208+ return writer ;
209+ }
210+
211+ /**
212+ * Helper method to instantiate, if necessary, and return the writer for the
213+ * resource type's CSV file. Returns a "no-op" writer for any excluded files.
214+ *
215+ * @param resourceKey Key from CSVConstants for the resource type being written
216+ *
217+ * @return OutputStreamWriter for the given resource type's CSV file
218+ */
219+ private OutputStreamWriter initializeResourceWriter (String resourceKey , int resourceCount )
220+ throws IOException {
221+ if (resourceIsExcluded (resourceKey )) {
222+ return NO_OP ;
223+ }
224+
225+ if (append && resourceCount == 1 ) {
226+ resourceCount = getResourceCount (resourceKey ) + 1 ;
227+ resourceCountMap .put (resourceKey , resourceCount );
228+ }
229+
230+ int fileNumber = (resourceCount - 1 ) / maxLinesPerFile + 1 ;
231+ String filename = filename (resourceKey , fileNumber );
232+
233+ File file = outputDirectory .resolve (filename ).toFile ();
234+ // file writing may fail if we tell it to append to a file that doesn't already exist
235+ boolean appendToThisFile = append && file .exists ();
236+
237+ OutputStreamWriter writer =
238+ new OutputStreamWriter (new FileOutputStream (file , appendToThisFile ), charset );
239+ if (!append || resourceCount % maxLinesPerFile == 1 ) {
240+ writer .write (CSVConstants .HEADER_LINE_MAP .get (resourceKey ));
241+ }
242+
243+ return writer ;
244+ }
245+
246+ private int getResourceCount (String resourceKey ) throws IOException {
247+ int fileNumber = 1 ;
248+
249+ String currentFilename = filename (resourceKey , fileNumber );
250+ File file = outputDirectory .resolve (currentFilename ).toFile ();
251+
252+ if (file .exists ()) {
253+ do {
254+ fileNumber ++;
255+ currentFilename = filename (resourceKey , fileNumber );
256+ file = outputDirectory .resolve (currentFilename ).toFile ();
257+ } while ((file .exists ()));
258+
259+ fileNumber --;
260+ }
261+
262+ currentFilename = filename (resourceKey , fileNumber );
263+ file = outputDirectory .resolve (currentFilename ).toFile ();
264+
265+ int resourceCount = (fileNumber - 1 ) * maxLinesPerFile ;
266+
267+ if (file .exists ()) {
268+ String csvData = new String (Files .readAllBytes (file .toPath ()));
269+ resourceCount += SimpleCSV .parse (csvData ).size ();
270+ }
271+
272+ return resourceCount ;
151273 }
152274
153275 /**
@@ -158,13 +280,35 @@ private OutputStreamWriter getResourceWriter(String resourceKey) throws IOExcept
158280 * @return OutputStreamWriter for the given resource type's CSV file
159281 */
160282 public OutputStreamWriter getWriter (String resourceKey ) throws IOException {
283+ if (multipleFilesPerResource ()) {
284+ return getWriterForMultipleFiles (resourceKey );
285+ }
286+
161287 OutputStreamWriter writer = writerMap .get (resourceKey );
162288 if (writer == null ) {
163- writer = getResourceWriter (resourceKey );
289+ writer = initializeResourceWriter (resourceKey );
164290 writerMap .put (resourceKey , writer );
165- if (!append ) {
166- writer .write (CSVConstants .HEADER_LINE_MAP .get (resourceKey ));
291+ }
292+
293+ return writer ;
294+ }
295+
296+ private OutputStreamWriter getWriterForMultipleFiles (String resourceKey ) throws IOException {
297+ if (resourceIsExcluded (resourceKey )) {
298+ return NO_OP ;
299+ }
300+
301+ int resourceCount = incrementResourceCount (resourceKey );
302+
303+ OutputStreamWriter writer = writerMap .get (resourceKey );
304+
305+ if (resourceCount % maxLinesPerFile == 1 ) {
306+ if (writer != null ) {
307+ writer .flush ();
167308 }
309+
310+ writer = initializeResourceWriter (resourceKey , resourceCount );
311+ writerMap .put (resourceKey , writer );
168312 }
169313
170314 return writer ;
@@ -177,7 +321,7 @@ public OutputStreamWriter getWriter(String resourceKey) throws IOException {
177321 */
178322 public void flushWriter (String resourceKey ) throws IOException {
179323 synchronized (resourceKey ) {
180- OutputStreamWriter writer = getWriter (resourceKey );
324+ OutputStreamWriter writer = writerMap . get (resourceKey );
181325 if (writer != null ) {
182326 writer .flush ();
183327 }
0 commit comments