@@ -255,7 +255,7 @@ public static void GenerateDatadictionaryCSV(CsvDefinition csvdef)
255
255
enumvals = string . Format ( "\" {0}\" " , enumvals ) ; // use quotes
256
256
}
257
257
258
- csvmeta . Append ( string . Format ( "{0},{1},{2},{3},{4},{5},{6}\r \n " , ( c + 1 ) , coldef . Name , dattyp , colwid , dec , mask , enumvals ) ) ;
258
+ csvmeta . Append ( string . Format ( "{0},\" {1}\" ,{2},{3},{4},{5},{6}\r \n " , ( c + 1 ) , coldef . Name , dattyp , colwid , dec , mask , enumvals ) ) ;
259
259
}
260
260
261
261
// create new file
@@ -771,10 +771,11 @@ public static void GeneratePowerShell(CsvDefinition csvdef)
771
771
772
772
// start PowerShell script
773
773
ps1 . Append ( "# working directory and filename\r \n " ) ;
774
- ps1 . Append ( string . Format ( "$pathname = \" {0}\" ) \r \n " , FILE_PATH ) ) ;
774
+ ps1 . Append ( string . Format ( "$pathname = \" {0}\" \r \n " , FILE_PATH ) ) ;
775
775
ps1 . Append ( string . Format ( "$filename = $pathname + \" {0}\" \r \n \r \n " , FILE_NAME ) ) ;
776
776
777
777
var col_names = "" ;
778
+ var col_fixed = "" ;
778
779
var col_order = "" ;
779
780
var col_types = "" ;
780
781
var col_enums = "" ;
@@ -783,22 +784,30 @@ public static void GeneratePowerShell(CsvDefinition csvdef)
783
784
var exampleDate = "" ;
784
785
785
786
var r_dec = "" ;
787
+ var startpos = 0 ;
786
788
789
+ // get max column name length, for aligning columns
790
+ var MAX_COLNAME = 1 ;
791
+ for ( int c = 0 ; c < csvdef . Fields . Count ; c ++ ) { if ( csvdef . Fields [ c ] . Name . Length > MAX_COLNAME ) MAX_COLNAME = csvdef . Fields [ c ] . Name . Length ; } ;
792
+
793
+ // process all columns
787
794
for ( int c = 0 ; c < csvdef . Fields . Count ; c ++ )
788
795
{
789
796
// next field
790
797
var coldef = csvdef . Fields [ c ] ;
791
798
792
799
// any characters are allowed in Python column names
793
800
var colname = coldef . Name ;
794
- //colname = Regex.Replace(colname, "[^a-zA-Z0-9]", "_"); // not letter or digit
795
- var colnamepad = colname . PadRight ( 15 , ' ' ) ;
801
+ var colname_fix = Regex . Replace ( colname , "[^a-zA-Z0-9]" , "_" ) ; // not letter or digit
802
+ if ( colname != colname_fix ) colname = string . Format ( "\" {0}\" " , colname ) ; // if columns name contains spaces
803
+
804
+ var colnamepad = colname . PadRight ( MAX_COLNAME , ' ' ) ;
796
805
797
806
var comma = ( c < csvdef . Fields . Count - 1 ? ", " : "" ) ;
798
807
799
808
// list all column names
800
- col_names += string . Format ( "\" {0}\" {1}" , colname , comma ) ;
801
- col_order += string . Format ( "\t {0} = $_.{1}\r \n " , colnamepad , colname ) ;
809
+ col_names += string . Format ( "\" {0}\" {1}" , coldef . Name , comma ) ;
810
+ col_order += string . Format ( "\t \t {0} = $_.{1}\r \n " , colnamepad , colname ) ;
802
811
803
812
// enumeration
804
813
if ( coldef . isCodedValue )
@@ -813,8 +822,8 @@ public static void GeneratePowerShell(CsvDefinition csvdef)
813
822
{
814
823
enumvals = enumvals . Replace ( "\" " , "" ) ; // no quotes
815
824
} ;
816
- col_enums += string . Format ( "${0}_array = @({1})\r \n " , coldef . Name , enumvals ) ;
817
- check_enums += string . Format ( "\t if (!(${0}_array -contains $row.{0 })) {{$errmsg += \" $($row.{0}) is invalid {0} \" }}\r \n " , coldef . Name ) ;
825
+ col_enums += string . Format ( "${0}_array = @({1})\r \n " , colname_fix , enumvals ) ;
826
+ check_enums += string . Format ( "\t if ($row.{1} -and !(${0}_array -contains $row.{1 })) {{$errmsg += \" Invalid {2} \" \" $($row.{1}) \" \" \" }}\r \n " , colname_fix , colname , colname . Replace ( " \" " , " \" \" " ) ) ;
818
827
}
819
828
820
829
// indent for next lines
@@ -826,24 +835,33 @@ public static void GeneratePowerShell(CsvDefinition csvdef)
826
835
case ColumnType . DateTime :
827
836
// build Python format example "M/d/yyyy HH:m:s" -> "%m/%d/%Y %H:%M:%S"
828
837
var msk = coldef . Mask ;
829
- msk = DateMaskStandardToCstr ( msk ) ;
830
- col_types += string . Format ( "\t $row.{0} = [datetime]::parseexact($row.{1}, '{2}', $null)\r \n " , colnamepad , colname , msk ) ;
838
+ // msk = DateMaskStandardToCstr(msk);
839
+ col_types += string . Format ( "\t \t $row.{0} = [datetime]::parseexact($row.{1}, '{2}', $null)\r \n " , colnamepad , colname , msk ) ;
831
840
if ( exampleDate == "" ) exampleDate = colname ;
832
841
break ;
833
842
case ColumnType . Integer :
834
- col_types += string . Format ( "\t $row.{0} = [int]($row.{1} -replace 'NaN ', '')\r \n " , colnamepad , colname ) ;
843
+ col_types += string . Format ( "\t \t $row.{0} = [int]($row.{1} -replace '{2} ', '')\r \n " , colnamepad , colname , Main . Settings . NullKeyword ) ;
835
844
836
845
break ;
837
846
case ColumnType . Decimal :
838
- col_types += string . Format ( "\t $row.{0} = [decimal]($row.{1} -replace ',', '.')\r \n " , colnamepad , colname ) ;
847
+ col_types += string . Format ( "\t \t $row.{0} = [decimal]($row.{1} -replace ',', '.')\r \n " , colnamepad , colname ) ;
839
848
840
849
// just use the first decimal symbol
841
850
if ( r_dec == "" ) r_dec = coldef . DecimalSymbol . ToString ( ) ;
842
851
break ;
843
- // default:
844
- // col_types += string.Format("#\t $row.{0} = $row.{1}\r\n", colnamepad, colname);
845
- // break;
852
+ default :
853
+ col_types += string . Format ( "\t \t $row.{0} = $row.{1}.Trim(' \" ') \r \n " , colnamepad , colname ) ;
854
+ break ;
846
855
} ;
856
+
857
+ // fixed width columns
858
+ if ( csvdef . Separator == '\0 ' )
859
+ {
860
+ var strpos = startpos . ToString ( ) . PadLeft ( 3 , ' ' ) ;
861
+ var strwid = coldef . MaxWidth . ToString ( ) . PadLeft ( 2 , ' ' ) ;
862
+ col_fixed += string . Format ( "\t \t {0} = $line.Substring({1}, {2}).Trim(' \" ')\r \n " , colnamepad , strpos , strwid ) ;
863
+ startpos += coldef . MaxWidth ;
864
+ }
847
865
}
848
866
849
867
// no decimals, then not technically needed but nice to have as example code
@@ -855,12 +873,12 @@ public static void GeneratePowerShell(CsvDefinition csvdef)
855
873
if ( separator != "\0 " )
856
874
{
857
875
if ( separator == "\t " ) separator = "`t" ;
858
- nameparam = string . Format ( " -Delimiter \" {0}\" " , separator ) ;
876
+ nameparam + = string . Format ( " -Delimiter \" {0}\" " , separator ) ;
859
877
}
860
878
861
879
if ( ! csvdef . ColNameHeader )
862
880
{
863
- nameparam = string . Format ( " -Header @({0})" , col_names ) ;
881
+ nameparam + = string . Format ( " -Header @({0})" , col_names ) ;
864
882
}
865
883
866
884
// PowerShell skip comment lines
@@ -875,24 +893,10 @@ public static void GeneratePowerShell(CsvDefinition csvdef)
875
893
// fixed width
876
894
ps1 . Append ( string . Format ( "# read fixed width data file, positions {0}\r \n " , GetColumnWidths ( csvdef , true ) ) ) ;
877
895
878
- ps1 . Append ( "$stream_in = [System.IO.StreamReader]::new($pathname + $ filename)\r \n \r \n " ) ;
896
+ ps1 . Append ( "$stream_in = [System.IO.StreamReader]::new($filename)\r \n \r \n " ) ;
879
897
ps1 . Append ( "$csvdata = while ($line = $stream_in.ReadLine()) {\r \n " ) ;
880
898
ps1 . Append ( "\t [PSCustomObject]@{\r \n " ) ;
881
-
882
- // fixed width columns
883
- var startpos = 0 ;
884
- for ( int c = 0 ; c < csvdef . Fields . Count ; c ++ )
885
- {
886
- // next field
887
- var coldef = csvdef . Fields [ c ] ;
888
-
889
- // space characters are not allowed in PowerShell customobject field names
890
- var colname = coldef . Name . PadRight ( 15 , ' ' ) ;
891
- var strpos = startpos . ToString ( ) . PadLeft ( 3 , ' ' ) ;
892
- var strwid = coldef . MaxWidth . ToString ( ) . PadLeft ( 2 , ' ' ) ;
893
- ps1 . Append ( string . Format ( "\t \t {0} = $line.Substring({1}, {2}).Trim(' \" ')\r \n " , colname , strpos , strwid ) ) ;
894
- startpos += coldef . MaxWidth ;
895
- } ;
899
+ ps1 . Append ( col_fixed ) ;
896
900
ps1 . Append ( "\t }\r \n }\r \n \r \n " ) ;
897
901
}
898
902
else
@@ -906,11 +910,17 @@ public static void GeneratePowerShell(CsvDefinition csvdef)
906
910
if ( col_types != "" )
907
911
{
908
912
ps1 . Append ( "# Explicit datatypes\r \n " ) ;
909
- ps1 . Append ( "# WARNING: The script below doesn't have any eror handling for null/empty values,\r \n " ) ;
910
- ps1 . Append ( "# so if your data file contains int, decimal or datetime columns with empty or incorrect values,\r \n " ) ;
911
- ps1 . Append ( "# this script can throw errors or silently change values to '0', so beware.\r \n " ) ;
913
+ ps1 . Append ( "# WARNING: PowerShell has very basic error handling for null or invalid values,\r \n " ) ;
914
+ ps1 . Append ( "# so if your data file contains integer, decimal or datetime columns with empty or incorrect values,\r \n " ) ;
915
+ ps1 . Append ( "# this script can throw errors, silently change values to '0' or omit rows in the output csv, so beware.\r \n " ) ;
916
+ ps1 . Append ( "$line = 0\r \n " ) ;
912
917
ps1 . Append ( "foreach ($row in $csvdata)\r \n {\r \n " ) ;
918
+ ps1 . Append ( "\t $line += 1\r \n " ) ;
919
+ ps1 . Append ( "\t try {\r \n " ) ;
913
920
ps1 . Append ( col_types ) ;
921
+ ps1 . Append ( "\t } catch {\r \n " ) ;
922
+ ps1 . Append ( "\t \t Write-Error \" Data conversion error(s) on line $line\" -TargetObject $row\r \n " ) ;
923
+ ps1 . Append ( "\t }\r \n " ) ;
914
924
ps1 . Append ( "}\r \n \r \n " ) ;
915
925
}
916
926
@@ -921,13 +931,13 @@ public static void GeneratePowerShell(CsvDefinition csvdef)
921
931
ps1 . Append ( string . Format ( "{0}\r \n " , col_enums ) ) ;
922
932
ps1 . Append ( "# enumeration check invalid values\r \n " ) ;
923
933
ps1 . Append ( "$line = 0\r \n " ) ;
924
- ps1 . Append ( "foreach ($row in $csvdata)\r \n \r \n " ) ;
934
+ ps1 . Append ( "foreach ($row in $csvdata)\r \n { \r \n " ) ;
925
935
ps1 . Append ( "\t # check invalid values\r \n " ) ;
926
936
ps1 . Append ( "\t $errmsg = \" \" \r \n " ) ;
927
937
ps1 . Append ( string . Format ( "{0}\r \n " , check_enums ) ) ;
928
938
ps1 . Append ( "\t # report invalid values\r \n " ) ;
929
939
ps1 . Append ( "\t $line = $line + 1\r \n " ) ;
930
- ps1 . Append ( "\t if ($errmsg) {Write-Output \" line $($ line):$errmsg \" }\r \n }\r \n \r \n " ) ;
940
+ ps1 . Append ( "\t if ($errmsg) {Write-Error \" $errmsg on line $line\" -TargetObject $row }\r \n }\r \n \r \n " ) ;
931
941
}
932
942
933
943
if ( exampleDate == "" ) exampleDate = "myDateField" ;
@@ -939,17 +949,19 @@ public static void GeneratePowerShell(CsvDefinition csvdef)
939
949
940
950
ps1 . Append ( "# Reorder or remove columns (edit code below)\r \n " ) ;
941
951
ps1 . Append ( "$csvnew = $csvdata | ForEach-Object {\r \n " ) ;
942
- ps1 . Append ( "\t # Reorder columns\r \n " ) ;
952
+ ps1 . Append ( "\t [PSCustomObject]@{\r \n " ) ;
953
+ ps1 . Append ( "\t \t # Reorder columns\r \n " ) ;
943
954
ps1 . Append ( col_order ) ;
944
- ps1 . Append ( "#\t # Add columns\r \n " ) ;
945
- ps1 . Append ( string . Format ( "#\t {0} = $_.{0}.ToString(\" yyyy-MM-dd\" )\r \n " , exampleDate ) ) ;
946
- ps1 . Append ( "#\t YesNo_code = switch ($_.YesNoValue) {\r \n " ) ;
947
- ps1 . Append ( "#\t \t \t \" No\" {\" 0\" }\r \n " ) ;
948
- ps1 . Append ( "#\t \t \t \" Yes\" {\" 1\" }\r \n " ) ;
949
- ps1 . Append ( "#\t \t \t default {$_}\r \n " ) ;
950
- ps1 . Append ( "#\t \t }\r \n " ) ;
951
- ps1 . Append ( "#\t bmi = [math]::Round($_.Weight / ($_.Height * $_.Height), 2)\r \n " ) ;
952
- ps1 . Append ( "#\t center_patient = $_.centercode.SubString(0, 2) + \" -\" + patientcode # '01-123' etc\r \n " ) ;
955
+ ps1 . Append ( "#\t \t # Add columns\r \n " ) ;
956
+ ps1 . Append ( string . Format ( "#\t \t {0} = $_.{1}.ToString(\" yyyy-MM-dd\" )\r \n " , exampleDate . PadRight ( MAX_COLNAME , ' ' ) , exampleDate ) ) ;
957
+ ps1 . Append ( string . Format ( "#\t \t {0} = switch ($_.YesNoValue) {{\r \n " , "YesNo_code" . PadRight ( MAX_COLNAME , ' ' ) ) ) ;
958
+ ps1 . Append ( "#\t \t \t \t \" No\" {\" 0\" }\r \n " ) ;
959
+ ps1 . Append ( "#\t \t \t \t \" Yes\" {\" 1\" }\r \n " ) ;
960
+ ps1 . Append ( "#\t \t \t \t default {$_}\r \n " ) ;
961
+ ps1 . Append ( "#\t \t \t }\r \n " ) ;
962
+ ps1 . Append ( string . Format ( "#\t \t {0} = [math]::Round($_.Weight / ($_.Height * $_.Height), 2)\r \n " , "bmi" . PadRight ( MAX_COLNAME , ' ' ) ) ) ;
963
+ ps1 . Append ( string . Format ( "#\t \t {0} = $_.centercode.SubString(0, 2) + \" -\" + patientcode # '01-123' etc\r \n " , "cent_pat" . PadRight ( MAX_COLNAME , ' ' ) ) ) ;
964
+ ps1 . Append ( "\t }\r \n " ) ;
953
965
ps1 . Append ( "}\r \n \r \n " ) ;
954
966
955
967
ps1 . Append ( "# Merge datasets example, to join on multiple columns use a list, for example: on=['patient_id', 'center_id']\r \n " ) ;
0 commit comments