Skip to content

Commit da5bcbb

Browse files
authored
Fix #154. We already have the ability to read only certain columns, either by manually iterating CSV.File or by using Tables.select, but there was another issue: this file had an invalid (empty) column name that caused an error while the name was being normalized. (#382)
1 parent 1c1f406 commit da5bcbb

File tree

3 files changed

+10
-1
lines changed

3 files changed

+10
-1
lines changed

src/filedetection.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ normalizename(name::Symbol) = name
88
function normalizename(name::String)
99
uname = strip(Unicode.normalize(name))
1010
id = Base.isidentifier(uname) ? uname : map(c->Base.is_id_char(c) ? c : '_', uname)
11-
cleansed = string((!Base.is_id_start_char(id[1]) || id in RESERVED) ? "_" : "", id)
11+
cleansed = string((isempty(id) || !Base.is_id_start_char(id[1]) || id in RESERVED) ? "_" : "", id)
1212
return Symbol(replace(cleansed, r"(_)\1+"=>"_"))
1313
end
1414

test/testfiles/test_file_issue_154.csv

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
a, b, ,
2+
0, 1, , comment
3+
12, 5, ,

test/testfiles/testfiles.jl

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -490,4 +490,10 @@ testfiles = [
490490
NamedTuple{(:FAMILY, :PERSON, :MARKER, :RATIO), Tuple{String, String, String, Float64}},
491491
(FAMILY = ["A", "A", "A", "A", "A", "A", "EPGP013951", "EPGP014065", "EPGP014065", "EPGP014065", "EP07", "83346_EPGP014244", "83346_EPGP014244", "83506", "87001"], PERSON = ["EP01223", "EP01227", "EP01228", "EP01228", "EP01227", "EP01228", "EPGP013952", "EPGP014066", "EPGP014065", "EPGP014068", "706", "T3011", "T3231", "T17255", "301"], MARKER = ["rs710865", "rs11249215", "rs11249215", "rs10903129", "rs621559", "rs1514175", "rs773564", "rs2794520", "rs296547", "rs296547", "rs10927875", "rs2251760", "rs2251760", "rs2475335", "rs2413583"], RATIO = [0.0214, 0.0107, 0.00253, 0.0116, 0.00842, 0.0202, 0.00955, 0.0193, 0.0135, 0.0239, 0.0157, 0.0154, 0.0154, 0.00784, 0.0112])
492492
),
493+
# #154
494+
("test_file_issue_154.csv", (allowmissing=:auto, normalizenames=true),
495+
(2, 4),
496+
NamedTuple{(:a, :b, :_, :Column4), Tuple{Int64, Int64, Missing, Union{Missing, String}}},
497+
(a = [0, 12], b = [1, 5], _ = Missing[missing, missing], Column4 = Union{Missing, String}[" comment ", missing])
498+
),
493499
];

0 commit comments

Comments
 (0)