Skip to content

Commit eb8c45c

Browse files
committed
Improved enumeration detection #89
Improved enumeration detection, value in a coded column must appear at least 2 times or more on average
1 parent c51226b commit eb8c45c

File tree

1 file changed

+19
-10
lines changed

1 file changed

+19
-10
lines changed

CSVLintNppPlugin/CsvLint/CsvDefinition.cs

Lines changed: 19 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -142,27 +142,36 @@ public void AddCodedValues(Dictionary<string, int> slcodes)
142142
{
143143
// if any contains Carriage Return/Line Feed, then it's probably text not codes
144144
var containsCrLf = false;
145+
var total = 0;
145146
foreach (var s in slcodes)
146147
{
147148
if (s.Key.Contains('\r') || s.Key.Contains('\n')) containsCrLf = true;
149+
total += s.Value;
148150
}
149151

150152
// check if could be coded values
151153
if ( (containsCrLf == false) && (slcodes.Count > 0) && (slcodes.Count <= Main.Settings.UniqueValuesMax) )
152154
{
153-
// set coded values
154-
this.isCodedValue = true;
155+
// check enumeration ratio, this is to avoid interpreting a column with 100 rows and only 3 text values to be interpreted as enumeration
156+
var ratio = 1.0 * total / slcodes.Count;
155157

156-
this.CodedList = new List<string>();
157-
158-
foreach (var s in slcodes)
158+
// in a coded values column each unique value must be used at least 2 times or more (on average)
159+
if (ratio >= 2.0)
159160
{
160-
this.CodedList.Add(s.Key);
161-
}
161+
// set coded values
162+
this.isCodedValue = true;
162163

163-
// Sort list, with a hack to sort integers correctly
164-
// i.e. list of integers should not be sorted like [1, 10, 11, 2, 3, .. etc]
165-
this.CodedList.Sort(new StrCmpLogicalComparer());
164+
this.CodedList = new List<string>();
165+
166+
foreach (var s in slcodes)
167+
{
168+
this.CodedList.Add(s.Key);
169+
}
170+
171+
// Sort list, with a hack to sort integers correctly
172+
// i.e. list of integers should not be sorted like [1, 10, 11, 2, 3, .. etc]
173+
this.CodedList.Sort(new StrCmpLogicalComparer());
174+
}
166175
}
167176
}
168177
}

0 commit comments

Comments
 (0)