|
7 | 7 |
|
8 | 8 | namespace HashFields.Data.Csv
|
9 | 9 | {
|
| 10 | + /// <summary> |
| 11 | + /// Helper to work with delimited tabular data as columns rather than rows. |
| 12 | + /// </summary> |
| 13 | + /// <see cref="IEquatable{T}" /> |
10 | 14 | internal class Columnar : IEquatable<Columnar>
|
11 | 15 | {
|
12 |
| - private readonly List<string> _headers = new(); |
| 16 | + private readonly List<string> _header = new(); |
13 | 17 | private readonly Dictionary<string, List<string>> _data = new();
|
| 18 | + private readonly string _delimiter; |
14 | 19 |
|
/// <summary>
/// Look up a column's values by the column's name.
/// </summary>
/// <param name="key">The column name used as the key into the column store.</param>
/// <returns>
/// The list stored for <paramref name="key"/>. This is the stored list itself,
/// not a copy.
/// </returns>
public List<string> this[string key] => _data[key];
|
16 |
| - public List<string> this[int index] { get => _data[_headers[index]]; } |
17 |
| - public List<string> Header { get => _headers.ToList(); } |
18 |
| - public List<List<string>> Columns { get => _data.Values.ToList(); } |
19 | 26 |
|
20 |
| - public Columnar(string delimiter) : this(new MemoryStream(), delimiter) |
21 |
| - { |
22 |
| - } |
/// <summary>
/// Look up a column's values by the column's position in the header.
/// </summary>
/// <param name="index">The 0-based position of the column name within the header.</param>
/// <returns>
/// The list stored under the header name found at <paramref name="index"/>.
/// This is the stored list itself, not a copy.
/// </returns>
public List<string> this[int index]
{
    get
    {
        // Resolve the position to a column name, then fetch by name.
        var name = _header[index];
        return _data[name];
    }
}
23 | 33 |
|
/// <summary>
/// The ordered column names. Each access returns a fresh copy, so changes
/// to the returned list do not affect this <c>Columnar</c>.
/// </summary>
public List<string> Header => new List<string>(_header);
| 38 | + |
/// <summary>
/// The data columns. Each access builds a new outer list; the inner lists
/// are the stored column lists themselves, not copies.
/// </summary>
public List<List<string>> Columns => new List<List<string>>(_data.Values);
| 43 | + |
/// <summary>
/// Initialize a new <c>Columnar</c> for delimited data.
/// </summary>
/// <param name="stream">
/// The <c>Stream</c> of data to read into this <c>Columnar</c>. When <c>null</c>,
/// nothing is parsed and the instance keeps its initial empty header and columns.
/// </param>
/// <param name="delimiter">
/// The delimiter used between fields in the data. It is used for parsing
/// <paramref name="stream"/> and again when writing the data back out.
/// </param>
public Columnar(Stream stream, string delimiter)
{
    // Record the delimiter unconditionally: Write joins fields with it even
    // when there was no stream to parse.
    _delimiter = delimiter;

    if (stream is null)
    {
        return;
    }

    var parsed = Parse(stream, _delimiter);

    _header = parsed.Item1;
    _data = parsed.Item2;
}
|
34 | 61 |
|
/// <summary>
/// Transform every value in the named columns.
/// </summary>
/// <param name="func">
/// A function taking a string and returning a string. Each value of a selected
/// column is passed through it, and the column is replaced by the list of results.
/// </param>
/// <param name="columns">
/// The column names to transform. Names that do not appear in the header are ignored.
/// </param>
public void Apply(Func<string, string> func, params string[] columns)
{
    // Snapshot the requested names that really exist before touching the store.
    string[] targets = _header.Intersect(columns).ToArray();

    for (int i = 0; i < targets.Length; i++)
    {
        string name = targets[i];
        List<string> current = _data[name];
        _data[name] = current.ConvertAll(value => func(value));
    }
}
|
42 | 78 |
|
43 |
| - public bool Equals(Columnar other) |
44 |
| - { |
45 |
| - if (other is null) |
46 |
| - { |
47 |
| - return false; |
48 |
| - } |
49 |
| - |
50 |
| - if (!_headers.SequenceEqual(other._headers)) |
51 |
| - { |
52 |
| - return false; |
53 |
| - } |
54 |
| - |
55 |
| - foreach (var column in _data) |
56 |
| - { |
57 |
| - if (!column.Value.SequenceEqual(other._data[column.Key])) |
58 |
| - { |
59 |
| - return false; |
60 |
| - } |
61 |
| - } |
62 |
| - |
63 |
| - return true; |
64 |
| - } |
65 |
| - |
66 |
| - public override bool Equals(object obj) |
67 |
| - { |
68 |
| - if (obj is null) |
69 |
| - { |
70 |
| - return false; |
71 |
| - } |
72 |
| - |
73 |
| - if (obj is not Columnar columnar) |
74 |
| - { |
75 |
| - return false; |
76 |
| - } |
77 |
| - |
78 |
| - return Equals(columnar); |
79 |
| - } |
80 |
| - |
81 |
| - public override int GetHashCode() |
82 |
| - { |
83 |
| - var hashcode = new HashCode(); |
84 |
| - foreach (var header in _headers) |
85 |
| - { |
86 |
| - hashcode.Add(header); |
87 |
| - } |
88 |
| - foreach (var column in _data.Values) |
89 |
| - { |
90 |
| - foreach (var val in column) |
91 |
| - { |
92 |
| - hashcode.Add(val); |
93 |
| - } |
94 |
| - } |
95 |
| - return hashcode.ToHashCode(); |
96 |
| - } |
97 |
| - |
/// <summary>
/// Drop the named columns from this <c>Columnar</c>: both the header entry
/// and the column's values are removed.
/// </summary>
/// <seealso cref="Header" />
/// <param name="columns">
/// The column names to remove. Names that do not appear in the header are ignored.
/// </param>
public void Remove(params string[] columns)
{
    // Materialize the matching names first; the header is modified below and
    // must not be enumerated while that happens.
    var doomed = _header.Intersect(columns).ToList();

    doomed.ForEach(name =>
    {
        _header.Remove(name);
        _data.Remove(name);
    });
}
|
106 | 96 |
|
107 |
| - public void Write(Stream destination) |
108 |
| - { |
109 |
| - using var sw = new StreamWriter(destination); |
110 |
| - foreach (var row in Rows()) |
111 |
| - { |
112 |
| - sw.WriteLine(String.Join(",", row)); |
113 |
| - } |
114 |
| - } |
115 |
| - |
116 |
| - private List<List<string>> Rows() |
/// <summary>
/// Compute the list of data rows from the current state of this <c>Columnar</c>.
/// </summary>
/// <returns>
/// A new list whose first element is the header row, followed by one list per
/// data row. Every data row holds one field per header entry, in header order.
/// A column shorter than the longest column contributes an empty string for
/// the rows it does not reach.
/// </returns>
public List<List<string>> Rows()
{
    // Walk the columns in header order rather than dictionary order, so each
    // value lines up with its column name in the header row.
    var ordered = _header.Select(name => _data[name]).ToList();

    // The longest column fixes the number of data rows. DefaultIfEmpty keeps
    // Max from throwing when there are no columns at all.
    var rowCount = ordered.Select(column => column.Count).DefaultIfEmpty(0).Max();

    // The header row always comes first.
    var rows = new List<List<string>>(rowCount + 1) { Header };

    for (int i = 0; i < rowCount; i++)
    {
        var row = new List<string>(ordered.Count);

        foreach (var column in ordered)
        {
            // Pad short columns so later fields are not shifted to the left.
            row.Add(i < column.Count ? column[i] : string.Empty);
        }

        rows.Add(row);
    }

    return rows;
}
|
134 | 125 |
|
/// <summary>
/// Write this <c>Columnar</c> data to a stream as delimited tabular data,
/// one line per row with fields joined by the stored delimiter.
/// </summary>
/// <param name="destination">
/// A writable <c>Stream</c> target for this <c>Columnar</c>. The
/// <c>StreamWriter</c> wrapping it is disposed before this method returns,
/// which also closes <paramref name="destination"/>.
/// </param>
public void Write(Stream destination)
{
    using (var writer = new StreamWriter(destination))
    {
        foreach (List<string> fields in Rows())
        {
            var line = String.Join(_delimiter, fields);
            writer.WriteLine(line);
        }
    }
}
| 138 | + |
| 139 | + /// <summary> |
| 140 | + /// Read delimited data from a stream and convert into columnar format. |
| 141 | + /// </summary> |
| 142 | + /// <param name="stream">The source of data.</param> |
| 143 | + /// <param name="delimiter">The delimiter used to separate fields in the data.</param> |
| 144 | + /// <returns>A <c>Tuple</c> containing two items: |
| 145 | + /// <list type="bullet"> |
| 146 | + /// <item> |
| 147 | + /// <term><c>List{String}</c></term> |
| 148 | + /// <description>The ordered header row of column names.</description> |
| 149 | + /// </item> |
| 150 | + /// <item> |
| 151 | + /// <term><c>Dictionary{String,List{String}}</c></term> |
| 152 | + /// <description> |
| 153 | + /// The data columns, where the key is the column name |
| 154 | + /// and the value is the list of values in the column. |
| 155 | + /// </description> |
| 156 | + /// </item> |
| 157 | + /// </list> |
| 158 | + /// </returns> |
135 | 159 | private static Tuple<List<string>, Dictionary<string, List<string>>> Parse(Stream stream, string delimiter)
|
136 | 160 | {
|
137 | 161 | var header = new List<string>();
|
@@ -170,5 +194,64 @@ private static Tuple<List<string>, Dictionary<string, List<string>>> Parse(Strea
|
170 | 194 | )
|
171 | 195 | );
|
172 | 196 | }
|
| 197 | + |
| 198 | + #region IEquatable<Columnar> |
| 199 | + |
/// <summary>
/// Determine whether another <c>Columnar</c> holds the same header and the
/// same column values. The delimiter is not part of the comparison.
/// </summary>
/// <param name="other">The instance to compare against; may be <c>null</c>.</param>
/// <returns>
/// <c>true</c> when the headers match element-by-element and every column of
/// this instance has an identically-valued column under the same name in
/// <paramref name="other"/>; otherwise <c>false</c>.
/// </returns>
public bool Equals(Columnar other)
{
    if (other is null)
    {
        return false;
    }

    if (ReferenceEquals(this, other))
    {
        return true;
    }

    if (!_header.SequenceEqual(other._header))
    {
        return false;
    }

    foreach (var column in _data)
    {
        // A column missing on the other side means "not equal". The indexer
        // would throw instead, and Equals should never throw.
        if (!other._data.TryGetValue(column.Key, out var theirs)
            || !column.Value.SequenceEqual(theirs))
        {
            return false;
        }
    }

    return true;
}
| 222 | + |
/// <summary>
/// Compare this <c>Columnar</c> with an arbitrary object.
/// </summary>
/// <param name="obj">The object to compare against; may be <c>null</c>.</param>
/// <returns>
/// <c>false</c> when <paramref name="obj"/> is <c>null</c> or not a <c>Columnar</c>;
/// otherwise the result of the typed comparison.
/// </returns>
public override bool Equals(object obj) => obj is Columnar columnar && Equals(columnar);
| 237 | + |
/// <summary>
/// Compute a hash code from the header names and the column values.
/// </summary>
/// <returns>A hash code combining each header name with that column's values.</returns>
public override int GetHashCode()
{
    var hashcode = new HashCode();

    // Visit columns in header order. The dictionary's own enumeration order is
    // unspecified, and two instances that compare equal must hash identically.
    foreach (var header in _header)
    {
        hashcode.Add(header);

        if (_data.TryGetValue(header, out var column))
        {
            foreach (var val in column)
            {
                hashcode.Add(val);
            }
        }
    }

    return hashcode.ToHashCode();
}
| 254 | + |
| 255 | + #endregion |
173 | 256 | }
|
174 | 257 | }
|
0 commit comments