|
| 1 | +// |
| 2 | +// CSVObject.m |
| 3 | +// QuickLookCSV |
| 4 | +// |
| 5 | +// Created by Pascal Pfiffner on 03.07.09. |
| 6 | +// This sourcecode is released under the Apache License, Version 2.0 |
| 7 | +// http://www.apache.org/licenses/LICENSE-2.0.html |
| 8 | +// |
| 9 | + |
| 10 | +#import "CSVObject.h" |
| 11 | +#import "CSVRowObject.h" |
| 12 | + |
| 13 | + |
| 14 | +@implementation CSVObject |
| 15 | + |
| 16 | +@synthesize separator, rows, columnKeys; |
| 17 | + |
| 18 | + |
/**
 *  Initializes the receiver and sets its separator to a comma (",").
 *
 *  @return The value returned by the superclass initializer (nil when that failed).
 */
- (id) init
{
    self = [super init];
    if (nil == self) {
        return nil;
    }

    // A comma is the default; parsing may later replace it with ";"
    [self setSeparator:@","];
    return self;
}
| 28 | + |
/**
 *  Convenience constructor.
 *
 *  Allocates through `self` instead of a hard-coded class name so that a subclass
 *  calling +csvObject receives an instance of that subclass.
 *
 *  @return A new, autoreleased instance initialized with -init.
 */
+ (CSVObject *) csvObject
{
    return [[[self alloc] init] autorelease];
}
| 33 | + |
/**
 *  Resets the three object properties to nil through their setters, then calls
 *  through to the superclass.
 *
 *  NOTE(review): presumably the setters release the stored values (retain/copy
 *  properties) - confirm against the declarations in CSVObject.h.
 */
- (void) dealloc
{
    [self setSeparator:nil];
    [self setRows:nil];
    [self setColumnKeys:nil];

    [super dealloc];
}
| 42 | +#pragma mark - |
| 43 | + |
| 44 | + |
| 45 | + |
| 46 | +#pragma mark Parsing from String |
/**
 *  Parses a CSV string without limiting the number of rows.
 *
 *  Forwards to -numRowsFromCSVString:maxRows:error: with a maxRows of 0, which
 *  that method treats as "no limit".
 *
 *  @param string The CSV text to parse.
 *  @param error  Passed through unchanged to the forwarding target.
 *  @return The value returned by the forwarding target.
 */
- (NSUInteger) numRowsFromCSVString:(NSString *)string error:(NSError **)error
{
    const NSUInteger noRowLimit = 0;
    NSUInteger parsedRowCount = [self numRowsFromCSVString:string maxRows:noRowLimit error:error];

    return parsedRowCount;
}
| 51 | + |
// Stores a snapshot of `cellText` in `row` under `key`, and appends `key` to `keys` when it names a new column.
static void CSVObjectStoreCell(NSMutableDictionary *row, NSMutableArray *keys, NSString *cellText, NSString *key, BOOL isNewKey)
{
    // -copy returns an object we own and the dictionary retains it on insertion,
    // so the copy must be balanced here or every cell leaks.
    NSString *snapshot = [cellText copy];
    [row setObject:snapshot forKey:key];
    [snapshot release];

    if (isNewKey) {
        [keys addObject:key];
    }
}

/**
 *  Parses CSV text into row objects and column keys.
 *
 *  On non-empty input the receiver's `rows` is set to an array of CSVRowObject
 *  instances (one per parsed row, built with +rowFromDict:) and `columnKeys` to
 *  the generated keys "col_0", "col_1", ... in order of first appearance.
 *
 *  Separator handling: a nil or empty separator falls back to ",". Unless the
 *  separator already is ";", the first 100 characters are sampled and the
 *  separator is switched to ";" when splitting on ";" yields more parts than
 *  splitting on the current separator.
 *
 *  Quoting: separators and newlines inside double quotes belong to the cell,
 *  and a doubled double quote inside a quoted section yields one literal quote.
 *
 *  @param string  The CSV text to parse.
 *  @param maxRows Row limit; 0 means no limit. Parsing stops once the row count
 *                 exceeds maxRows, so up to maxRows + 1 rows are parsed and the
 *                 return value can be maxRows + 1.
 *                 NOTE(review): callers may rely on a result greater than maxRows
 *                 to detect truncation - confirm before changing the comparison.
 *  @param error   Optional. Set only when `string` is nil or empty (NSCocoaErrorDomain,
 *                 code 1). The message is stored under NSLocalizedDescriptionKey and,
 *                 for backward compatibility, under the legacy key @"userInfo".
 *  @return The number of rows parsed, or 0 for nil/empty input.
 */
- (NSUInteger) numRowsFromCSVString:(NSString *)string maxRows:(NSUInteger)maxRows error:(NSError **)error
{
    // Nil or empty input: nothing to parse; rows and columnKeys stay untouched
    if ([string length] == 0) {
        if (NULL != error) {
            NSString *message = @"Cannot parse an empty string";
            NSDictionary *errorDict = [NSDictionary dictionaryWithObjectsAndKeys:
                                       message, NSLocalizedDescriptionKey,  // makes -localizedDescription meaningful
                                       message, @"userInfo",                // legacy key, kept for existing callers
                                       nil];
            *error = [NSError errorWithDomain:NSCocoaErrorDomain code:1 userInfo:errorDict];
        }
        return 0;
    }

    NSUInteger numRows = 0;
    NSMutableArray *thisRows = [NSMutableArray array];
    NSMutableArray *thisColumnKeys = [NSMutableArray array];

    // A nil separator would raise in -componentsSeparatedByString: below
    if ([separator length] == 0) {
        self.separator = @",";
    }

    // Check whether the file uses ";" as separator by sampling its beginning
    if (![separator isEqualToString:@";"]) {
        NSUInteger sampleLength = ([string length] > 100) ? 100 : [string length];
        NSString *sample = [string substringToIndex:sampleLength];
        NSUInteger semicolonParts = [[sample componentsSeparatedByString:@";"] count];
        NSUInteger separatorParts = [[sample componentsSeparatedByString:separator] count];
        if (semicolonParts > separatorParts) {
            self.separator = @";";
        }
    }

    // Newline characters only: whitespace-and-newline minus plain whitespace
    NSMutableCharacterSet *newlineCharacterSet = (id)[NSMutableCharacterSet whitespaceAndNewlineCharacterSet];
    [newlineCharacterSet formIntersectionWithCharacterSet:[[NSCharacterSet whitespaceCharacterSet] invertedSet]];

    // Characters where the parser should stop: separator, doublequote, newlines
    NSMutableCharacterSet *importantCharactersSet = (id)[NSMutableCharacterSet characterSetWithCharactersInString:[NSString stringWithFormat:@"%@\"", separator]];
    [importantCharactersSet formUnionWithCharacterSet:newlineCharacterSet];

    // Create scanner and scan the string
    // ideas for the following block from Drew McCormack >> http://www.macresearch.org/cocoa-scientists-part-xxvi-parsing-csv-data
    NSMutableString *currentCellString = [NSMutableString string];
    NSScanner *scanner = [NSScanner scannerWithString:string];
    [scanner setCharactersToBeSkipped:nil];     // whitespace and newlines are significant here

    while (![scanner isAtEnd]) {

        // Per-row state; we end up here after every row
        BOOL insideQuotes = NO;
        BOOL finishedRow = NO;
        NSUInteger colIndex = 0;
        NSMutableDictionary *columns = [NSMutableDictionary dictionaryWithCapacity:[thisColumnKeys count]];
        [currentCellString setString:@""];

        // Scan the row up to its terminator
        while (!finishedRow) {
            NSString *tempString = nil;

            // Reuse the key of an already known column, or generate one for a new column
            BOOL isNewColumn = (colIndex >= [thisColumnKeys count]);
            NSString *colKey = nil;
            if (isNewColumn) {
                colKey = [NSString stringWithFormat:@"col_%lu", (unsigned long)colIndex];
            }
            else {
                colKey = [thisColumnKeys objectAtIndex:colIndex];
            }

            // Scan ordinary characters into the current cell
            if ([scanner scanUpToCharactersFromSet:importantCharactersSet intoString:&tempString]) {
                [currentCellString appendString:tempString];
            }

            // found the separator
            if ([scanner scanString:separator intoString:NULL]) {
                if (insideQuotes) {         // separator inside double quotes is cell content
                    [currentCellString appendString:separator];
                }
                else {                      // column separator: the cell is complete
                    CSVObjectStoreCell(columns, thisColumnKeys, currentCellString, colKey, isNewColumn);

                    // on to the next cell, skipping whitespace that follows the separator
                    [currentCellString setString:@""];
                    [scanner scanCharactersFromSet:[NSCharacterSet whitespaceCharacterSet] intoString:NULL];
                    colIndex++;
                }
            }

            // found a doublequote (")
            else if ([scanner scanString:@"\"" intoString:NULL]) {
                if (insideQuotes && [scanner scanString:@"\"" intoString:NULL]) {
                    // doubled doublequote inside quotes stands for one literal doublequote
                    [currentCellString appendString:@"\""];
                }
                else {                      // start or end of a quoted section
                    insideQuotes = !insideQuotes;
                }
            }

            // found one or more newline characters
            else if ([scanner scanCharactersFromSet:newlineCharacterSet intoString:&tempString]) {
                if (insideQuotes) {         // line break inside quotes is cell content
                    [currentCellString appendString:tempString];
                }
                else {                      // end of row
                    CSVObjectStoreCell(columns, thisColumnKeys, currentCellString, colKey, isNewColumn);
                    finishedRow = YES;
                }
            }

            // found the end of the string
            else if ([scanner isAtEnd]) {
                CSVObjectStoreCell(columns, thisColumnKeys, currentCellString, colKey, isNewColumn);
                finishedRow = YES;
            }

            // The scanner stopped on a character none of the branches above could consume
            // (e.g. a partial match of a multi-character separator). Take it as cell content
            // and step over it; otherwise this loop would never advance.
            else {
                NSUInteger stuckLocation = [scanner scanLocation];
                NSUInteger resumeLocation = NSMaxRange([string rangeOfComposedCharacterSequenceAtIndex:stuckLocation]);
                [currentCellString appendString:[string substringWithRange:NSMakeRange(stuckLocation, resumeLocation - stuckLocation)]];
                [scanner setScanLocation:resumeLocation];
            }
        }

        // one row scanned - add it to the rows array
        if ([columns count] > 0) {
            CSVRowObject *newRow = [CSVRowObject rowFromDict:columns];
            [thisRows addObject:newRow];
        }

        // Deliberately ">" and not ">=": see the maxRows note in the method comment
        numRows++;
        if ((maxRows > 0) && (numRows > maxRows)) {
            break;
        }
    }

    // finished scanning our string
    self.rows = thisRows;
    self.columnKeys = thisColumnKeys;

    return numRows;
}
| 202 | + |
| 203 | + |
| 204 | +@end |
0 commit comments