Skip to content

Commit dae5dec

Browse files
committed
Allow fields/columns projections when parsing header.
When using a CSV header, we might find fields in a different order than the target table columns, and maybe not all of the fields are going to be read. Take account of the header we read rather than expecting the header to look like the target table definition. Fix #888.
1 parent 1306b4c commit dae5dec

File tree

6 files changed

+33
-25
lines changed

6 files changed

+33
-25
lines changed

src/load/load-file.lisp

+2
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,8 @@
9898
(loop :for path-spec :in path-list
9999
:count t
100100
:do (let ((table-source (clone-copy-for copy path-spec)))
101+
(when (and (header table-source) (null (fields table-source)))
102+
(parse-header table-source))
101103
(incf task-count
102104
(copy-from table-source
103105
:concurrency concurrency

src/sources/common/api.lisp

+1-1
Original file line numberDiff line numberDiff line change
@@ -95,7 +95,7 @@
9595
:initform nil)) ;
9696
(:documentation "pgloader Multiple Files Data Source (csv, fixed, copy)."))
9797

98-
(defgeneric parse-header (md-copy header)
98+
(defgeneric parse-header (md-copy)
9999
(:documentation "Parse the file header and return a list of fields."))
100100

101101
(defgeneric process-rows (md-copy stream process-fn)

src/sources/common/md-methods.lisp

+3-7
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44

55
(in-package #:pgloader.sources)
66

7-
(defmethod parse-header ((copy md-copy) header)
7+
(defmethod parse-header ((copy md-copy))
88
"Unsupported by default, to be implemented in each md-copy subclass."
99
(error "Parsing the header of a ~s is not implemented yet." (type-of copy)))
1010

@@ -59,12 +59,8 @@
5959
;; about skipping the first line
6060
(loop :repeat (skip-lines copy) :do (read-line input nil nil))
6161

62-
;; we might now have to read the fields from the header line
63-
(when (header copy)
64-
(setf (fields copy)
65-
(parse-header copy (read-line input nil nil)))
66-
67-
(log-message :debug "Parsed header columns ~s" (fields copy)))
62+
;; we might now have to skip the header line
63+
(when (header copy) (read-line input nil nil))
6864

6965
;; read in the text file, split it into columns
7066
(process-rows copy input process-row-fn))))

src/sources/csv/csv.lisp

+21-11
Original file line numberDiff line numberDiff line change
@@ -57,19 +57,29 @@
5757
;;;
5858
;;; Read a file format in CSV format, and call given function on each line.
5959
;;;
60-
(defmethod parse-header ((csv copy-csv) header)
60+
(defmethod parse-header ((csv copy-csv))
6161
"Parse the header line given csv setup."
6262
;; a field entry is a list of field name and options
63-
(mapcar #'list
64-
(car ; parsing a single line
65-
(cl-csv:read-csv header
66-
:separator (csv-separator csv)
67-
:quote (csv-quote csv)
68-
:escape (csv-escape csv)
69-
:unquoted-empty-string-is-nil t
70-
:quoted-empty-string-is-nil nil
71-
:trim-outer-whitespace (csv-trim-blanks csv)
72-
:newline (csv-newline csv)))))
63+
(with-connection (cnx (source csv)
64+
:direction :input
65+
:external-format (encoding csv)
66+
:if-does-not-exist nil)
67+
(let ((input (md-strm cnx)))
68+
(loop :repeat (skip-lines csv) :do (read-line input nil nil))
69+
(let* ((header-line (read-line input nil nil))
70+
(field-name-list
71+
(mapcar #'list ; we need each field to be a list
72+
(car ; parsing a single line
73+
(cl-csv:read-csv header-line
74+
:separator (csv-separator csv)
75+
:quote (csv-quote csv)
76+
:escape (csv-escape csv)
77+
:unquoted-empty-string-is-nil t
78+
:quoted-empty-string-is-nil nil
79+
:trim-outer-whitespace (csv-trim-blanks csv)
80+
:newline (csv-newline csv))))))
81+
(log-message :notice "Parsed header columns ~s" field-name-list) ; log the freshly parsed list: (fields csv) is only assigned by the SETF below
82+
(setf (fields csv) field-name-list )))))
7383

7484
(defmethod process-rows ((csv copy-csv) stream process-fn)
7585
"Process rows from STREAM according to COPY specifications and PROCESS-FN."

test/csv-header.load

+4-4
Original file line numberDiff line numberDiff line change
@@ -15,11 +15,11 @@ LOAD CSV
1515
"repl$grpid" text,
1616
"repl$id" text,
1717
another text,
18-
fields text
18+
fields integer
1919
)
2020
$$;
2121

2222

23-
somefields,rekplcode,repl$grpid,repl$id,another,fields
24-
a,b,c,d,e,f
25-
foo,bar,baz,quux,foobar,fizzbuzz
23+
somefields,rekplcode,repl$grpid,repl$id,fields,another
24+
a,b,c,d,1,e
25+
foo,bar,baz,quux,2,foobar

test/regress/expected/csv-header.out

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
1-
a b c d e f
2-
foo bar baz quux foobar fizzbuzz
1+
a b c d e 1
2+
foo bar baz quux foobar 2

0 commit comments

Comments
 (0)