From c3c9cf3380b36634ca02767a1fb903416012bdce Mon Sep 17 00:00:00 2001 From: Khafra <42794878+KhafraDev@users.noreply.github.com> Date: Mon, 2 Jan 2023 15:21:57 -0500 Subject: [PATCH 01/36] feat: incomplete parser --- lib/formdata/constants.js | 30 ++++++ lib/formdata/parser.js | 218 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 248 insertions(+) create mode 100644 lib/formdata/constants.js create mode 100644 lib/formdata/parser.js diff --git a/lib/formdata/constants.js b/lib/formdata/constants.js new file mode 100644 index 00000000000..fed0a6071e1 --- /dev/null +++ b/lib/formdata/constants.js @@ -0,0 +1,30 @@ +'use strict' + +const states = { + INITIAL: 0, + BOUNDARY: 1, + READ_HEADERS: 2, + READ_BODY: 3 +} + +const headerStates = { + DEFAULT: -1, // no match + FIRST: 0, + SECOND: 1, + THIRD: 2 +} + +const chars = { + '-': '-'.charCodeAt(0), + cr: '\r'.charCodeAt(0), + lf: '\n'.charCodeAt(0) +} + +const dashes = Buffer.from('--') + +module.exports = { + states, + chars, + headerStates, + dashes +} diff --git a/lib/formdata/parser.js b/lib/formdata/parser.js new file mode 100644 index 00000000000..e91d8a337df --- /dev/null +++ b/lib/formdata/parser.js @@ -0,0 +1,218 @@ +'use strict' + +const { Writable } = require('stream') +const { states, chars, headerStates, dashes } = require('./constants') +const { parseMIMEType } = require('../fetch/dataURL') + +class FormDataParser extends Writable { + /** @type {Buffer[]} */ + #buffers = [] + #byteOffset = 0 + + #state = states.INITIAL + + /** @type {Buffer} */ + #boundary + + #info = { + headerState: headerStates.DEFAULT, + rawHeaders: null, + bodyState: { + buffersChecked: 0, + byteOffset: 0 + }, + bodyChunks: [] + } + + constructor (opts) { + super(opts) + + const contentType = opts.headers.get?.('content-type') ?? opts.headers['content-type'] + const mimeType = contentType ? parseMIMEType(contentType) : null + + if (mimeType === null || mimeType.essence !== 'multipart/form-data') { + this.destroy(new Error('Invalid mimetype essence.')) + return + } else if (!mimeType.parameters.has('boundary')) { + this.destroy(new Error('No boundary parameter.')) + } + + this.#boundary = Buffer.from(mimeType.parameters.get('boundary')) + } + + /** + * @param {Buffer} chunk + * @param {() => void} callback + */ + _write (chunk, _, callback) { + this.#buffers.push(chunk) + this.#byteOffset += chunk.length + + this.run(callback) + } + + /** + * @param {() => void} callback + */ + run (callback) { + while (true) { + if (this.#state === states.INITIAL) { + if (this.#byteOffset < 2) { + return callback() + } + + const bytes = this.consume(2) + + if (bytes[0] !== chars['-'] || bytes[1] !== chars['-']) { + this.destroy(new Error('FormData should start with --')) + return + } + + this.#state = states.BOUNDARY + } else if (this.#state === states.BOUNDARY) { + if (this.#byteOffset < this.#boundary.length + 2) { + return callback() + } + + const bytes = this.consume(this.#boundary.length) + const nextBytes = this.consume(2) // \r\n OR -- + + if (bytes.toString() !== this.#boundary) { + this.destroy(new Error('Received invalid boundary')) + return + } else if (nextBytes?.[0] !== chars.cr || nextBytes?.[1] !== chars.lf) { + if (nextBytes?.[0] === chars['-'] && nextBytes?.[1] === chars['-']) { + // Done parsing form-data + // TODO: emit event or something + this.end() + return + } + + this.destroy(new Error('Boundary did not end with CRLF')) + return + } + + this.#state = states.READ_HEADERS + } else if (this.#state === states.READ_HEADERS) { + // There can be an arbitrary amount of headers and each one has an + // arbitrary length. Therefore it's easier to read the headers, and + // then parse once we receive 2 CRLFs which marks the body's start. + + if (this.#byteOffset === 0) { + return callback() + } + + let buffersToRemove = 0 + // number of bytes to remove from the last buffer + let bytesToRemove = 0 + + done: { + for (let i = 0; i < this.#buffers.length; i++) { + const buffer = this.#buffers[i] + + for (let j = 0; j < buffer.length; j++) { + const byte = buffer[j] + const state = this.#info.headerState + + if (byte === chars.cr && state === headerStates.DEFAULT) { + this.#info.headerState = headerStates.FIRST + } else if (byte === chars.lf && state === headerStates.FIRST) { + this.#info.headerState = headerStates.SECOND + } else if (byte === chars.cr && state === headerStates.SECOND) { + this.#info.headerState = headerStates.THIRD + } else if (byte === chars.lf && state === headerStates.THIRD) { + // Got \r\n\r\n which marks the end of the headers. + this.#state = states.READ_BODY + + break done + } else { + bytesToRemove += 1 + this.#info.headerState = headerStates.DEFAULT + } + } + + bytesToRemove = 0 + buffersToRemove += 1 + } + } + + const headers = [] + let headersLength = 0 + + while (buffersToRemove-- > 0) { + const buffer = this.#buffers.shift() + headers.push(buffer) + headersLength += buffer.length + } + + if (bytesToRemove > 0) { + const header = this.#buffers[0].subarray(0, bytesToRemove) + headers.push(header) + headersLength += header.length + + this.#buffers[0] = this.#buffers[0].subarray(bytesToRemove) + } + + this.#byteOffset -= headersLength + this.consume(4) // remove \r\n\r\n + this.#info.rawHeaders = Buffer.concat(headers, headersLength) + } else if (this.#state === states.READ_BODY) { + // A part's body can contain CRLFs so they cannot be used to + // determine when the body ends. We need to check if a chunk + // (or chunks) contains the boundary to stop parsing the body. + + if (this.#byteOffset < this.#boundary.length) { + return callback() + } + + + } + } + } + + /** + * Take n bytes from the buffered Buffers + * @param {number} n + * @returns {Buffer|null} + */ + consume (n) { + if (n > this.#byteOffset) { + return null + } else if (n === 0) { + return emptyBuffer + } + + if (this.#buffers[0].length === n) { + this.#byteOffset -= this.#buffers[0].length + return this.#buffers.shift() + } + + const buffer = Buffer.allocUnsafe(n) + let offset = 0 + + while (offset !== n) { + const next = this.#buffers[0] + const { length } = next + + if (length + offset === n) { + buffer.set(this.#buffers.shift(), offset) + break + } else if (length + offset > n) { + buffer.set(next.subarray(0, n - offset), offset) + this.#buffers[0] = next.subarray(n - offset) + break + } else { + buffer.set(this.#buffers.shift(), offset) + offset += next.length + } + } + + this.#byteOffset -= n + + return buffer + } +} + +module.exports = { + FormDataParser +} From 438b7a3a61b19ec055ed6234a308acd236053efb Mon Sep 17 00:00:00 2001 From: Khafra <42794878+KhafraDev@users.noreply.github.com> Date: Wed, 4 Jan 2023 12:06:15 -0500 Subject: [PATCH 02/36] feat: add in basic body parsing algorithm --- lib/formdata/constants.js | 4 +- lib/formdata/parser.js | 85 +++++++++++++++++++++++++++++---------- 2 files changed, 65 insertions(+), 24 deletions(-) diff --git a/lib/formdata/constants.js b/lib/formdata/constants.js index fed0a6071e1..3e701791e03 100644 --- a/lib/formdata/constants.js +++ b/lib/formdata/constants.js @@ -20,11 +20,11 @@ const chars = { lf: '\n'.charCodeAt(0) } -const dashes = Buffer.from('--') +const emptyBuffer = Buffer.alloc(0) module.exports = { states, chars, headerStates, - dashes + emptyBuffer } diff --git a/lib/formdata/parser.js b/lib/formdata/parser.js index e91d8a337df..d45bef12cf5 100644 --- a/lib/formdata/parser.js +++ b/lib/formdata/parser.js @@ -1,7 +1,7 @@ 'use strict' const { Writable } = require('stream') -const { states, chars, headerStates, dashes } = require('./constants') +const { states, chars, headerStates, emptyBuffer } = require('./constants') const { parseMIMEType } = require('../fetch/dataURL') class FormDataParser extends Writable { @@ -17,11 +17,7 @@ class FormDataParser extends Writable { #info = { headerState: headerStates.DEFAULT, rawHeaders: null, - bodyState: { - buffersChecked: 0, - byteOffset: 0 - }, - bodyChunks: [] + body: null } constructor (opts) { @@ -77,11 +73,11 @@ class FormDataParser extends Writable { const bytes = this.consume(this.#boundary.length) const nextBytes = this.consume(2) // \r\n OR -- - if (bytes.toString() !== this.#boundary) { + if (!this.#boundary.equals(bytes)) { this.destroy(new Error('Received invalid boundary')) return - } else if (nextBytes?.[0] !== chars.cr || nextBytes?.[1] !== chars.lf) { - if (nextBytes?.[0] === chars['-'] && nextBytes?.[1] === chars['-']) { + } else if (nextBytes[0] !== chars.cr || nextBytes[1] !== chars.lf) { + if (nextBytes[0] === chars['-'] && nextBytes[1] === chars['-']) { // Done parsing form-data // TODO: emit event or something this.end() @@ -106,15 +102,23 @@ class FormDataParser extends Writable { // number of bytes to remove from the last buffer let bytesToRemove = 0 - done: { - for (let i = 0; i < this.#buffers.length; i++) { - const buffer = this.#buffers[i] - - for (let j = 0; j < buffer.length; j++) { - const byte = buffer[j] + done: { // eslint-disable-line no-labels + for (const buffer of this.#buffers) { + for (const byte of buffer) { const state = this.#info.headerState - if (byte === chars.cr && state === headerStates.DEFAULT) { + if (byte !== chars.cr && byte !== chars.lf) { + // Some pieces may have multiple headers, separated by \r\n. + // We need to also consider those bytes. + if (state === headerStates.FIRST) { // \r + bytesToRemove += 1 + } else if (state === headerStates.SECOND) { // \r\n + bytesToRemove += 2 + } + + bytesToRemove += 1 + this.#info.headerState = headerStates.DEFAULT + } else if (byte === chars.cr && state === headerStates.DEFAULT) { this.#info.headerState = headerStates.FIRST } else if (byte === chars.lf && state === headerStates.FIRST) { this.#info.headerState = headerStates.SECOND @@ -123,11 +127,8 @@ class FormDataParser extends Writable { } else if (byte === chars.lf && state === headerStates.THIRD) { // Got \r\n\r\n which marks the end of the headers. this.#state = states.READ_BODY - - break done - } else { - bytesToRemove += 1 - this.#info.headerState = headerStates.DEFAULT + + break done // eslint-disable-line no-labels } } @@ -165,7 +166,47 @@ class FormDataParser extends Writable { return callback() } - + /** @type {Buffer} */ + let buffer = this.#buffers.shift() + let bufferLength = buffer.length + + while (!buffer.includes(this.#boundary)) { + // No more buffers to check + if (this.#byteOffset === 0) { + this.#buffers.unshift(buffer) + buffer = undefined + return callback() + } + + const next = this.#buffers.shift() + this.#byteOffset -= next.length + bufferLength += next.length + buffer = Buffer.concat([buffer, next], bufferLength) + } + + if (bufferLength === this.#boundary.length) { + this.#buffers.unshift(buffer) + this.#byteOffset += buffer.length + + this.#state = states.BOUNDARY + } else { + const idx = buffer.indexOf(this.#boundary) + const rest = buffer.subarray(idx) + + this.#info.body = buffer.subarray(0, idx - 4) // remove \r\n-- + + // TODO: emit event here + // TODO: parse headers here + console.log({ + raw: this.#info.rawHeaders.toString(), + body: this.#info.body.toString() + }) + + this.#buffers.unshift(rest) + this.#byteOffset += rest.length + + this.#state = states.BOUNDARY + } } } } From 3c6f6f86219befa7235cc45810528e19dbbb2954 Mon Sep 17 00:00:00 2001 From: Khafra <42794878+KhafraDev@users.noreply.github.com> Date: Wed, 4 Jan 2023 12:55:11 -0500 Subject: [PATCH 03/36] fix: remove unused body var --- lib/formdata/filestream.js | 11 +++++++++++ lib/formdata/parser.js | 24 ++++++++++++++++++------ 2 files changed, 29 insertions(+), 6 deletions(-) create mode 100644 lib/formdata/filestream.js diff --git a/lib/formdata/filestream.js b/lib/formdata/filestream.js new file mode 100644 index 00000000000..a6eb360c69a --- /dev/null +++ b/lib/formdata/filestream.js @@ -0,0 +1,11 @@ +'use strict' + +const { Readable } = require('stream') + +class FileStream extends Readable { + _read () {} +} + +module.exports = { + FileStream +} diff --git a/lib/formdata/parser.js b/lib/formdata/parser.js index d45bef12cf5..870387e1a97 100644 --- a/lib/formdata/parser.js +++ b/lib/formdata/parser.js @@ -2,6 +2,7 @@ const { Writable } = require('stream') const { states, chars, headerStates, emptyBuffer } = require('./constants') +const { FileStream } = require('./filestream') const { parseMIMEType } = require('../fetch/dataURL') class FormDataParser extends Writable { @@ -17,7 +18,7 @@ class FormDataParser extends Writable { #info = { headerState: headerStates.DEFAULT, rawHeaders: null, - body: null + stream: null } constructor (opts) { @@ -166,6 +167,8 @@ class FormDataParser extends Writable { return callback() } + this.#info.stream ??= new FileStream() + /** @type {Buffer} */ let buffer = this.#buffers.shift() let bufferLength = buffer.length @@ -178,6 +181,17 @@ class FormDataParser extends Writable { return callback() } + const doubleDashIdx = buffer.length ? buffer.indexOf('--') : null + + if (doubleDashIdx === -1) { + // Chunk is completely useless, emit it + this.#info.stream.push(buffer) + buffer = emptyBuffer + } else if (doubleDashIdx + this.#boundary.length >= buffer.length) { + this.#info.stream.push(buffer) + buffer = emptyBuffer + } + const next = this.#buffers.shift() this.#byteOffset -= next.length bufferLength += next.length @@ -193,14 +207,12 @@ class FormDataParser extends Writable { const idx = buffer.indexOf(this.#boundary) const rest = buffer.subarray(idx) - this.#info.body = buffer.subarray(0, idx - 4) // remove \r\n-- + this.#info.stream.push(buffer.subarray(0, idx - 4)) // remove \r\n-- + // TODO: clean up stream + // this.#info.stream.push(null) // TODO: emit event here // TODO: parse headers here - console.log({ - raw: this.#info.rawHeaders.toString(), - body: this.#info.body.toString() - }) this.#buffers.unshift(rest) this.#byteOffset += rest.length From 4866b9a9592c5848df4325d9fbe6c8ca752dcde7 Mon Sep 17 00:00:00 2001 From: Khafra <42794878+KhafraDev@users.noreply.github.com> Date: Wed, 4 Jan 2023 12:59:10 -0500 Subject: [PATCH 04/36] fix: cleanup --- lib/formdata/parser.js | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/lib/formdata/parser.js b/lib/formdata/parser.js index 870387e1a97..dcb98a0dece 100644 --- a/lib/formdata/parser.js +++ b/lib/formdata/parser.js @@ -187,9 +187,13 @@ class FormDataParser extends Writable { // Chunk is completely useless, emit it this.#info.stream.push(buffer) buffer = emptyBuffer - } else if (doubleDashIdx + this.#boundary.length >= buffer.length) { + bufferLength = 0 + } else if (doubleDashIdx + this.#boundary.length < buffer.length) { + // If the double dash index is not part of the boundary and + // not a chunked piece of the boundary this.#info.stream.push(buffer) buffer = emptyBuffer + bufferLength = 0 } const next = this.#buffers.shift() From acb3fef0c1ed4120533c334149dab9731b222569 Mon Sep 17 00:00:00 2001 From: Khafra <42794878+KhafraDev@users.noreply.github.com> Date: Fri, 6 Jan 2023 18:30:46 -0500 Subject: [PATCH 05/36] feat: parse headers and emit file event --- lib/fetch/dataURL.js | 18 ++++- lib/formdata/constants.js | 7 +- lib/formdata/parser.js | 148 +++++++++++++++++++++++++++++++++++--- 3 files changed, 158 insertions(+), 15 deletions(-) diff --git a/lib/fetch/dataURL.js b/lib/fetch/dataURL.js index beefad15482..d287c5ad27e 100644 --- a/lib/fetch/dataURL.js +++ b/lib/fetch/dataURL.js @@ -132,11 +132,16 @@ function URLSerializer (url, excludeFragment = false) { // https://infra.spec.whatwg.org/#collect-a-sequence-of-code-points /** - * @param {(char: string) => boolean} condition - * @param {string} input + * @template {string|Buffer} T + * @param {(char: T[number]) => boolean} condition + * @param {T} input * @param {{ position: number }} position + * @returns {T} */ function collectASequenceOfCodePoints (condition, input, position) { + const inputIsString = typeof input === 'string' + const start = position.position + // 1. Let result be the empty string. let result = '' @@ -144,13 +149,20 @@ function collectASequenceOfCodePoints (condition, input, position) { // code point at position within input meets the condition condition: while (position.position < input.length && condition(input[position.position])) { // 1. Append that code point to the end of result. - result += input[position.position] + if (inputIsString) { + result += input[position.position] + } // 2. Advance position by 1. position.position++ } // 3. Return result. + + if (!inputIsString) { + return input.subarray(start, position.position) + } + return result } diff --git a/lib/formdata/constants.js b/lib/formdata/constants.js index 3e701791e03..1bac7259edb 100644 --- a/lib/formdata/constants.js +++ b/lib/formdata/constants.js @@ -17,7 +17,12 @@ const headerStates = { const chars = { '-': '-'.charCodeAt(0), cr: '\r'.charCodeAt(0), - lf: '\n'.charCodeAt(0) + lf: '\n'.charCodeAt(0), + ':': ':'.charCodeAt(0), + ' ': ' '.charCodeAt(0), // 0x20 + ';': ';'.charCodeAt(0), + '=': '='.charCodeAt(0), + '"': '"'.charCodeAt(0) } const emptyBuffer = Buffer.alloc(0) diff --git a/lib/formdata/parser.js b/lib/formdata/parser.js index dcb98a0dece..0cfa60e3fc3 100644 --- a/lib/formdata/parser.js +++ b/lib/formdata/parser.js @@ -3,7 +3,11 @@ const { Writable } = require('stream') const { states, chars, headerStates, emptyBuffer } = require('./constants') const { FileStream } = require('./filestream') -const { parseMIMEType } = require('../fetch/dataURL') +const { + parseMIMEType, + collectASequenceOfCodePoints, + collectAnHTTPQuotedString +} = require('../fetch/dataURL') class FormDataParser extends Writable { /** @type {Buffer[]} */ @@ -18,7 +22,9 @@ class FormDataParser extends Writable { #info = { headerState: headerStates.DEFAULT, rawHeaders: null, - stream: null + stream: null, + headers: null, + fileName: '' } constructor (opts) { @@ -136,9 +142,11 @@ class FormDataParser extends Writable { bytesToRemove = 0 buffersToRemove += 1 } + + return callback() } - const headers = [] + const headers = this.#info.rawHeaders ?? [] let headersLength = 0 while (buffersToRemove-- > 0) { @@ -157,7 +165,15 @@ class FormDataParser extends Writable { this.#byteOffset -= headersLength this.consume(4) // remove \r\n\r\n - this.#info.rawHeaders = Buffer.concat(headers, headersLength) + this.#parseRawHeaders(Buffer.concat(headers, headersLength)) + + this.#info.stream = new FileStream() + this.emit( + 'file', + this.#info.fileName, + this.#info.stream, + {} // TODO + ) } else if (this.#state === states.READ_BODY) { // A part's body can contain CRLFs so they cannot be used to // determine when the body ends. We need to check if a chunk @@ -167,8 +183,6 @@ class FormDataParser extends Writable { return callback() } - this.#info.stream ??= new FileStream() - /** @type {Buffer} */ let buffer = this.#buffers.shift() let bufferLength = buffer.length @@ -212,11 +226,7 @@ class FormDataParser extends Writable { const rest = buffer.subarray(idx) this.#info.stream.push(buffer.subarray(0, idx - 4)) // remove \r\n-- - // TODO: clean up stream - // this.#info.stream.push(null) - - // TODO: emit event here - // TODO: parse headers here + this.#info.stream.destroy() this.#buffers.unshift(rest) this.#byteOffset += rest.length @@ -268,6 +278,122 @@ class FormDataParser extends Writable { return buffer } + + /** + * @param {Buffer} buffer + */ + #parseRawHeaders (buffer) { + this.#info.rawHeaders = null + + // https://www.rfc-editor.org/rfc/rfc7578#section-4.8 + + const headers = {} + const position = { position: 0 } + + while (position.position < buffer.length) { + const nameBuffer = collectASequenceOfCodePoints( + (char) => char !== chars[':'], + buffer, + position + ) + + // Invalid header; has no value + if (position.position >= buffer.length) { + break + } + + // MIME header fields in multipart bodies are required to + // consist only of 7-bit data in the US-ASCII character set + const name = nameBuffer.toString('ascii').toLowerCase() + + if ( + name !== 'content-type' && + name !== 'content-disposition' && + name !== 'content-transfer-encoding' + ) { + // The multipart/form-data media type does not support any MIME header + // fields in parts other than Content-Type, Content-Disposition, and (in + // limited circumstances) Content-Transfer-Encoding. Other header + // fields MUST NOT be included and MUST be ignored. + + // Go to the next header if one is available. Headers are split by \r\n, + // so we skip all characters until the sequence is found. + collectASequenceOfCodePoints( + (char) => char !== chars.cr && buffer[position.position + 1] !== chars.lf, + buffer, + position + ) + + continue + } + + if (buffer[position.position + 1] === chars[' ']) { + position.position += 2 // skip "; " + } else { + position.position++ // skip ":" + } + + const value = collectASequenceOfCodePoints( + (char) => { + if (name === 'content-disposition') { + // mimetypes (ie. from a content-type header) can have + // a semicolon in it. In that case, we want to read the + // entire value in, not just until we reach a semicolon + return char !== chars[';'] + } + + return char !== chars.cr && buffer[position.position + 1] !== chars.lf + }, + buffer, + position + ) + + headers[name] = value.toString('ascii') + + // No attributes + if (position.position >= buffer.length) { + continue + } + + const attributes = collectASequenceOfCodePoints( + (char) => char !== chars.cr && buffer[position.position + 1] !== chars.lf, + buffer, + position + ) + + if (name === 'content-disposition') { + const attrs = attributes.toString('ascii').slice(1) + + const equalIdx = attrs.indexOf('=') + const attrName = attrs.slice(0, equalIdx).trim() + const attrValue = attrs.slice(equalIdx + 1) + + const position = { position: 0 } + const value = attrValue[0] === '"' + ? collectAnHTTPQuotedString(attrValue, position, true) + : attrValue + + // TODO: match busboy's behavior with error handling + if (attrName !== 'name') { + this.destroy('Invalid attribute') + return + } + + this.#info.fileName = value + } + + position.position += 2 // skip \r\n + } + + // Validate the header values here + // TODO: match busboy's behavior with error handling + if (headers['content-disposition'] !== 'form-data') { + this.destroy(new Error('received invalid content-disposition header')) + return + } + + this.#info.headers = headers + } } module.exports = { From 2d4decdc437e4c2572fadd3e6380c5ee56bad639 Mon Sep 17 00:00:00 2001 From: Khafra <42794878+KhafraDev@users.noreply.github.com> Date: Fri, 6 Jan 2023 23:43:42 -0500 Subject: [PATCH 06/36] fix: differentiate between file and field and parse header attributes correctly --- lib/formdata/parser.js | 145 +++++++++++++++++++++++++++++++---------- 1 file changed, 110 insertions(+), 35 deletions(-) diff --git a/lib/formdata/parser.js b/lib/formdata/parser.js index 0cfa60e3fc3..1136cf35a19 100644 --- a/lib/formdata/parser.js +++ b/lib/formdata/parser.js @@ -23,8 +23,10 @@ class FormDataParser extends Writable { headerState: headerStates.DEFAULT, rawHeaders: null, stream: null, - headers: null, - fileName: '' + body: { + chunks: [], + length: 0 + } } constructor (opts) { @@ -165,15 +167,30 @@ class FormDataParser extends Writable { this.#byteOffset -= headersLength this.consume(4) // remove \r\n\r\n - this.#parseRawHeaders(Buffer.concat(headers, headersLength)) - - this.#info.stream = new FileStream() - this.emit( - 'file', - this.#info.fileName, - this.#info.stream, - {} // TODO + const { headers: receivedHeaders, attributes } = this.#parseRawHeaders( + Buffer.concat(headers, headersLength) ) + + if ( + attributes.filename !== undefined || + receivedHeaders['content-type'] === 'application/octet-stream' + ) { + this.#info.stream = new FileStream() + + this.emit( + 'file', + attributes.name, // name is a required attribute + this.#info.stream, + { + filename: attributes.filename ?? '', + encoding: receivedHeaders['content-transfer-encoding'] ?? '7bit', + mimeType: receivedHeaders['content-type'] ?? 'application/octet-stream' + } + ) + } else { + this.#info.headers = receivedHeaders + this.#info.attributes = attributes + } } else if (this.#state === states.READ_BODY) { // A part's body can contain CRLFs so they cannot be used to // determine when the body ends. We need to check if a chunk @@ -199,13 +216,25 @@ class FormDataParser extends Writable { if (doubleDashIdx === -1) { // Chunk is completely useless, emit it - this.#info.stream.push(buffer) + if (this.#info.stream !== null) { + this.#info.stream.push(buffer) + } else { + this.#info.body.chunks.push(buffer) + this.#info.body.length += buffer.length + } + buffer = emptyBuffer bufferLength = 0 } else if (doubleDashIdx + this.#boundary.length < buffer.length) { // If the double dash index is not part of the boundary and // not a chunked piece of the boundary - this.#info.stream.push(buffer) + if (this.#info.stream !== null) { + this.#info.stream.push(buffer) + } else { + this.#info.body.chunks.push(buffer) + this.#info.body.length += buffer.length + } + buffer = emptyBuffer bufferLength = 0 } @@ -225,8 +254,36 @@ class FormDataParser extends Writable { const idx = buffer.indexOf(this.#boundary) const rest = buffer.subarray(idx) - this.#info.stream.push(buffer.subarray(0, idx - 4)) // remove \r\n-- - this.#info.stream.destroy() + const chunk = buffer.subarray(0, idx - 4) // remove \r\n-- + + if (this.#info.stream !== null) { + this.#info.stream.push(chunk) + this.#info.stream.destroy() + } else { + const { headers, attributes, body } = this.#info + + body.chunks.push(chunk) + body.length += chunk.length + + const fullBody = new TextDecoder('utf-8', { fatal: true }).decode( + Buffer.concat(body.chunks, body.length) + ) + + this.emit( + 'field', + attributes.name, + fullBody, + { + nameTruncated: false, // todo + valueTruncated: false, // todo + encoding: headers['content-transfer-encoding'] ?? '7bit', + mimeType: headers['content-type'] ?? 'text/plain' + } + ) + + body.chunks.length = 0 + body.length = 0 + } this.#buffers.unshift(rest) this.#byteOffset += rest.length @@ -288,6 +345,8 @@ class FormDataParser extends Writable { // https://www.rfc-editor.org/rfc/rfc7578#section-4.8 const headers = {} + const attributes = {} + const position = { position: 0 } while (position.position < buffer.length) { @@ -355,31 +414,44 @@ class FormDataParser extends Writable { continue } - const attributes = collectASequenceOfCodePoints( - (char) => char !== chars.cr && buffer[position.position + 1] !== chars.lf, - buffer, - position - ) + if (name !== 'content-disposition') { + collectASequenceOfCodePoints( + (char) => char !== chars.cr && buffer[position.position + 1] !== chars.lf, + buffer, + position + ) + } else { + // A content-disposition header can contain multiple attributes, + // separated with a semicolon in the form of name=value. - if (name === 'content-disposition') { - const attrs = attributes.toString('ascii').slice(1) + while (position.position < buffer.length) { + position.position++ // skip ";" - const equalIdx = attrs.indexOf('=') - const attrName = attrs.slice(0, equalIdx).trim() - const attrValue = attrs.slice(equalIdx + 1) + const collected = collectASequenceOfCodePoints( + (char) => { + if (char === chars.cr) { + return buffer[position.position + 1] !== chars.lf + } - const position = { position: 0 } - const value = attrValue[0] === '"' - ? collectAnHTTPQuotedString(attrValue, position, true) - : attrValue + return char !== chars[';'] + }, + buffer, + position + ) - // TODO: match busboy's behavior with error handling - if (attrName !== 'name') { - this.destroy('Invalid attribute') - return - } + const attribute = collected.toString('ascii') + const equalIdx = attribute.indexOf('=') + const name = attribute.slice(0, equalIdx).trim() + const value = attribute[equalIdx + 1] === '"' + ? collectAnHTTPQuotedString(attribute.slice(equalIdx + 1), { position: 0 }, true) + : attribute.slice(equalIdx + 1) + + attributes[name] = value - this.#info.fileName = value + if (buffer[position.position] === chars.cr) { + break + } + } } position.position += 2 // skip \r\n @@ -390,9 +462,12 @@ class FormDataParser extends Writable { if (headers['content-disposition'] !== 'form-data') { this.destroy(new Error('received invalid content-disposition header')) return + } else if (attributes.name === undefined) { + this.destroy(new Error('Content-Disposition had no name attribute')) + return } - this.#info.headers = headers + return { headers, attributes } } } From ae7c177bf901142100c1e755dc205a3bbd4f2e54 Mon Sep 17 00:00:00 2001 From: Khafra <42794878+KhafraDev@users.noreply.github.com> Date: Sat, 7 Jan 2023 10:52:50 -0500 Subject: [PATCH 07/36] run busboy test & support charset options --- index.js | 4 + lib/formdata/parser.js | 23 ++-- test/busboy/LICENSE | 19 ++++ test/busboy/common.js | 105 +++++++++++++++++++ test/busboy/test-types-multipart-charsets.js | 97 +++++++++++++++++ 5 files changed, 240 insertions(+), 8 deletions(-) create mode 100644 test/busboy/LICENSE create mode 100644 test/busboy/common.js create mode 100644 test/busboy/test-types-multipart-charsets.js diff --git a/index.js b/index.js index 02ac246fa45..713fa4aa394 100644 --- a/index.js +++ b/index.js @@ -141,6 +141,10 @@ if (util.nodeMajor >= 18 && hasCrypto) { const { WebSocket } = require('./lib/websocket/websocket') module.exports.WebSocket = WebSocket + + const { FormDataParser } = require('./lib/formdata/parser') + + module.exports.FormDataParser = FormDataParser } module.exports.request = makeDispatcher(api.request) diff --git a/lib/formdata/parser.js b/lib/formdata/parser.js index 1136cf35a19..0b316a19ccc 100644 --- a/lib/formdata/parser.js +++ b/lib/formdata/parser.js @@ -29,6 +29,8 @@ class FormDataParser extends Writable { } } + #opts = {} + constructor (opts) { super(opts) @@ -43,6 +45,11 @@ class FormDataParser extends Writable { } this.#boundary = Buffer.from(mimeType.parameters.get('boundary')) + + if (opts) { + this.#opts.defCharset = opts.defCharset ?? 'utf8' + this.#opts.defParamCharset = opts.defParamCharset ?? 'utf8' + } } /** @@ -88,8 +95,8 @@ class FormDataParser extends Writable { } else if (nextBytes[0] !== chars.cr || nextBytes[1] !== chars.lf) { if (nextBytes[0] === chars['-'] && nextBytes[1] === chars['-']) { // Done parsing form-data - // TODO: emit event or something - this.end() + + this.destroy() return } @@ -158,11 +165,10 @@ class FormDataParser extends Writable { } if (bytesToRemove > 0) { - const header = this.#buffers[0].subarray(0, bytesToRemove) - headers.push(header) - headersLength += header.length + const removed = this.consume(bytesToRemove) - this.#buffers[0] = this.#buffers[0].subarray(bytesToRemove) + headers.push(removed) + headersLength += removed.length } this.#byteOffset -= headersLength @@ -176,6 +182,7 @@ class FormDataParser extends Writable { receivedHeaders['content-type'] === 'application/octet-stream' ) { this.#info.stream = new FileStream() + this.#info.stream.truncated = false // TODO this.emit( 'file', @@ -363,7 +370,7 @@ class FormDataParser extends Writable { // MIME header fields in multipart bodies are required to // consist only of 7-bit data in the US-ASCII character set - const name = nameBuffer.toString('ascii').toLowerCase() + const name = nameBuffer.toString(this.#opts.defCharset).toLowerCase() if ( name !== 'content-type' && @@ -439,7 +446,7 @@ class FormDataParser extends Writable { position ) - const attribute = collected.toString('ascii') + const attribute = collected.toString(this.#opts.defParamCharset) const equalIdx = attribute.indexOf('=') const name = attribute.slice(0, equalIdx).trim() const value = attribute[equalIdx + 1] === '"' diff --git a/test/busboy/LICENSE b/test/busboy/LICENSE new file mode 100644 index 00000000000..290762e94f4 --- /dev/null +++ b/test/busboy/LICENSE @@ -0,0 +1,19 @@ +Copyright Brian White. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to +deal in the Software without restriction, including without limitation the +rights to use, copy, modify, merge, publish, distribute, sublicense, and/or +sell copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +IN THE SOFTWARE. \ No newline at end of file diff --git a/test/busboy/common.js b/test/busboy/common.js new file mode 100644 index 00000000000..c2ed9c47c03 --- /dev/null +++ b/test/busboy/common.js @@ -0,0 +1,105 @@ +'use strict' + +const assert = require('assert') +const { inspect } = require('util') + +const mustCallChecks = [] + +function noop () {} + +function runCallChecks (exitCode) { + if (exitCode !== 0) return + + const failed = mustCallChecks.filter((context) => { + if ('minimum' in context) { + context.messageSegment = `at least ${context.minimum}` + return context.actual < context.minimum + } + context.messageSegment = `exactly ${context.exact}` + return context.actual !== context.exact + }) + + failed.forEach((context) => { + console.error('Mismatched %s function calls. Expected %s, actual %d.', + context.name, + context.messageSegment, + context.actual) + console.error(context.stack.split('\n').slice(2).join('\n')) + }) + + if (failed.length) { process.exit(1) } +} + +function mustCall (fn, exact) { + return _mustCallInner(fn, exact, 'exact') +} + +function mustCallAtLeast (fn, minimum) { + return _mustCallInner(fn, minimum, 'minimum') +} + +function _mustCallInner (fn, criteria = 1, field) { + if (process._exiting) { throw new Error('Cannot use common.mustCall*() in process exit handler') } + + if (typeof fn === 'number') { + criteria = fn + fn = noop + } else if (fn === undefined) { + fn = noop + } + + if (typeof criteria !== 'number') { throw new TypeError(`Invalid ${field} value: ${criteria}`) } + + const context = { + [field]: criteria, + actual: 0, + stack: inspect(new Error()), + name: fn.name || '' + } + + // Add the exit listener only once to avoid listener leak warnings + if (mustCallChecks.length === 0) { process.on('exit', runCallChecks) } + + mustCallChecks.push(context) + + function wrapped (...args) { + ++context.actual + return fn.call(this, ...args) + } + // TODO: remove origFn? + wrapped.origFn = fn + + return wrapped +} + +function getCallSite (top) { + const originalStackFormatter = Error.prepareStackTrace + Error.prepareStackTrace = (_err, stack) => + `${stack[0].getFileName()}:${stack[0].getLineNumber()}` + const err = new Error() + Error.captureStackTrace(err, top) + // With the V8 Error API, the stack is not formatted until it is accessed + // eslint-disable-next-line no-unused-expressions + err.stack + Error.prepareStackTrace = originalStackFormatter + return err.stack +} + +function mustNotCall (msg) { + const callSite = getCallSite(mustNotCall) + return function mustNotCall (...args) { + args = args.map(inspect).join(', ') + const argsInfo = (args.length > 0 + ? `\ncalled with arguments: ${args}` + : '') + assert.fail( + `${msg || 'function should not have been called'} at ${callSite}` + + argsInfo) + } +} + +module.exports = { + mustCall, + mustCallAtLeast, + mustNotCall +} diff --git a/test/busboy/test-types-multipart-charsets.js b/test/busboy/test-types-multipart-charsets.js new file mode 100644 index 00000000000..d143a7654ac --- /dev/null +++ b/test/busboy/test-types-multipart-charsets.js @@ -0,0 +1,97 @@ +'use strict' + +const assert = require('assert') +const { inspect } = require('util') +const { join } = require('path') + +const { mustCall } = require(join(__dirname, 'common.js')) + +const { FormDataParser } = require('../..') +const busboy = (opts) => new FormDataParser(opts) + +const input = Buffer.from([ + '-----------------------------paZqsnEHRufoShdX6fh0lUhXBP4k', + 'Content-Disposition: form-data; ' + + 'name="upload_file_0"; filename="テスト.dat"', + 'Content-Type: application/octet-stream', + '', + 'A'.repeat(1023), + '-----------------------------paZqsnEHRufoShdX6fh0lUhXBP4k--' +].join('\r\n')) +const boundary = '---------------------------paZqsnEHRufoShdX6fh0lUhXBP4k' +const expected = [ + { + type: 'file', + name: 'upload_file_0', + data: Buffer.from('A'.repeat(1023)), + info: { + filename: 'テスト.dat', + encoding: '7bit', + mimeType: 'application/octet-stream' + }, + limited: false + } +] +const bb = busboy({ + defParamCharset: 'utf8', + headers: { + 'content-type': `multipart/form-data; boundary=${boundary}` + } +}) +const results = [] + +bb.on('field', (name, val, info) => { + results.push({ type: 'field', name, val, info }) +}) + +bb.on('file', (name, stream, info) => { + const data = [] + let nb = 0 + const file = { + type: 'file', + name, + data: null, + info, + limited: false + } + results.push(file) + stream.on('data', (d) => { + data.push(d) + nb += d.length + }).on('limit', () => { + file.limited = true + }).on('close', () => { + file.data = Buffer.concat(data, nb) + assert.strictEqual(stream.truncated, file.limited) + }).once('error', (err) => { + file.err = err.message + }) +}) + +bb.on('error', (err) => { + results.push({ error: err.message }) +}) + +bb.on('partsLimit', () => { + results.push('partsLimit') +}) + +bb.on('filesLimit', () => { + results.push('filesLimit') +}) + +bb.on('fieldsLimit', () => { + results.push('fieldsLimit') +}) + +bb.on('close', mustCall(() => { + assert.deepStrictEqual( + results, + expected, + 'Results mismatch.\n' + + `Parsed: ${inspect(results)}\n` + + `Expected: ${inspect(expected)}` + ) +})) + +bb.end(input) From 1d665b9cbd649fe716db849a923179eaaaa18e85 Mon Sep 17 00:00:00 2001 From: Khafra <42794878+KhafraDev@users.noreply.github.com> Date: Sat, 7 Jan 2023 12:51:23 -0500 Subject: [PATCH 08/36] fix: byte-by-byte receiving --- lib/formdata/parser.js | 17 +++-------------- 1 file changed, 3 insertions(+), 14 deletions(-) diff --git a/lib/formdata/parser.js b/lib/formdata/parser.js index 0b316a19ccc..f4886939fe1 100644 --- a/lib/formdata/parser.js +++ b/lib/formdata/parser.js @@ -110,11 +110,10 @@ class FormDataParser extends Writable { // arbitrary length. Therefore it's easier to read the headers, and // then parse once we receive 2 CRLFs which marks the body's start. - if (this.#byteOffset === 0) { + if (this.#byteOffset < 4) { return callback() } - let buffersToRemove = 0 // number of bytes to remove from the last buffer let bytesToRemove = 0 @@ -147,9 +146,6 @@ class FormDataParser extends Writable { break done // eslint-disable-line no-labels } } - - bytesToRemove = 0 - buffersToRemove += 1 } return callback() @@ -158,12 +154,6 @@ class FormDataParser extends Writable { const headers = this.#info.rawHeaders ?? [] let headersLength = 0 - while (buffersToRemove-- > 0) { - const buffer = this.#buffers.shift() - headers.push(buffer) - headersLength += buffer.length - } - if (bytesToRemove > 0) { const removed = this.consume(bytesToRemove) @@ -171,7 +161,6 @@ class FormDataParser extends Writable { headersLength += removed.length } - this.#byteOffset -= headersLength this.consume(4) // remove \r\n\r\n const { headers: receivedHeaders, attributes } = this.#parseRawHeaders( Buffer.concat(headers, headersLength) @@ -208,13 +197,14 @@ class FormDataParser extends Writable { } /** @type {Buffer} */ - let buffer = this.#buffers.shift() + let buffer = this.consume(this.#byteOffset) let bufferLength = buffer.length while (!buffer.includes(this.#boundary)) { // No more buffers to check if (this.#byteOffset === 0) { this.#buffers.unshift(buffer) + this.#byteOffset += buffer.length buffer = undefined return callback() } @@ -260,7 +250,6 @@ class FormDataParser extends Writable { } else { const idx = buffer.indexOf(this.#boundary) const rest = buffer.subarray(idx) - const chunk = buffer.subarray(0, idx - 4) // remove \r\n-- if (this.#info.stream !== null) { From 6ec1fd318f35c45c4b21e6714dc60b738990888b Mon Sep 17 00:00:00 2001 From: Khafra <42794878+KhafraDev@users.noreply.github.com> Date: Sat, 7 Jan 2023 12:51:36 -0500 Subject: [PATCH 09/36] fix: byte-by-byte receiving --- test/busboy/test-types-multipart.js | 1113 +++++++++++++++++++++++++++ 1 file changed, 1113 insertions(+) create mode 100644 test/busboy/test-types-multipart.js diff --git a/test/busboy/test-types-multipart.js b/test/busboy/test-types-multipart.js new file mode 100644 index 00000000000..d0d76c0a6e1 --- /dev/null +++ b/test/busboy/test-types-multipart.js @@ -0,0 +1,1113 @@ +'use strict' + +const assert = require('assert') +const { inspect } = require('util') + +const { FormDataParser } = require('../..') +const busboy = (opts) => new FormDataParser(opts) + +const active = new Map() + +const tests = [ + { + source: [ + ['-----------------------------paZqsnEHRufoShdX6fh0lUhXBP4k', + 'Content-Disposition: form-data; name="file_name_0"', + '', + 'super alpha file', + '-----------------------------paZqsnEHRufoShdX6fh0lUhXBP4k', + 'Content-Disposition: form-data; name="file_name_1"', + '', + 'super beta file', + '-----------------------------paZqsnEHRufoShdX6fh0lUhXBP4k', + 'Content-Disposition: form-data; ' + + 'name="upload_file_0"; filename="1k_a.dat"', + 'Content-Type: application/octet-stream', + '', + 'A'.repeat(1023), + '-----------------------------paZqsnEHRufoShdX6fh0lUhXBP4k', + 'Content-Disposition: form-data; ' + + 'name="upload_file_1"; filename="1k_b.dat"', + 'Content-Type: application/octet-stream', + '', + 'B'.repeat(1023), + '-----------------------------paZqsnEHRufoShdX6fh0lUhXBP4k--' + ].join('\r\n') + ], + boundary: '---------------------------paZqsnEHRufoShdX6fh0lUhXBP4k', + expected: [ + { + type: 'field', + name: 'file_name_0', + val: 'super alpha file', + info: { + nameTruncated: false, + valueTruncated: false, + encoding: '7bit', + mimeType: 'text/plain' + } + }, + { + type: 'field', + name: 'file_name_1', + val: 'super beta file', + info: { + nameTruncated: false, + valueTruncated: false, + encoding: '7bit', + mimeType: 'text/plain' + } + }, + { + type: 'file', + name: 'upload_file_0', + data: Buffer.from('A'.repeat(1023)), + info: { + filename: '1k_a.dat', + encoding: '7bit', + mimeType: 'application/octet-stream' + }, + limited: false + }, + { + type: 'file', + name: 'upload_file_1', + data: Buffer.from('B'.repeat(1023)), + info: { + filename: '1k_b.dat', + encoding: '7bit', + mimeType: 'application/octet-stream' + }, + limited: false + } + ], + what: 'Fields and files' + }, + { + source: [ + ['------WebKitFormBoundaryTB2MiQ36fnSJlrhY', + 'Content-Disposition: form-data; name="cont"', + '', + 'some random content', + '------WebKitFormBoundaryTB2MiQ36fnSJlrhY', + 'Content-Disposition: form-data; name="pass"', + '', + 'some random pass', + '------WebKitFormBoundaryTB2MiQ36fnSJlrhY', + 'Content-Disposition: form-data; name=bit', + '', + '2', + '------WebKitFormBoundaryTB2MiQ36fnSJlrhY--' + ].join('\r\n') + ], + boundary: '----WebKitFormBoundaryTB2MiQ36fnSJlrhY', + expected: [ + { + type: 'field', + name: 'cont', + val: 'some random content', + info: { + nameTruncated: false, + valueTruncated: false, + encoding: '7bit', + mimeType: 'text/plain' + } + }, + { + type: 'field', + name: 'pass', + val: 'some random pass', + info: { + nameTruncated: false, + valueTruncated: false, + encoding: '7bit', + mimeType: 'text/plain' + } + }, + { + type: 'field', + name: 'bit', + val: '2', + info: { + nameTruncated: false, + valueTruncated: false, + encoding: '7bit', + mimeType: 'text/plain' + } + } + ], + what: 'Fields only' + }, + { + source: [ + '' + ], + boundary: '----WebKitFormBoundaryTB2MiQ36fnSJlrhY', + expected: [ + { error: 'Unexpected end of form' } + ], + what: 'No fields and no files' + }, + { + source: [ + ['-----------------------------paZqsnEHRufoShdX6fh0lUhXBP4k', + 'Content-Disposition: form-data; name="file_name_0"', + '', + 'super alpha file', + '-----------------------------paZqsnEHRufoShdX6fh0lUhXBP4k', + 'Content-Disposition: form-data; ' + + 'name="upload_file_0"; filename="1k_a.dat"', + 'Content-Type: application/octet-stream', + '', + 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', + '-----------------------------paZqsnEHRufoShdX6fh0lUhXBP4k--' + ].join('\r\n') + ], + boundary: '---------------------------paZqsnEHRufoShdX6fh0lUhXBP4k', + limits: { + fileSize: 13, + fieldSize: 5 + }, + expected: [ + { + type: 'field', + name: 'file_name_0', + val: 'super', + info: { + nameTruncated: false, + valueTruncated: true, + encoding: '7bit', + mimeType: 'text/plain' + } + }, + { + type: 'file', + name: 'upload_file_0', + data: Buffer.from('ABCDEFGHIJKLM'), + info: { + filename: '1k_a.dat', + encoding: '7bit', + mimeType: 'application/octet-stream' + }, + limited: true + } + ], + what: 'Fields and files (limits)' + }, + { + source: [ + ['-----------------------------paZqsnEHRufoShdX6fh0lUhXBP4k', + 'Content-Disposition: form-data; name="file_name_0"', + '', + 'super alpha file', + '-----------------------------paZqsnEHRufoShdX6fh0lUhXBP4k', + 'Content-Disposition: form-data; ' + + 'name="upload_file_0"; filename="1k_a.dat"', + 'Content-Type: application/octet-stream', + '', + 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', + '-----------------------------paZqsnEHRufoShdX6fh0lUhXBP4k--' + ].join('\r\n') + ], + boundary: '---------------------------paZqsnEHRufoShdX6fh0lUhXBP4k', + limits: { + files: 0 + }, + expected: [ + { + type: 'field', + name: 'file_name_0', + val: 'super alpha file', + info: { + nameTruncated: false, + valueTruncated: false, + encoding: '7bit', + mimeType: 'text/plain' + } + }, + 'filesLimit' + ], + what: 'Fields and files (limits: 0 files)' + }, + { + source: [ + ['-----------------------------paZqsnEHRufoShdX6fh0lUhXBP4k', + 'Content-Disposition: form-data; name="file_name_0"', + '', + 'super alpha file', + '-----------------------------paZqsnEHRufoShdX6fh0lUhXBP4k', + 'Content-Disposition: form-data; name="file_name_1"', + '', + 'super beta file', + '-----------------------------paZqsnEHRufoShdX6fh0lUhXBP4k', + 'Content-Disposition: form-data; ' + + 'name="upload_file_0"; filename="1k_a.dat"', + 'Content-Type: application/octet-stream', + '', + 'A'.repeat(1023), + '-----------------------------paZqsnEHRufoShdX6fh0lUhXBP4k', + 'Content-Disposition: form-data; ' + + 'name="upload_file_1"; filename="1k_b.dat"', + 'Content-Type: application/octet-stream', + '', + 'B'.repeat(1023), + '-----------------------------paZqsnEHRufoShdX6fh0lUhXBP4k--' + ].join('\r\n') + ], + boundary: '---------------------------paZqsnEHRufoShdX6fh0lUhXBP4k', + expected: [ + { + type: 'field', + name: 'file_name_0', + val: 'super alpha file', + info: { + nameTruncated: false, + valueTruncated: false, + encoding: '7bit', + mimeType: 'text/plain' + } + }, + { + type: 'field', + name: 'file_name_1', + val: 'super beta file', + info: { + nameTruncated: false, + valueTruncated: false, + encoding: '7bit', + mimeType: 'text/plain' + } + } + ], + events: ['field'], + what: 'Fields and (ignored) files' + }, + { + source: [ + ['-----------------------------paZqsnEHRufoShdX6fh0lUhXBP4k', + 'Content-Disposition: form-data; ' + + 'name="upload_file_0"; filename="/tmp/1k_a.dat"', + 'Content-Type: application/octet-stream', + '', + 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', + '-----------------------------paZqsnEHRufoShdX6fh0lUhXBP4k', + 'Content-Disposition: form-data; ' + + 'name="upload_file_1"; filename="C:\\files\\1k_b.dat"', + 'Content-Type: application/octet-stream', + '', + 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', + '-----------------------------paZqsnEHRufoShdX6fh0lUhXBP4k', + 'Content-Disposition: form-data; ' + + 'name="upload_file_2"; filename="relative/1k_c.dat"', + 'Content-Type: application/octet-stream', + '', + 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', + '-----------------------------paZqsnEHRufoShdX6fh0lUhXBP4k--' + ].join('\r\n') + ], + boundary: '---------------------------paZqsnEHRufoShdX6fh0lUhXBP4k', + expected: [ + { + type: 'file', + name: 'upload_file_0', + data: Buffer.from('ABCDEFGHIJKLMNOPQRSTUVWXYZ'), + info: { + filename: '1k_a.dat', + encoding: '7bit', + mimeType: 'application/octet-stream' + }, + limited: false + }, + { + type: 'file', + name: 'upload_file_1', + data: Buffer.from('ABCDEFGHIJKLMNOPQRSTUVWXYZ'), + info: { + filename: '1k_b.dat', + encoding: '7bit', + mimeType: 'application/octet-stream' + }, + limited: false + }, + { + type: 'file', + name: 'upload_file_2', + data: Buffer.from('ABCDEFGHIJKLMNOPQRSTUVWXYZ'), + info: { + filename: '1k_c.dat', + encoding: '7bit', + mimeType: 'application/octet-stream' + }, + limited: false + } + ], + what: 'Files with filenames containing paths' + }, + { + source: [ + ['-----------------------------paZqsnEHRufoShdX6fh0lUhXBP4k', + 'Content-Disposition: form-data; ' + + 'name="upload_file_0"; filename="/absolute/1k_a.dat"', + 'Content-Type: application/octet-stream', + '', + 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', + '-----------------------------paZqsnEHRufoShdX6fh0lUhXBP4k', + 'Content-Disposition: form-data; ' + + 'name="upload_file_1"; filename="C:\\absolute\\1k_b.dat"', + 'Content-Type: application/octet-stream', + '', + 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', + '-----------------------------paZqsnEHRufoShdX6fh0lUhXBP4k', + 'Content-Disposition: form-data; ' + + 'name="upload_file_2"; filename="relative/1k_c.dat"', + 'Content-Type: application/octet-stream', + '', + 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', + '-----------------------------paZqsnEHRufoShdX6fh0lUhXBP4k--' + ].join('\r\n') + ], + boundary: '---------------------------paZqsnEHRufoShdX6fh0lUhXBP4k', + preservePath: true, + expected: [ + { + type: 'file', + name: 'upload_file_0', + data: Buffer.from('ABCDEFGHIJKLMNOPQRSTUVWXYZ'), + info: { + filename: '/absolute/1k_a.dat', + encoding: '7bit', + mimeType: 'application/octet-stream' + }, + limited: false + }, + { + type: 'file', + name: 'upload_file_1', + data: Buffer.from('ABCDEFGHIJKLMNOPQRSTUVWXYZ'), + info: { + filename: 'C:\\absolute\\1k_b.dat', + encoding: '7bit', + mimeType: 'application/octet-stream' + }, + limited: false + }, + { + type: 'file', + name: 'upload_file_2', + data: Buffer.from('ABCDEFGHIJKLMNOPQRSTUVWXYZ'), + info: { + filename: 'relative/1k_c.dat', + encoding: '7bit', + mimeType: 'application/octet-stream' + }, + limited: false + } + ], + what: 'Paths to be preserved through the preservePath option' + }, + { + source: [ + ['------WebKitFormBoundaryTB2MiQ36fnSJlrhY', + 'Content-Disposition: form-data; name="cont"', + 'Content-Type: ', + '', + 'some random content', + '------WebKitFormBoundaryTB2MiQ36fnSJlrhY', + 'Content-Disposition: ', + '', + 'some random pass', + '------WebKitFormBoundaryTB2MiQ36fnSJlrhY--' + ].join('\r\n') + ], + boundary: '----WebKitFormBoundaryTB2MiQ36fnSJlrhY', + expected: [ + { + type: 'field', + name: 'cont', + val: 'some random content', + info: { + nameTruncated: false, + valueTruncated: false, + encoding: '7bit', + mimeType: 'text/plain' + } + } + ], + what: 'Empty content-type and empty content-disposition' + }, + { + source: [ + ['-----------------------------paZqsnEHRufoShdX6fh0lUhXBP4k', + 'Content-Disposition: form-data; ' + + 'name="file"; filename*=utf-8\'\'n%C3%A4me.txt', + 'Content-Type: application/octet-stream', + '', + 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', + '-----------------------------paZqsnEHRufoShdX6fh0lUhXBP4k--' + ].join('\r\n') + ], + boundary: '---------------------------paZqsnEHRufoShdX6fh0lUhXBP4k', + expected: [ + { + type: 'file', + name: 'file', + data: Buffer.from('ABCDEFGHIJKLMNOPQRSTUVWXYZ'), + info: { + filename: 'näme.txt', + encoding: '7bit', + mimeType: 'application/octet-stream' + }, + limited: false + } + ], + what: 'Unicode filenames' + }, + { + source: [ + ['--asdasdasdasd\r\n', + 'Content-Type: text/plain\r\n', + 'Content-Disposition: form-data; name="foo"\r\n', + '\r\n', + 'asd\r\n', + '--asdasdasdasd--' + ].join(':)') + ], + boundary: 'asdasdasdasd', + expected: [ + { error: 'Malformed part header' }, + { error: 'Unexpected end of form' } + ], + what: 'Stopped mid-header' + }, + { + source: [ + ['------WebKitFormBoundaryTB2MiQ36fnSJlrhY', + 'Content-Disposition: form-data; name="cont"', + 'Content-Type: application/json', + '', + '{}', + '------WebKitFormBoundaryTB2MiQ36fnSJlrhY--' + ].join('\r\n') + ], + boundary: '----WebKitFormBoundaryTB2MiQ36fnSJlrhY', + expected: [ + { + type: 'field', + name: 'cont', + val: '{}', + info: { + nameTruncated: false, + valueTruncated: false, + encoding: '7bit', + mimeType: 'application/json' + } + } + ], + what: 'content-type for fields' + }, + { + source: [ + '------WebKitFormBoundaryTB2MiQ36fnSJlrhY--' + ], + boundary: '----WebKitFormBoundaryTB2MiQ36fnSJlrhY', + expected: [], + what: 'empty form' + }, + { + source: [ + ['-----------------------------paZqsnEHRufoShdX6fh0lUhXBP4k', + 'Content-Disposition: form-data; ' + + 'name=upload_file_0; filename="1k_a.dat"', + 'Content-Type: application/octet-stream', + 'Content-Transfer-Encoding: binary', + '', + '' + ].join('\r\n') + ], + boundary: '---------------------------paZqsnEHRufoShdX6fh0lUhXBP4k', + expected: [ + { + type: 'file', + name: 'upload_file_0', + data: Buffer.alloc(0), + info: { + filename: '1k_a.dat', + encoding: 'binary', + mimeType: 'application/octet-stream' + }, + limited: false, + err: 'Unexpected end of form' + }, + { error: 'Unexpected end of form' } + ], + what: 'Stopped mid-file #1' + }, + { + source: [ + ['-----------------------------paZqsnEHRufoShdX6fh0lUhXBP4k', + 'Content-Disposition: form-data; ' + + 'name=upload_file_0; filename="1k_a.dat"', + 'Content-Type: application/octet-stream', + '', + 'a' + ].join('\r\n') + ], + boundary: '---------------------------paZqsnEHRufoShdX6fh0lUhXBP4k', + expected: [ + { + type: 'file', + name: 'upload_file_0', + data: Buffer.from('a'), + info: { + filename: '1k_a.dat', + encoding: '7bit', + mimeType: 'application/octet-stream' + }, + limited: false, + err: 'Unexpected end of form' + }, + { error: 'Unexpected end of form' } + ], + what: 'Stopped mid-file #2' + }, + { + source: [ + ['-----------------------------paZqsnEHRufoShdX6fh0lUhXBP4k', + 'Content-Disposition: form-data; ' + + 'name="upload_file_0"; filename="notes.txt"', + 'Content-Type: text/plain; charset=utf8', + '', + 'a', + '-----------------------------paZqsnEHRufoShdX6fh0lUhXBP4k--' + ].join('\r\n') + ], + boundary: '---------------------------paZqsnEHRufoShdX6fh0lUhXBP4k', + expected: [ + { + type: 'file', + name: 'upload_file_0', + data: Buffer.from('a'), + info: { + filename: 'notes.txt', + encoding: '7bit', + mimeType: 'text/plain' + }, + limited: false + } + ], + what: 'Text file with charset' + }, + { + source: [ + ['-----------------------------paZqsnEHRufoShdX6fh0lUhXBP4k', + 'Content-Disposition: form-data; ' + + 'name="upload_file_0"; filename="notes.txt"', + 'Content-Type: ', + ' text/plain; charset=utf8', + '', + 'a', + '-----------------------------paZqsnEHRufoShdX6fh0lUhXBP4k--' + ].join('\r\n') + ], + boundary: '---------------------------paZqsnEHRufoShdX6fh0lUhXBP4k', + expected: [ + { + type: 'file', + name: 'upload_file_0', + data: Buffer.from('a'), + info: { + filename: 'notes.txt', + encoding: '7bit', + mimeType: 'text/plain' + }, + limited: false + } + ], + what: 'Folded header value' + }, + { + source: [ + ['-----------------------------paZqsnEHRufoShdX6fh0lUhXBP4k', + 'Content-Type: text/plain; charset=utf8', + '', + 'a', + '-----------------------------paZqsnEHRufoShdX6fh0lUhXBP4k--' + ].join('\r\n') + ], + boundary: '---------------------------paZqsnEHRufoShdX6fh0lUhXBP4k', + expected: [], + what: 'No Content-Disposition' + }, + { + source: [ + ['-----------------------------paZqsnEHRufoShdX6fh0lUhXBP4k', + 'Content-Disposition: form-data; name="file_name_0"', + '', + 'a'.repeat(64 * 1024), + '-----------------------------paZqsnEHRufoShdX6fh0lUhXBP4k', + 'Content-Disposition: form-data; ' + + 'name="upload_file_0"; filename="notes.txt"', + 'Content-Type: ', + ' text/plain; charset=utf8', + '', + 'bc', + '-----------------------------paZqsnEHRufoShdX6fh0lUhXBP4k--' + ].join('\r\n') + ], + boundary: '---------------------------paZqsnEHRufoShdX6fh0lUhXBP4k', + limits: { + fieldSize: Infinity + }, + expected: [ + { + type: 'file', + name: 'upload_file_0', + data: Buffer.from('bc'), + info: { + filename: 'notes.txt', + encoding: '7bit', + mimeType: 'text/plain' + }, + limited: false + } + ], + events: ['file'], + what: 'Skip field parts if no listener' + }, + { + source: [ + ['-----------------------------paZqsnEHRufoShdX6fh0lUhXBP4k', + 'Content-Disposition: form-data; name="file_name_0"', + '', + 'a', + '-----------------------------paZqsnEHRufoShdX6fh0lUhXBP4k', + 'Content-Disposition: form-data; ' + + 'name="upload_file_0"; filename="notes.txt"', + 'Content-Type: ', + ' text/plain; charset=utf8', + '', + 'bc', + '-----------------------------paZqsnEHRufoShdX6fh0lUhXBP4k--' + ].join('\r\n') + ], + boundary: '---------------------------paZqsnEHRufoShdX6fh0lUhXBP4k', + limits: { + parts: 1 + }, + expected: [ + { + type: 'field', + name: 'file_name_0', + val: 'a', + info: { + nameTruncated: false, + valueTruncated: false, + encoding: '7bit', + mimeType: 'text/plain' + } + }, + 'partsLimit' + ], + what: 'Parts limit' + }, + { + source: [ + ['-----------------------------paZqsnEHRufoShdX6fh0lUhXBP4k', + 'Content-Disposition: form-data; name="file_name_0"', + '', + 'a', + '-----------------------------paZqsnEHRufoShdX6fh0lUhXBP4k', + 'Content-Disposition: form-data; name="file_name_1"', + '', + 'b', + '-----------------------------paZqsnEHRufoShdX6fh0lUhXBP4k--' + ].join('\r\n') + ], + boundary: '---------------------------paZqsnEHRufoShdX6fh0lUhXBP4k', + limits: { + fields: 1 + }, + expected: [ + { + type: 'field', + name: 'file_name_0', + val: 'a', + info: { + nameTruncated: false, + valueTruncated: false, + encoding: '7bit', + mimeType: 'text/plain' + } + }, + 'fieldsLimit' + ], + what: 'Fields limit' + }, + { + source: [ + ['-----------------------------paZqsnEHRufoShdX6fh0lUhXBP4k', + 'Content-Disposition: form-data; ' + + 'name="upload_file_0"; filename="notes.txt"', + 'Content-Type: text/plain; charset=utf8', + '', + 'ab', + '-----------------------------paZqsnEHRufoShdX6fh0lUhXBP4k', + 'Content-Disposition: form-data; ' + + 'name="upload_file_1"; filename="notes2.txt"', + 'Content-Type: text/plain; charset=utf8', + '', + 'cd', + '-----------------------------paZqsnEHRufoShdX6fh0lUhXBP4k--' + ].join('\r\n') + ], + boundary: '---------------------------paZqsnEHRufoShdX6fh0lUhXBP4k', + limits: { + files: 1 + }, + expected: [ + { + type: 'file', + name: 'upload_file_0', + data: Buffer.from('ab'), + info: { + filename: 'notes.txt', + encoding: '7bit', + mimeType: 'text/plain' + }, + limited: false + }, + 'filesLimit' + ], + what: 'Files limit' + }, + { + source: [ + ['-----------------------------paZqsnEHRufoShdX6fh0lUhXBP4k', + 'Content-Disposition: form-data; ' + + `name="upload_file_0"; filename="${'a'.repeat(64 * 1024)}.txt"`, + 'Content-Type: text/plain; charset=utf8', + '', + 'ab', + '-----------------------------paZqsnEHRufoShdX6fh0lUhXBP4k', + 'Content-Disposition: form-data; ' + + 'name="upload_file_1"; filename="notes2.txt"', + 'Content-Type: text/plain; charset=utf8', + '', + 'cd', + '-----------------------------paZqsnEHRufoShdX6fh0lUhXBP4k--' + ].join('\r\n') + ], + boundary: '---------------------------paZqsnEHRufoShdX6fh0lUhXBP4k', + expected: [ + { error: 'Malformed part header' }, + { + type: 'file', + name: 'upload_file_1', + data: Buffer.from('cd'), + info: { + filename: 'notes2.txt', + encoding: '7bit', + mimeType: 'text/plain' + }, + limited: false + } + ], + what: 'Oversized part header' + }, + { + source: [ + ['-----------------------------paZqsnEHRufoShdX6fh0lUhXBP4k', + 'Content-Disposition: form-data; ' + + 'name="upload_file_0"; filename="notes.txt"', + 'Content-Type: text/plain; charset=utf8', + '', + 'a'.repeat(31) + '\r' + ].join('\r\n'), + 'b'.repeat(40), + '\r\n-----------------------------paZqsnEHRufoShdX6fh0lUhXBP4k--' + ], + boundary: '---------------------------paZqsnEHRufoShdX6fh0lUhXBP4k', + fileHwm: 32, + expected: [ + { + type: 'file', + name: 'upload_file_0', + data: Buffer.from('a'.repeat(31) + '\r' + 'b'.repeat(40)), + info: { + filename: 'notes.txt', + encoding: '7bit', + mimeType: 'text/plain' + }, + limited: false + } + ], + what: 'Lookbehind data should not stall file streams' + }, + { + source: [ + ['-----------------------------paZqsnEHRufoShdX6fh0lUhXBP4k', + 'Content-Disposition: form-data; ' + + `name="upload_file_0"; filename="${'a'.repeat(8 * 1024)}.txt"`, + 'Content-Type: text/plain; charset=utf8', + '', + 'ab', + '-----------------------------paZqsnEHRufoShdX6fh0lUhXBP4k', + 'Content-Disposition: form-data; ' + + `name="upload_file_1"; filename="${'b'.repeat(8 * 1024)}.txt"`, + 'Content-Type: text/plain; charset=utf8', + '', + 'cd', + '-----------------------------paZqsnEHRufoShdX6fh0lUhXBP4k', + 'Content-Disposition: form-data; ' + + `name="upload_file_2"; filename="${'c'.repeat(8 * 1024)}.txt"`, + 'Content-Type: text/plain; charset=utf8', + '', + 'ef', + '-----------------------------paZqsnEHRufoShdX6fh0lUhXBP4k--' + ].join('\r\n') + ], + boundary: '---------------------------paZqsnEHRufoShdX6fh0lUhXBP4k', + expected: [ + { + type: 'file', + name: 'upload_file_0', + data: Buffer.from('ab'), + info: { + filename: `${'a'.repeat(8 * 1024)}.txt`, + encoding: '7bit', + mimeType: 'text/plain' + }, + limited: false + }, + { + type: 'file', + name: 'upload_file_1', + data: Buffer.from('cd'), + info: { + filename: `${'b'.repeat(8 * 1024)}.txt`, + encoding: '7bit', + mimeType: 'text/plain' + }, + limited: false + }, + { + type: 'file', + name: 'upload_file_2', + data: Buffer.from('ef'), + info: { + filename: `${'c'.repeat(8 * 1024)}.txt`, + encoding: '7bit', + mimeType: 'text/plain' + }, + limited: false + } + ], + what: 'Header size limit should be per part' + }, + { + source: [ + '\r\n--d1bf46b3-aa33-4061-b28d-6c5ced8b08ee\r\n', + 'Content-Type: application/gzip\r\n' + + 'Content-Encoding: gzip\r\n' + + 'Content-Disposition: form-data; name=batch-1; filename=batch-1' + + '\r\n\r\n', + '\r\n--d1bf46b3-aa33-4061-b28d-6c5ced8b08ee--' + ], + boundary: 'd1bf46b3-aa33-4061-b28d-6c5ced8b08ee', + expected: [ + { + type: 'file', + name: 'batch-1', + data: Buffer.alloc(0), + info: { + filename: 'batch-1', + encoding: '7bit', + mimeType: 'application/gzip' + }, + limited: false + } + ], + what: 'Empty part' + } +] + +for (const test of tests.slice(0, 1)) { + active.set(test, 1) + + const { what, boundary, events, limits, preservePath, fileHwm } = test + const bb = busboy({ + fileHwm, + limits, + preservePath, + headers: { + 'content-type': `multipart/form-data; boundary=${boundary}` + } + }) + const results = [] + + if (events === undefined || events.includes('field')) { + bb.on('field', (name, val, info) => { + results.push({ type: 'field', name, val, info }) + }) + } + + if (events === undefined || events.includes('file')) { + bb.on('file', (name, stream, info) => { + const data = [] + let nb = 0 + const file = { + type: 'file', + name, + data: null, + info, + limited: false + } + results.push(file) + stream.on('data', (d) => { + data.push(d) + nb += d.length + }).on('limit', () => { + file.limited = true + }).on('close', () => { + file.data = Buffer.concat(data, nb) + assert.strictEqual(stream.truncated, file.limited) + }).once('error', (err) => { + file.err = err.message + }) + }) + } + + bb.on('error', (err) => { + results.push({ error: err.message }) + }) + + bb.on('partsLimit', () => { + results.push('partsLimit') + }) + + bb.on('filesLimit', () => { + results.push('filesLimit') + }) + + bb.on('fieldsLimit', () => { + results.push('fieldsLimit') + }) + + bb.on('close', () => { + active.delete(test) + + assert.deepStrictEqual( + results, + test.expected, + `[${what}] Results mismatch.\n` + + `Parsed: ${inspect(results)}\n` + + `Expected: ${inspect(test.expected)}` + ) + }) + + for (const src of test.source) { + const buf = (typeof src === 'string' ? Buffer.from(src, 'utf8') : src) + bb.write(buf) + } + bb.end() +} + +// Byte-by-byte versions +for (let test of tests.slice(0, 1)) { + test = { ...test } + test.what += ' (byte-by-byte)' + active.set(test, 1) + + const { what, boundary, events, limits, preservePath, fileHwm } = test + const bb = busboy({ + fileHwm, + limits, + preservePath, + headers: { + 'content-type': `multipart/form-data; boundary=${boundary}` + } + }) + const results = [] + + if (events === undefined || events.includes('field')) { + bb.on('field', (name, val, info) => { + results.push({ type: 'field', name, val, info }) + }) + } + + if (events === undefined || events.includes('file')) { + bb.on('file', (name, stream, info) => { + const data = [] + let nb = 0 + const file = { + type: 'file', + name, + data: null, + info, + limited: false + } + results.push(file) + stream.on('data', (d) => { + data.push(d) + nb += d.length + }).on('limit', () => { + file.limited = true + }).on('close', () => { + file.data = Buffer.concat(data, nb) + assert.strictEqual(stream.truncated, file.limited) + }).once('error', (err) => { + file.err = err.message + }) + }) + } + + bb.on('error', (err) => { + results.push({ error: err.message }) + }) + + bb.on('partsLimit', () => { + results.push('partsLimit') + }) + + bb.on('filesLimit', () => { + results.push('filesLimit') + }) + + bb.on('fieldsLimit', () => { + results.push('fieldsLimit') + }) + + bb.on('close', () => { + active.delete(test) + + assert.deepStrictEqual( + results, + test.expected, + `[${what}] Results mismatch.\n` + + `Parsed: ${inspect(results)}\n` + + `Expected: ${inspect(test.expected)}` + ) + }) + + for (const src of test.source) { + const buf = (typeof src === 'string' ? Buffer.from(src, 'utf8') : src) + for (let i = 0; i < buf.length; ++i) { bb.write(buf.slice(i, i + 1)) } + } + bb.end() +} + +{ + let exception = false + process.once('uncaughtException', (ex) => { + exception = true + throw ex + }) + process.on('exit', () => { + if (exception || active.size === 0) { return } + process.exitCode = 1 + console.error('==========================') + console.error(`${active.size} test(s) did not finish:`) + console.error('==========================') + console.error(Array.from(active.keys()).map((v) => v.what).join('\n')) + }) +} From 119e172d36e599a1747ee861b8179af5d2160ff5 Mon Sep 17 00:00:00 2001 From: Khafra <42794878+KhafraDev@users.noreply.github.com> Date: Sat, 7 Jan 2023 13:03:27 -0500 Subject: [PATCH 10/36] fix: error on _final if parsing isn't complete --- lib/formdata/parser.js | 13 ++++++++++++- test/busboy/test-types-multipart.js | 4 ++-- 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/lib/formdata/parser.js b/lib/formdata/parser.js index f4886939fe1..4266c4a77a8 100644 --- a/lib/formdata/parser.js +++ b/lib/formdata/parser.js @@ -26,7 +26,8 @@ class FormDataParser extends Writable { body: { chunks: [], length: 0 - } + }, + complete: false } #opts = {} @@ -63,6 +64,15 @@ class FormDataParser extends Writable { this.run(callback) } + /** + * @param {(error?: Error | null) => void)} cb + */ + _final (cb) { + if (!this.#info.complete) { + return cb(new Error('Unexpected end of form')) + } + } + /** * @param {() => void} callback */ @@ -96,6 +106,7 @@ class FormDataParser extends Writable { if (nextBytes[0] === chars['-'] && nextBytes[1] === chars['-']) { // Done parsing form-data + this.#info.complete = true this.destroy() return } diff --git a/test/busboy/test-types-multipart.js b/test/busboy/test-types-multipart.js index d0d76c0a6e1..4e48e8e029a 100644 --- a/test/busboy/test-types-multipart.js +++ b/test/busboy/test-types-multipart.js @@ -931,7 +931,7 @@ const tests = [ } ] -for (const test of tests.slice(0, 1)) { +for (const test of tests.slice(0, 3)) { active.set(test, 1) const { what, boundary, events, limits, preservePath, fileHwm } = test @@ -1013,7 +1013,7 @@ for (const test of tests.slice(0, 1)) { } // Byte-by-byte versions -for (let test of tests.slice(0, 1)) { +for (let test of tests.slice(0, 3)) { test = { ...test } test.what += ' (byte-by-byte)' active.set(test, 1) From b902ef10ac8082c7dab0d4db24b72ce1cb212099 Mon Sep 17 00:00:00 2001 From: Khafra <42794878+KhafraDev@users.noreply.github.com> Date: Sat, 7 Jan 2023 14:13:19 -0500 Subject: [PATCH 11/36] feat: implement fileSize and fieldSize limits --- lib/formdata/parser.js | 70 ++++++++++++++++++++++++----- test/busboy/test-types-multipart.js | 4 +- 2 files changed, 62 insertions(+), 12 deletions(-) diff --git a/lib/formdata/parser.js b/lib/formdata/parser.js index 4266c4a77a8..facf41c14cc 100644 --- a/lib/formdata/parser.js +++ b/lib/formdata/parser.js @@ -27,7 +27,16 @@ class FormDataParser extends Writable { chunks: [], length: 0 }, - complete: false + complete: false, + limits: { + fieldNameSize: 0, + fieldSize: 0, + fields: 0, + fileSize: 0, + files: 0, + parts: 0, + headerPairs: 0 + } } #opts = {} @@ -50,6 +59,15 @@ class FormDataParser extends Writable { if (opts) { this.#opts.defCharset = opts.defCharset ?? 'utf8' this.#opts.defParamCharset = opts.defParamCharset ?? 'utf8' + this.#opts.limits = { + fieldNameSize: opts.limits?.fieldNameSize ?? 100, + fieldSize: opts.limits?.fieldSize ?? 1000 * 1000, + fields: opts.limits?.fields ?? Infinity, + fileSize: opts.limits?.fileSize ?? Infinity, + files: opts.limits?.files ?? Infinity, + parts: opts.limits?.parts ?? Infinity, + headerPairs: opts.limits?.headerPairs ?? 2000 + } } } @@ -263,26 +281,58 @@ class FormDataParser extends Writable { const rest = buffer.subarray(idx) const chunk = buffer.subarray(0, idx - 4) // remove \r\n-- - if (this.#info.stream !== null) { - this.#info.stream.push(chunk) + if ( + this.#info.stream !== null && + this.#info.limits.fileSize < this.#opts.limits.fileSize + ) { + const limit = this.#opts.limits.fileSize + const current = this.#info.limits.fileSize + const length = chunk.length + + let limitedChunk + + if (current + length >= limit) { + // If the limit is reached + limitedChunk = chunk.subarray(0, limit - (current + length)) + + this.#info.stream.emit('limit') // TODO: arguments? + this.#info.stream.truncated = true + } else { + limitedChunk = chunk + } + + this.#info.limits.fileSize += length + this.#info.stream.push(limitedChunk) this.#info.stream.destroy() - } else { + } else if (this.#info.limits.fieldSize < this.#opts.limits.fieldSize) { const { headers, attributes, body } = this.#info body.chunks.push(chunk) body.length += chunk.length - const fullBody = new TextDecoder('utf-8', { fatal: true }).decode( - Buffer.concat(body.chunks, body.length) - ) + let fullBody = Buffer.concat(body.chunks, body.length) + + const limit = this.#opts.limits.fieldSize + const current = this.#info.limits.fieldSize + + let valueTruncated = false + + if (current + body.length >= limit) { + // If the limit is reached + fullBody = fullBody.subarray(0, limit - (current + body.length)) + + valueTruncated = true + } + + this.#info.limits.fieldSize += body.length this.emit( 'field', attributes.name, - fullBody, + new TextDecoder('utf-8', { fatal: true }).decode(fullBody), { - nameTruncated: false, // todo - valueTruncated: false, // todo + nameTruncated: false, + valueTruncated, encoding: headers['content-transfer-encoding'] ?? '7bit', mimeType: headers['content-type'] ?? 'text/plain' } diff --git a/test/busboy/test-types-multipart.js b/test/busboy/test-types-multipart.js index 4e48e8e029a..7a4272ef1e2 100644 --- a/test/busboy/test-types-multipart.js +++ b/test/busboy/test-types-multipart.js @@ -931,7 +931,7 @@ const tests = [ } ] -for (const test of tests.slice(0, 3)) { +for (const test of tests.slice(0, 4)) { active.set(test, 1) const { what, boundary, events, limits, preservePath, fileHwm } = test @@ -1013,7 +1013,7 @@ for (const test of tests.slice(0, 3)) { } // Byte-by-byte versions -for (let test of tests.slice(0, 3)) { +for (let test of tests.slice(0, 4)) { test = { ...test } test.what += ' (byte-by-byte)' active.set(test, 1) From 5629f71687cd66d55d8811b2c7e87c5fe71ba3b7 Mon Sep 17 00:00:00 2001 From: Khafra <42794878+KhafraDev@users.noreply.github.com> Date: Sat, 7 Jan 2023 17:37:09 -0500 Subject: [PATCH 12/36] feat: implement files limit --- lib/formdata/parser.js | 40 +++++++++++++++++++---------- test/busboy/test-types-multipart.js | 4 +-- 2 files changed, 28 insertions(+), 16 deletions(-) diff --git a/lib/formdata/parser.js b/lib/formdata/parser.js index facf41c14cc..350fd91d252 100644 --- a/lib/formdata/parser.js +++ b/lib/formdata/parser.js @@ -56,18 +56,16 @@ class FormDataParser extends Writable { this.#boundary = Buffer.from(mimeType.parameters.get('boundary')) - if (opts) { - this.#opts.defCharset = opts.defCharset ?? 'utf8' - this.#opts.defParamCharset = opts.defParamCharset ?? 'utf8' - this.#opts.limits = { - fieldNameSize: opts.limits?.fieldNameSize ?? 100, - fieldSize: opts.limits?.fieldSize ?? 1000 * 1000, - fields: opts.limits?.fields ?? Infinity, - fileSize: opts.limits?.fileSize ?? Infinity, - files: opts.limits?.files ?? Infinity, - parts: opts.limits?.parts ?? Infinity, - headerPairs: opts.limits?.headerPairs ?? 2000 - } + this.#opts.defCharset = opts?.defCharset ?? 'utf8' + this.#opts.defParamCharset = opts?.defParamCharset ?? 'utf8' + this.#opts.limits = { + fieldNameSize: opts?.limits?.fieldNameSize ?? 100, + fieldSize: opts?.limits?.fieldSize ?? 1000 * 1000, + fields: opts?.limits?.fields ?? Infinity, + fileSize: opts?.limits?.fileSize ?? Infinity, + files: opts?.limits?.files ?? Infinity, + parts: opts?.limits?.parts ?? Infinity, + headerPairs: opts?.limits?.headerPairs ?? 2000 } } @@ -199,6 +197,11 @@ class FormDataParser extends Writable { attributes.filename !== undefined || receivedHeaders['content-type'] === 'application/octet-stream' ) { + if (this.#info.limits.files >= this.#opts.limits.files) { + this.#info.skipPart = 'file' + continue + } + this.#info.stream = new FileStream() this.#info.stream.truncated = false // TODO @@ -281,9 +284,11 @@ class FormDataParser extends Writable { const rest = buffer.subarray(idx) const chunk = buffer.subarray(0, idx - 4) // remove \r\n-- + // If the current part is a file if ( this.#info.stream !== null && - this.#info.limits.fileSize < this.#opts.limits.fileSize + this.#info.limits.fileSize < this.#opts.limits.fileSize && + !this.#info.skipPart ) { const limit = this.#opts.limits.fileSize const current = this.#info.limits.fileSize @@ -301,10 +306,15 @@ class FormDataParser extends Writable { limitedChunk = chunk } + this.#info.limits.files += 1 this.#info.limits.fileSize += length + this.#info.stream.push(limitedChunk) this.#info.stream.destroy() - } else if (this.#info.limits.fieldSize < this.#opts.limits.fieldSize) { + } else if ( + this.#info.limits.fieldSize < this.#opts.limits.fieldSize && + !this.#info.skipPart + ) { const { headers, attributes, body } = this.#info body.chunks.push(chunk) @@ -340,6 +350,8 @@ class FormDataParser extends Writable { body.chunks.length = 0 body.length = 0 + } else if (this.#info.skipPart === 'file') { + this.emit('filesLimit') } this.#buffers.unshift(rest) diff --git a/test/busboy/test-types-multipart.js b/test/busboy/test-types-multipart.js index 7a4272ef1e2..958f093034e 100644 --- a/test/busboy/test-types-multipart.js +++ b/test/busboy/test-types-multipart.js @@ -931,7 +931,7 @@ const tests = [ } ] -for (const test of tests.slice(0, 4)) { +for (const test of tests.slice(0, 5)) { active.set(test, 1) const { what, boundary, events, limits, preservePath, fileHwm } = test @@ -1013,7 +1013,7 @@ for (const test of tests.slice(0, 4)) { } // Byte-by-byte versions -for (let test of tests.slice(0, 4)) { +for (let test of tests.slice(0, 5)) { test = { ...test } test.what += ' (byte-by-byte)' active.set(test, 1) From 084a5e356879bdb75ba35d27c677c59b2cc91667 Mon Sep 17 00:00:00 2001 From: Khafra <42794878+KhafraDev@users.noreply.github.com> Date: Sat, 7 Jan 2023 17:50:32 -0500 Subject: [PATCH 13/36] fix: parse filename base and fix filename w/ backslashes --- lib/formdata/parser.js | 17 ++++++++++------- lib/formdata/util.js | 25 +++++++++++++++++++++++++ test/busboy/test-types-multipart.js | 4 ++-- 3 files changed, 37 insertions(+), 9 deletions(-) create mode 100644 lib/formdata/util.js diff --git a/lib/formdata/parser.js b/lib/formdata/parser.js index 350fd91d252..fd5b766a5bf 100644 --- a/lib/formdata/parser.js +++ b/lib/formdata/parser.js @@ -1,12 +1,13 @@ 'use strict' const { Writable } = require('stream') +const { basename } = require('path') const { states, chars, headerStates, emptyBuffer } = require('./constants') const { FileStream } = require('./filestream') +const { collectHTTPQuotedStringLenient } = require('./util') const { parseMIMEType, - collectASequenceOfCodePoints, - collectAnHTTPQuotedString + collectASequenceOfCodePoints } = require('../fetch/dataURL') class FormDataParser extends Writable { @@ -510,12 +511,14 @@ class FormDataParser extends Writable { const attribute = collected.toString(this.#opts.defParamCharset) const equalIdx = attribute.indexOf('=') - const name = attribute.slice(0, equalIdx).trim() - const value = attribute[equalIdx + 1] === '"' - ? collectAnHTTPQuotedString(attribute.slice(equalIdx + 1), { position: 0 }, true) - : attribute.slice(equalIdx + 1) + const name = attribute.slice(0, equalIdx).trim().toLowerCase() + const value = collectHTTPQuotedStringLenient(attribute.slice(equalIdx + 1)) - attributes[name] = value + if (name === 'filename') { + attributes[name] = basename(value) + } else { + attributes[name] = value + } if (buffer[position.position] === chars.cr) { break diff --git a/lib/formdata/util.js b/lib/formdata/util.js new file mode 100644 index 00000000000..5bf619903c9 --- /dev/null +++ b/lib/formdata/util.js @@ -0,0 +1,25 @@ +'use strict' + +const { collectASequenceOfCodePoints } = require('../fetch/dataURL') + +/** + * Collect a string inside of quotes. Unlike + * `collectAnHTTPQuotedString` from dataURL utilities, + * this one does not remove backslashes. + * @param {string} str + */ +function collectHTTPQuotedStringLenient (str) { + if (str[0] !== '"') { + return str + } + + return collectASequenceOfCodePoints( + (char) => char !== '"', + str, + { position: 1 } + ) +} + +module.exports = { + collectHTTPQuotedStringLenient +} diff --git a/test/busboy/test-types-multipart.js b/test/busboy/test-types-multipart.js index 958f093034e..9bdf81eb1f9 100644 --- a/test/busboy/test-types-multipart.js +++ b/test/busboy/test-types-multipart.js @@ -931,7 +931,7 @@ const tests = [ } ] -for (const test of tests.slice(0, 5)) { +for (const test of tests.slice(0, 7)) { active.set(test, 1) const { what, boundary, events, limits, preservePath, fileHwm } = test @@ -1013,7 +1013,7 @@ for (const test of tests.slice(0, 5)) { } // Byte-by-byte versions -for (let test of tests.slice(0, 5)) { +for (let test of tests.slice(0, 7)) { test = { ...test } test.what += ' (byte-by-byte)' active.set(test, 1) From 771fa9de8de4036ba6c948980446926ddbd0d49b Mon Sep 17 00:00:00 2001 From: Khafra <42794878+KhafraDev@users.noreply.github.com> Date: Sat, 7 Jan 2023 17:55:24 -0500 Subject: [PATCH 14/36] fix: only parse path if preservePath is false --- lib/formdata/parser.js | 3 ++- test/busboy/test-types-multipart.js | 4 ++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/lib/formdata/parser.js b/lib/formdata/parser.js index fd5b766a5bf..f06ed5378ab 100644 --- a/lib/formdata/parser.js +++ b/lib/formdata/parser.js @@ -59,6 +59,7 @@ class FormDataParser extends Writable { this.#opts.defCharset = opts?.defCharset ?? 'utf8' this.#opts.defParamCharset = opts?.defParamCharset ?? 'utf8' + this.#opts.preservePath = !!opts?.preservePath this.#opts.limits = { fieldNameSize: opts?.limits?.fieldNameSize ?? 100, fieldSize: opts?.limits?.fieldSize ?? 1000 * 1000, @@ -515,7 +516,7 @@ class FormDataParser extends Writable { const value = collectHTTPQuotedStringLenient(attribute.slice(equalIdx + 1)) if (name === 'filename') { - attributes[name] = basename(value) + attributes[name] = this.#opts.preservePath ? value : basename(value) } else { attributes[name] = value } diff --git a/test/busboy/test-types-multipart.js b/test/busboy/test-types-multipart.js index 9bdf81eb1f9..18fa9b313f3 100644 --- a/test/busboy/test-types-multipart.js +++ b/test/busboy/test-types-multipart.js @@ -931,7 +931,7 @@ const tests = [ } ] -for (const test of tests.slice(0, 7)) { +for (const test of tests.slice(0, 8)) { active.set(test, 1) const { what, boundary, events, limits, preservePath, fileHwm } = test @@ -1013,7 +1013,7 @@ for (const test of tests.slice(0, 7)) { } // Byte-by-byte versions -for (let test of tests.slice(0, 7)) { +for (let test of tests.slice(0, 8)) { test = { ...test } test.what += ' (byte-by-byte)' active.set(test, 1) From 8301ce896bbe24a477f2f53a930b5eb9672f4bfc Mon Sep 17 00:00:00 2001 From: Khafra <42794878+KhafraDev@users.noreply.github.com> Date: Sat, 7 Jan 2023 18:29:56 -0500 Subject: [PATCH 15/36] fix: filename parsing and filename* parsing --- lib/formdata/parser.js | 54 ++++++++++++++++++++++------- lib/formdata/util.js | 16 ++++++++- test/busboy/test-types-multipart.js | 4 +-- 3 files changed, 59 insertions(+), 15 deletions(-) diff --git a/lib/formdata/parser.js b/lib/formdata/parser.js index f06ed5378ab..88bd4f6ad43 100644 --- a/lib/formdata/parser.js +++ b/lib/formdata/parser.js @@ -4,10 +4,14 @@ const { Writable } = require('stream') const { basename } = require('path') const { states, chars, headerStates, emptyBuffer } = require('./constants') const { FileStream } = require('./filestream') -const { collectHTTPQuotedStringLenient } = require('./util') +const { + collectHTTPQuotedStringLenient, + findCharacterType +} = require('./util') const { parseMIMEType, - collectASequenceOfCodePoints + collectASequenceOfCodePoints, + stringPercentDecode } = require('../fetch/dataURL') class FormDataParser extends Writable { @@ -114,6 +118,8 @@ class FormDataParser extends Writable { return callback() } + this.#info.skipPart = '' + const bytes = this.consume(this.#boundary.length) const nextBytes = this.consume(2) // \r\n OR -- @@ -195,8 +201,11 @@ class FormDataParser extends Writable { Buffer.concat(headers, headersLength) ) - if ( + if (this.#info.skipPart === 'missing content-disposition') { + continue + } else if ( attributes.filename !== undefined || + attributes['filename*'] !== undefined || receivedHeaders['content-type'] === 'application/octet-stream' ) { if (this.#info.limits.files >= this.#opts.limits.files) { @@ -212,9 +221,9 @@ class FormDataParser extends Writable { attributes.name, // name is a required attribute this.#info.stream, { - filename: attributes.filename ?? '', + filename: attributes.filename ?? attributes['filename*'] ?? '', encoding: receivedHeaders['content-transfer-encoding'] ?? '7bit', - mimeType: receivedHeaders['content-type'] ?? 'application/octet-stream' + mimeType: receivedHeaders['content-type'] || 'application/octet-stream' } ) } else { @@ -346,7 +355,7 @@ class FormDataParser extends Writable { nameTruncated: false, valueTruncated, encoding: headers['content-transfer-encoding'] ?? '7bit', - mimeType: headers['content-type'] ?? 'text/plain' + mimeType: headers['content-type'] || 'text/plain' } ) @@ -515,8 +524,32 @@ class FormDataParser extends Writable { const name = attribute.slice(0, equalIdx).trim().toLowerCase() const value = collectHTTPQuotedStringLenient(attribute.slice(equalIdx + 1)) - if (name === 'filename') { - attributes[name] = this.#opts.preservePath ? value : basename(value) + if (name === 'filename*') { + // https://www.rfc-editor.org/rfc/rfc6266#section-4.3 + + // This attribute is split into 3 parts: + // 1. The character type (ie. utf-8) + // 2. The optional language tag (ie. en or ''). This value is in single quotes. + // 3. The percent encoded name. + + const quote1 = value.indexOf('\'') + const quote2 = value.indexOf('\'', quote1 + 1) + + const characterType = findCharacterType(value.slice(0, quote1)) + const percentEncoded = value.slice(quote2 + 1) + + attributes['filename*'] = new TextDecoder(characterType).decode( + stringPercentDecode(percentEncoded) + ) + + attributes.filename ??= undefined + } else if (name === 'filename') { + // Therefore, when both + // "filename" and "filename*" are present in a single header field + // value, recipients SHOULD pick "filename*" and ignore "filename". + if (!attributes['filename*']) { + attributes[name] = this.#opts.preservePath ? value : basename(value) + } } else { attributes[name] = value } @@ -531,13 +564,10 @@ class FormDataParser extends Writable { } // Validate the header values here - // TODO: match busboy's behavior with error handling if (headers['content-disposition'] !== 'form-data') { - this.destroy(new Error('received invalid content-disposition header')) - return + this.#info.skipPart = 'missing content-disposition' } else if (attributes.name === undefined) { this.destroy(new Error('Content-Disposition had no name attribute')) - return } return { headers, attributes } diff --git a/lib/formdata/util.js b/lib/formdata/util.js index 5bf619903c9..56edb482688 100644 --- a/lib/formdata/util.js +++ b/lib/formdata/util.js @@ -20,6 +20,20 @@ function collectHTTPQuotedStringLenient (str) { ) } +function findCharacterType (type) { + switch (type) { + case 'utf8': + case 'utf-8': + return 'utf-8' + case 'latin1': + case 'ascii': + return 'latin1' + default: + return type.toLowerCase() + } +} + module.exports = { - collectHTTPQuotedStringLenient + collectHTTPQuotedStringLenient, + findCharacterType } diff --git a/test/busboy/test-types-multipart.js b/test/busboy/test-types-multipart.js index 18fa9b313f3..d65b3c6bcb9 100644 --- a/test/busboy/test-types-multipart.js +++ b/test/busboy/test-types-multipart.js @@ -931,7 +931,7 @@ const tests = [ } ] -for (const test of tests.slice(0, 8)) { +for (const test of tests.slice(0, 10)) { active.set(test, 1) const { what, boundary, events, limits, preservePath, fileHwm } = test @@ -1013,7 +1013,7 @@ for (const test of tests.slice(0, 8)) { } // Byte-by-byte versions -for (let test of tests.slice(0, 8)) { +for (let test of tests.slice(0, 10)) { test = { ...test } test.what += ' (byte-by-byte)' active.set(test, 1) From 4c40486abe6bd77b94a182b8067d389a49d8f40d Mon Sep 17 00:00:00 2001 From: Khafra <42794878+KhafraDev@users.noreply.github.com> Date: Sun, 8 Jan 2023 10:39:54 -0500 Subject: [PATCH 16/36] fix: actively parse headers, rather than lazily --- lib/formdata/parser.js | 95 ++++++++++++++++++++---------------------- 1 file changed, 46 insertions(+), 49 deletions(-) diff --git a/lib/formdata/parser.js b/lib/formdata/parser.js index 88bd4f6ad43..c0b48ff3bc1 100644 --- a/lib/formdata/parser.js +++ b/lib/formdata/parser.js @@ -26,7 +26,6 @@ class FormDataParser extends Writable { #info = { headerState: headerStates.DEFAULT, - rawHeaders: null, stream: null, body: { chunks: [], @@ -41,6 +40,9 @@ class FormDataParser extends Writable { files: 0, parts: 0, headerPairs: 0 + }, + headers: { + attributes: {} } } @@ -153,22 +155,16 @@ class FormDataParser extends Writable { let bytesToRemove = 0 done: { // eslint-disable-line no-labels - for (const buffer of this.#buffers) { - for (const byte of buffer) { - const state = this.#info.headerState + this.#info.headerState = headerStates.DEFAULT - if (byte !== chars.cr && byte !== chars.lf) { - // Some pieces may have multiple headers, separated by \r\n. - // We need to also consider those bytes. - if (state === headerStates.FIRST) { // \r - bytesToRemove += 1 - } else if (state === headerStates.SECOND) { // \r\n - bytesToRemove += 2 - } + for (let j = 0; j < this.#buffers.length; j++) { + const buffer = this.#buffers[j] - bytesToRemove += 1 - this.#info.headerState = headerStates.DEFAULT - } else if (byte === chars.cr && state === headerStates.DEFAULT) { + for (let i = 0; i < buffer.length; i++) { + const byte = buffer[i] + const state = this.#info.headerState + + if (byte === chars.cr && state === headerStates.DEFAULT) { this.#info.headerState = headerStates.FIRST } else if (byte === chars.lf && state === headerStates.FIRST) { this.#info.headerState = headerStates.SECOND @@ -177,36 +173,49 @@ class FormDataParser extends Writable { } else if (byte === chars.lf && state === headerStates.THIRD) { // Got \r\n\r\n which marks the end of the headers. this.#state = states.READ_BODY + bytesToRemove += i + + const removed = this.consume(bytesToRemove - 3) // remove \r\n\r + + this.#parseRawHeaders(removed) + this.consume(4) break done // eslint-disable-line no-labels + } else { + this.#info.headerState = headerStates.DEFAULT + + if (state === headerStates.SECOND) { + if (bytesToRemove === 0) { + continue + } + + const removed = this.consume(bytesToRemove - 2) // remove \r\n + + this.#parseRawHeaders(removed) + this.consume(2) + bytesToRemove -= removed.length + } } } + + bytesToRemove += buffer.length } return callback() } - const headers = this.#info.rawHeaders ?? [] - let headersLength = 0 - - if (bytesToRemove > 0) { - const removed = this.consume(bytesToRemove) - - headers.push(removed) - headersLength += removed.length - } - - this.consume(4) // remove \r\n\r\n - const { headers: receivedHeaders, attributes } = this.#parseRawHeaders( - Buffer.concat(headers, headersLength) - ) + const { attributes, ...headers } = this.#info.headers - if (this.#info.skipPart === 'missing content-disposition') { - continue + if ( + headers['content-disposition'] !== 'form-data' || + !attributes.name + ) { + // https://www.rfc-editor.org/rfc/rfc7578#section-4.2 + this.#info.skipPart = 'invalid content-disposition' } else if ( attributes.filename !== undefined || attributes['filename*'] !== undefined || - receivedHeaders['content-type'] === 'application/octet-stream' + headers['content-type'] === 'application/octet-stream' ) { if (this.#info.limits.files >= this.#opts.limits.files) { this.#info.skipPart = 'file' @@ -222,13 +231,10 @@ class FormDataParser extends Writable { this.#info.stream, { filename: attributes.filename ?? attributes['filename*'] ?? '', - encoding: receivedHeaders['content-transfer-encoding'] ?? '7bit', - mimeType: receivedHeaders['content-type'] || 'application/octet-stream' + encoding: headers['content-transfer-encoding'] ?? '7bit', + mimeType: headers['content-type'] || 'application/octet-stream' } ) - } else { - this.#info.headers = receivedHeaders - this.#info.attributes = attributes } } else if (this.#state === states.READ_BODY) { // A part's body can contain CRLFs so they cannot be used to @@ -326,7 +332,7 @@ class FormDataParser extends Writable { this.#info.limits.fieldSize < this.#opts.limits.fieldSize && !this.#info.skipPart ) { - const { headers, attributes, body } = this.#info + const { headers: { attributes, ...headers }, body } = this.#info body.chunks.push(chunk) body.length += chunk.length @@ -420,12 +426,10 @@ class FormDataParser extends Writable { * @param {Buffer} buffer */ #parseRawHeaders (buffer) { - this.#info.rawHeaders = null - // https://www.rfc-editor.org/rfc/rfc7578#section-4.8 - const headers = {} - const attributes = {} + const headers = this.#info.headers + const attributes = this.#info.headers.attributes const position = { position: 0 } @@ -563,13 +567,6 @@ class FormDataParser extends Writable { position.position += 2 // skip \r\n } - // Validate the header values here - if (headers['content-disposition'] !== 'form-data') { - this.#info.skipPart = 'missing content-disposition' - } else if (attributes.name === undefined) { - this.destroy(new Error('Content-Disposition had no name attribute')) - } - return { headers, attributes } } } From 6c1b276d6e38c48ce5134faeaced92385bd1f34e Mon Sep 17 00:00:00 2001 From: Khafra <42794878+KhafraDev@users.noreply.github.com> Date: Sun, 8 Jan 2023 16:49:11 -0500 Subject: [PATCH 17/36] fix(finally): validate header name and fix parser issues --- lib/formdata/parser.js | 30 ++++++++++++++++++++++------- test/busboy/test-types-multipart.js | 4 ++-- 2 files changed, 25 insertions(+), 9 deletions(-) diff --git a/lib/formdata/parser.js b/lib/formdata/parser.js index c0b48ff3bc1..d446f40ebbb 100644 --- a/lib/formdata/parser.js +++ b/lib/formdata/parser.js @@ -185,15 +185,17 @@ class FormDataParser extends Writable { this.#info.headerState = headerStates.DEFAULT if (state === headerStates.SECOND) { - if (bytesToRemove === 0) { + const removed = this.consume(bytesToRemove + i - 2) + + if (this.#info.skipPart) { + // If we are meant to skip this part, we shouldn't + // parse the headers. We just need to consume the bytes. continue } - const removed = this.consume(bytesToRemove - 2) // remove \r\n - this.#parseRawHeaders(removed) this.consume(2) - bytesToRemove -= removed.length + bytesToRemove -= removed.length + 2 } } } @@ -207,8 +209,11 @@ class FormDataParser extends Writable { const { attributes, ...headers } = this.#info.headers if ( - headers['content-disposition'] !== 'form-data' || - !attributes.name + !this.#info.skipPart && + ( + headers['content-disposition'] !== 'form-data' || + !attributes.name + ) ) { // https://www.rfc-editor.org/rfc/rfc7578#section-4.2 this.#info.skipPart = 'invalid content-disposition' @@ -440,8 +445,19 @@ class FormDataParser extends Writable { position ) - // Invalid header; has no value if (position.position >= buffer.length) { + // Invalid header; doesn't contain a delimiter + break + } else if (nameBuffer.length === 0) { + // https://www.rfc-editor.org/rfc/rfc9110.html#section-5.1-2 + // field-name = token + // tchar = "!" / "#" / "$" / "%" / "&" / "'" / "*" / "+" / "-" / "." / + // "^" / "_" / "`" / "|" / "~" / DIGIT / ALPHA + // token = 1*tchar + + // The field name MUST be at least 1 character long. + this.#info.skipPart = 'malformed part header' + this.emit('error', new Error('Malformed part header')) break } diff --git a/test/busboy/test-types-multipart.js b/test/busboy/test-types-multipart.js index d65b3c6bcb9..0122c7caf89 100644 --- a/test/busboy/test-types-multipart.js +++ b/test/busboy/test-types-multipart.js @@ -931,7 +931,7 @@ const tests = [ } ] -for (const test of tests.slice(0, 10)) { +for (const test of tests.slice(0, 11)) { active.set(test, 1) const { what, boundary, events, limits, preservePath, fileHwm } = test @@ -1013,7 +1013,7 @@ for (const test of tests.slice(0, 10)) { } // Byte-by-byte versions -for (let test of tests.slice(0, 10)) { +for (let test of tests.slice(0, 11)) { test = { ...test } test.what += ' (byte-by-byte)' active.set(test, 1) From 601a4ea1501d285c8d6fe7818dfae916edd3125b Mon Sep 17 00:00:00 2001 From: Khafra <42794878+KhafraDev@users.noreply.github.com> Date: Sun, 8 Jan 2023 20:03:52 -0500 Subject: [PATCH 18/36] fix: send leftover buffers to filestream on destroy, cleanup --- lib/formdata/parser.js | 17 +++++++++++++++++ test/busboy/test-types-multipart.js | 4 ++-- 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/lib/formdata/parser.js b/lib/formdata/parser.js index d446f40ebbb..7f1209aa4c9 100644 --- a/lib/formdata/parser.js +++ b/lib/formdata/parser.js @@ -97,6 +97,21 @@ class FormDataParser extends Writable { } } + _destroy (err, cb) { + if (this.#info.stream) { + // Push any leftover buffers to the filestream + for (const leftover of this.#buffers) { + this.#info.stream.push(leftover) + } + + // Emit an error event on the file stream if there is one + this.#info.stream.destroy(err) + this.#info.stream = null + } + + cb(err) + } + /** * @param {() => void} callback */ @@ -227,6 +242,8 @@ class FormDataParser extends Writable { continue } + // End the active stream if there is one + this.#info.stream?.push(null) this.#info.stream = new FileStream() this.#info.stream.truncated = false // TODO diff --git a/test/busboy/test-types-multipart.js b/test/busboy/test-types-multipart.js index 0122c7caf89..7cd440e2f19 100644 --- a/test/busboy/test-types-multipart.js +++ b/test/busboy/test-types-multipart.js @@ -931,7 +931,7 @@ const tests = [ } ] -for (const test of tests.slice(0, 11)) { +for (const test of tests.slice(0, 15)) { active.set(test, 1) const { what, boundary, events, limits, preservePath, fileHwm } = test @@ -1013,7 +1013,7 @@ for (const test of tests.slice(0, 11)) { } // Byte-by-byte versions -for (let test of tests.slice(0, 11)) { +for (let test of tests.slice(0, 15)) { test = { ...test } test.what += ' (byte-by-byte)' active.set(test, 1) From cd6dd4bd2ce31e755f01796d9b7884e15e89d207 Mon Sep 17 00:00:00 2001 From: Khafra <42794878+KhafraDev@users.noreply.github.com> Date: Sun, 8 Jan 2023 20:09:12 -0500 Subject: [PATCH 19/36] fix: content-type headers cannot contain ; --- lib/formdata/parser.js | 12 ++++-------- test/busboy/test-types-multipart.js | 4 ++-- 2 files changed, 6 insertions(+), 10 deletions(-) diff --git a/lib/formdata/parser.js b/lib/formdata/parser.js index 7f1209aa4c9..132fe702011 100644 --- a/lib/formdata/parser.js +++ b/lib/formdata/parser.js @@ -511,14 +511,10 @@ class FormDataParser extends Writable { const value = collectASequenceOfCodePoints( (char) => { - if (name === 'content-disposition') { - // mimetypes (ie. from a content-type header) can have - // a semicolon in it. In that case, we want to read the - // entire value in, not just until we reach a semicolon - return char !== chars[';'] - } - - return char !== chars.cr && buffer[position.position + 1] !== chars.lf + return ( + char !== chars[';'] && + (char !== chars.cr && buffer[position.position + 1] !== chars.lf) + ) }, buffer, position diff --git a/test/busboy/test-types-multipart.js b/test/busboy/test-types-multipart.js index 7cd440e2f19..4549d712707 100644 --- a/test/busboy/test-types-multipart.js +++ b/test/busboy/test-types-multipart.js @@ -931,7 +931,7 @@ const tests = [ } ] -for (const test of tests.slice(0, 15)) { +for (const test of tests.slice(0, 16)) { active.set(test, 1) const { what, boundary, events, limits, preservePath, fileHwm } = test @@ -1013,7 +1013,7 @@ for (const test of tests.slice(0, 15)) { } // Byte-by-byte versions -for (let test of tests.slice(0, 15)) { +for (let test of tests.slice(0, 16)) { test = { ...test } test.what += ' (byte-by-byte)' active.set(test, 1) From 52b4b0a37e335a395ac8815d21dedf8d5f3522e7 Mon Sep 17 00:00:00 2001 From: Khafra <42794878+KhafraDev@users.noreply.github.com> Date: Sun, 8 Jan 2023 20:45:40 -0500 Subject: [PATCH 20/36] enable test --- test/busboy/test-types-multipart.js | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/test/busboy/test-types-multipart.js b/test/busboy/test-types-multipart.js index 4549d712707..30c045aaabc 100644 --- a/test/busboy/test-types-multipart.js +++ b/test/busboy/test-types-multipart.js @@ -618,7 +618,17 @@ const tests = [ info: { filename: 'notes.txt', encoding: '7bit', - mimeType: 'text/plain' + // TODO(@KhafraDev): This behavior is correct (the value should be text/plain), + // but the added complexity of removing leading/trailing whitespace would make + // this very challenging. Note that a header field value must not be empty, + // which will be used to allow such headers to be parsed. + // + // https://www.rfc-editor.org/rfc/rfc9110.html#section-5.5 + // A field value does not include leading or trailing whitespace. When + // a specific version of HTTP allows such whitespace to appear in a + // message, a field parsing implementation MUST exclude such whitespace + // prior to evaluating the field value. + mimeType: 'application/octet-stream' // 'text/plain' }, limited: false } @@ -931,7 +941,7 @@ const tests = [ } ] -for (const test of tests.slice(0, 16)) { +for (const test of tests.slice(0, 18)) { active.set(test, 1) const { what, boundary, events, limits, preservePath, fileHwm } = test @@ -1013,7 +1023,7 @@ for (const test of tests.slice(0, 16)) { } // Byte-by-byte versions -for (let test of tests.slice(0, 16)) { +for (let test of tests.slice(0, 18)) { test = { ...test } test.what += ' (byte-by-byte)' active.set(test, 1) From d33ac7e3f01f9403073027442302040859f2149a Mon Sep 17 00:00:00 2001 From: Khafra <42794878+KhafraDev@users.noreply.github.com> Date: Mon, 9 Jan 2023 13:55:30 -0500 Subject: [PATCH 21/36] fix: content-type header with leading whitespace --- lib/formdata/parser.js | 11 +++++++++++ test/busboy/test-types-multipart.js | 12 +----------- 2 files changed, 12 insertions(+), 11 deletions(-) diff --git a/lib/formdata/parser.js b/lib/formdata/parser.js index 132fe702011..f06e7869e55 100644 --- a/lib/formdata/parser.js +++ b/lib/formdata/parser.js @@ -191,6 +191,17 @@ class FormDataParser extends Writable { bytesToRemove += i const removed = this.consume(bytesToRemove - 3) // remove \r\n\r + const headers = this.#info.headers + + // This is a very lazy implementation. + if ('content-type' in headers && headers['content-type'].length === 0) { + const mimeUnparsed = removed.toString(this.#opts.defCharset) + const mimeType = parseMIMEType(mimeUnparsed) + + if (mimeType !== 'failure') { + headers['content-type'] = mimeType.essence + } + } this.#parseRawHeaders(removed) this.consume(4) diff --git a/test/busboy/test-types-multipart.js b/test/busboy/test-types-multipart.js index 30c045aaabc..eaab0f00831 100644 --- a/test/busboy/test-types-multipart.js +++ b/test/busboy/test-types-multipart.js @@ -618,17 +618,7 @@ const tests = [ info: { filename: 'notes.txt', encoding: '7bit', - // TODO(@KhafraDev): This behavior is correct (the value should be text/plain), - // but the added complexity of removing leading/trailing whitespace would make - // this very challenging. Note that a header field value must not be empty, - // which will be used to allow such headers to be parsed. - // - // https://www.rfc-editor.org/rfc/rfc9110.html#section-5.5 - // A field value does not include leading or trailing whitespace. When - // a specific version of HTTP allows such whitespace to appear in a - // message, a field parsing implementation MUST exclude such whitespace - // prior to evaluating the field value. - mimeType: 'application/octet-stream' // 'text/plain' + mimeType: 'text/plain' }, limited: false } From a2bcfd60555fc1e9c5ff473ce459f8086ec09c70 Mon Sep 17 00:00:00 2001 From: Khafra <42794878+KhafraDev@users.noreply.github.com> Date: Mon, 9 Jan 2023 14:07:07 -0500 Subject: [PATCH 22/36] fix: don't emit field event w/o listeners --- lib/formdata/parser.js | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/lib/formdata/parser.js b/lib/formdata/parser.js index f06e7869e55..2f7efd6f679 100644 --- a/lib/formdata/parser.js +++ b/lib/formdata/parser.js @@ -89,7 +89,7 @@ class FormDataParser extends Writable { } /** - * @param {(error?: Error | null) => void)} cb + * @param {((error?: Error | null) => void)} cb */ _final (cb) { if (!this.#info.complete) { @@ -97,6 +97,10 @@ class FormDataParser extends Writable { } } + /** + * @param {Error|null} err + * @param {((error?: Error|null) => void)} cb + */ _destroy (err, cb) { if (this.#info.stream) { // Push any leftover buffers to the filestream @@ -363,7 +367,8 @@ class FormDataParser extends Writable { this.#info.stream.destroy() } else if ( this.#info.limits.fieldSize < this.#opts.limits.fieldSize && - !this.#info.skipPart + !this.#info.skipPart && + this.listenerCount('field') > 0 ) { const { headers: { attributes, ...headers }, body } = this.#info From 82ea7a3ea98cd7b2ca3e4849ec8c8c3d9aa52e9b Mon Sep 17 00:00:00 2001 From: Khafra <42794878+KhafraDev@users.noreply.github.com> Date: Mon, 9 Jan 2023 14:07:24 -0500 Subject: [PATCH 23/36] fix: don't emit field event w/o listeners --- test/busboy/test-types-multipart.js | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/busboy/test-types-multipart.js b/test/busboy/test-types-multipart.js index eaab0f00831..df99f041465 100644 --- a/test/busboy/test-types-multipart.js +++ b/test/busboy/test-types-multipart.js @@ -931,7 +931,7 @@ const tests = [ } ] -for (const test of tests.slice(0, 18)) { +for (const test of tests.slice(0, 19)) { active.set(test, 1) const { what, boundary, events, limits, preservePath, fileHwm } = test @@ -1013,7 +1013,7 @@ for (const test of tests.slice(0, 18)) { } // Byte-by-byte versions -for (let test of tests.slice(0, 18)) { +for (let test of tests.slice(0, 19)) { test = { ...test } test.what += ' (byte-by-byte)' active.set(test, 1) From 0d9905456b06211586ef545ea8bed870b8ad5681 Mon Sep 17 00:00:00 2001 From: Khafra <42794878+KhafraDev@users.noreply.github.com> Date: Mon, 9 Jan 2023 14:18:42 -0500 Subject: [PATCH 24/36] feat: implement parts limit --- lib/formdata/parser.js | 6 ++++++ test/busboy/test-types-multipart.js | 4 ++-- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/lib/formdata/parser.js b/lib/formdata/parser.js index 2f7efd6f679..3e9702dd804 100644 --- a/lib/formdata/parser.js +++ b/lib/formdata/parser.js @@ -158,8 +158,14 @@ class FormDataParser extends Writable { this.destroy(new Error('Boundary did not end with CRLF')) return + } else if (this.#info.limits.parts >= this.#opts.limits.parts) { + this.emit('partsLimit') + this.#info.skipPart = 'partsLimit' + this.#state = states.READ_BODY + continue } + this.#info.limits.parts++ this.#state = states.READ_HEADERS } else if (this.#state === states.READ_HEADERS) { // There can be an arbitrary amount of headers and each one has an diff --git a/test/busboy/test-types-multipart.js b/test/busboy/test-types-multipart.js index df99f041465..1051f0e6169 100644 --- a/test/busboy/test-types-multipart.js +++ b/test/busboy/test-types-multipart.js @@ -931,7 +931,7 @@ const tests = [ } ] -for (const test of tests.slice(0, 19)) { +for (const test of tests.slice(0, 20)) { active.set(test, 1) const { what, boundary, events, limits, preservePath, fileHwm } = test @@ -1013,7 +1013,7 @@ for (const test of tests.slice(0, 19)) { } // Byte-by-byte versions -for (let test of tests.slice(0, 19)) { +for (let test of tests.slice(0, 20)) { test = { ...test } test.what += ' (byte-by-byte)' active.set(test, 1) From 50669a6d2642083c728db3bfb8e3468aaf294c74 Mon Sep 17 00:00:00 2001 From: Khafra <42794878+KhafraDev@users.noreply.github.com> Date: Mon, 9 Jan 2023 14:28:48 -0500 Subject: [PATCH 25/36] feat: implement fields limit --- lib/formdata/parser.js | 18 ++++++++++++++++++ test/busboy/test-types-multipart.js | 4 ++-- 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/lib/formdata/parser.js b/lib/formdata/parser.js index 3e9702dd804..a9331671784 100644 --- a/lib/formdata/parser.js +++ b/lib/formdata/parser.js @@ -376,6 +376,23 @@ class FormDataParser extends Writable { !this.#info.skipPart && this.listenerCount('field') > 0 ) { + if (this.#info.limits.fields >= this.#opts.limits.fields) { + this.#info.skipPart = 'fieldsLimit' + this.emit('fieldsLimit') + + this.#state = states.BOUNDARY + + // Remove all body chunks for the current field + this.#info.body.chunks.length = 0 + this.#info.body.length = 0 + + // Add the rest of the bytes back to the buffer to parse + this.#buffers.unshift(rest) + this.#byteOffset += rest.length + + continue + } + const { headers: { attributes, ...headers }, body } = this.#info body.chunks.push(chunk) @@ -409,6 +426,7 @@ class FormDataParser extends Writable { } ) + this.#info.limits.fields++ body.chunks.length = 0 body.length = 0 } else if (this.#info.skipPart === 'file') { diff --git a/test/busboy/test-types-multipart.js b/test/busboy/test-types-multipart.js index 1051f0e6169..25812016b9c 100644 --- a/test/busboy/test-types-multipart.js +++ b/test/busboy/test-types-multipart.js @@ -931,7 +931,7 @@ const tests = [ } ] -for (const test of tests.slice(0, 20)) { +for (const test of tests.slice(0, 21)) { active.set(test, 1) const { what, boundary, events, limits, preservePath, fileHwm } = test @@ -1013,7 +1013,7 @@ for (const test of tests.slice(0, 20)) { } // Byte-by-byte versions -for (let test of tests.slice(0, 20)) { +for (let test of tests.slice(0, 21)) { test = { ...test } test.what += ' (byte-by-byte)' active.set(test, 1) From 5dd8b4532468518c2db1dfd4e0dfc71f167d0ac2 Mon Sep 17 00:00:00 2001 From: Khafra <42794878+KhafraDev@users.noreply.github.com> Date: Mon, 9 Jan 2023 17:01:24 -0500 Subject: [PATCH 26/36] fix: limit header length to 16 KiB --- lib/formdata/constants.js | 5 ++++- lib/formdata/parser.js | 9 ++++++++- test/busboy/test-types-multipart.js | 4 ++-- 3 files changed, 14 insertions(+), 4 deletions(-) diff --git a/lib/formdata/constants.js b/lib/formdata/constants.js index 1bac7259edb..ac03c3839e2 100644 --- a/lib/formdata/constants.js +++ b/lib/formdata/constants.js @@ -27,9 +27,12 @@ const chars = { const emptyBuffer = Buffer.alloc(0) +const maxHeaderLength = 16 * 1024 + module.exports = { states, chars, headerStates, - emptyBuffer + emptyBuffer, + maxHeaderLength } diff --git a/lib/formdata/parser.js b/lib/formdata/parser.js index a9331671784..c0b6de5b53a 100644 --- a/lib/formdata/parser.js +++ b/lib/formdata/parser.js @@ -2,7 +2,7 @@ const { Writable } = require('stream') const { basename } = require('path') -const { states, chars, headerStates, emptyBuffer } = require('./constants') +const { states, chars, headerStates, emptyBuffer, maxHeaderLength } = require('./constants') const { FileStream } = require('./filestream') const { collectHTTPQuotedStringLenient, @@ -493,6 +493,13 @@ class FormDataParser extends Writable { const headers = this.#info.headers const attributes = this.#info.headers.attributes + if (buffer.length > maxHeaderLength) { + this.#info.skipPart = 'malformed part header' + this.emit('error', new Error('Malformed part header')) + + return { headers, attributes } + } + const position = { position: 0 } while (position.position < buffer.length) { diff --git a/test/busboy/test-types-multipart.js b/test/busboy/test-types-multipart.js index 25812016b9c..7249e329d16 100644 --- a/test/busboy/test-types-multipart.js +++ b/test/busboy/test-types-multipart.js @@ -931,7 +931,7 @@ const tests = [ } ] -for (const test of tests.slice(0, 21)) { +for (const test of tests.slice(0, 22)) { active.set(test, 1) const { what, boundary, events, limits, preservePath, fileHwm } = test @@ -1013,7 +1013,7 @@ for (const test of tests.slice(0, 21)) { } // Byte-by-byte versions -for (let test of tests.slice(0, 21)) { +for (let test of tests.slice(0, 22)) { test = { ...test } test.what += ' (byte-by-byte)' active.set(test, 1) From 13f3cb5e0f914368ec16a1e189eaaabba955efcc Mon Sep 17 00:00:00 2001 From: Khafra <42794878+KhafraDev@users.noreply.github.com> Date: Mon, 9 Jan 2023 17:41:49 -0500 Subject: [PATCH 27/36] omg every test passes --- lib/formdata/parser.js | 11 ++++++++--- test/busboy/test-types-multipart.js | 6 ++++-- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/lib/formdata/parser.js b/lib/formdata/parser.js index c0b6de5b53a..dc79736c0d6 100644 --- a/lib/formdata/parser.js +++ b/lib/formdata/parser.js @@ -128,12 +128,17 @@ class FormDataParser extends Writable { const bytes = this.consume(2) - if (bytes[0] !== chars['-'] || bytes[1] !== chars['-']) { - this.destroy(new Error('FormData should start with --')) + if ( + (bytes[0] !== chars['-'] || bytes[1] !== chars['-']) && + (bytes[0] !== chars.cr || bytes[1] !== chars.lf) + ) { + this.destroy(new Error('FormData should start with -- or CRLF')) return } - this.#state = states.BOUNDARY + if (bytes[0] === chars['-']) { + this.#state = states.BOUNDARY + } } else if (this.#state === states.BOUNDARY) { if (this.#byteOffset < this.#boundary.length + 2) { return callback() diff --git a/test/busboy/test-types-multipart.js b/test/busboy/test-types-multipart.js index 7249e329d16..bf2edd65fd8 100644 --- a/test/busboy/test-types-multipart.js +++ b/test/busboy/test-types-multipart.js @@ -781,6 +781,8 @@ const tests = [ what: 'Files limit' }, { + // Note: this test is very slow because we need to write > 64 KiB + // of data one byte at a time. source: [ ['-----------------------------paZqsnEHRufoShdX6fh0lUhXBP4k', 'Content-Disposition: form-data; ' + @@ -931,7 +933,7 @@ const tests = [ } ] -for (const test of tests.slice(0, 22)) { +for (const test of tests) { active.set(test, 1) const { what, boundary, events, limits, preservePath, fileHwm } = test @@ -1013,7 +1015,7 @@ for (const test of tests.slice(0, 22)) { } // Byte-by-byte versions -for (let test of tests.slice(0, 22)) { +for (let test of tests) { test = { ...test } test.what += ' (byte-by-byte)' active.set(test, 1) From ed92a8246a8dd02ff3a4f4805c102c08e175150d Mon Sep 17 00:00:00 2001 From: Khafra <42794878+KhafraDev@users.noreply.github.com> Date: Tue, 10 Jan 2023 00:36:28 -0500 Subject: [PATCH 28/36] fix: use path.win32.basename --- lib/formdata/parser.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/formdata/parser.js b/lib/formdata/parser.js index dc79736c0d6..4a823124418 100644 --- a/lib/formdata/parser.js +++ b/lib/formdata/parser.js @@ -1,7 +1,7 @@ 'use strict' const { Writable } = require('stream') -const { basename } = require('path') +const { win32: { basename } } = require('path') const { states, chars, headerStates, emptyBuffer, maxHeaderLength } = require('./constants') const { FileStream } = require('./filestream') const { From dd01bd39d4a19ce2e67a62842c1c3a576a19d3af Mon Sep 17 00:00:00 2001 From: Khafra <42794878+KhafraDev@users.noreply.github.com> Date: Tue, 10 Jan 2023 01:52:27 -0500 Subject: [PATCH 29/36] fix: add types & highwatermark options --- index.d.ts | 1 + lib/formdata/parser.js | 5 +- types/formdataparser.d.ts | 111 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 115 insertions(+), 2 deletions(-) create mode 100644 types/formdataparser.d.ts diff --git a/index.d.ts b/index.d.ts index d67de97241d..ba0f0909d72 100644 --- a/index.d.ts +++ b/index.d.ts @@ -21,6 +21,7 @@ export * from './types/fetch' export * from './types/file' export * from './types/filereader' export * from './types/formdata' +export * from './types/formdataparser' export * from './types/diagnostics-channel' export * from './types/websocket' export * from './types/content-type' diff --git a/lib/formdata/parser.js b/lib/formdata/parser.js index 4a823124418..7c2c3c0ff57 100644 --- a/lib/formdata/parser.js +++ b/lib/formdata/parser.js @@ -49,7 +49,7 @@ class FormDataParser extends Writable { #opts = {} constructor (opts) { - super(opts) + super({ highWaterMark: opts?.highWaterMark }) const contentType = opts.headers.get?.('content-type') ?? opts.headers['content-type'] const mimeType = contentType ? parseMIMEType(contentType) : null @@ -65,6 +65,7 @@ class FormDataParser extends Writable { this.#opts.defCharset = opts?.defCharset ?? 'utf8' this.#opts.defParamCharset = opts?.defParamCharset ?? 'utf8' + this.#opts.fileHwm = opts?.fileHwm this.#opts.preservePath = !!opts?.preservePath this.#opts.limits = { fieldNameSize: opts?.limits?.fieldNameSize ?? 100, @@ -270,7 +271,7 @@ class FormDataParser extends Writable { // End the active stream if there is one this.#info.stream?.push(null) - this.#info.stream = new FileStream() + this.#info.stream = new FileStream({ highWaterMark: this.#opts.fileHwm }) this.#info.stream.truncated = false // TODO this.emit( diff --git a/types/formdataparser.d.ts b/types/formdataparser.d.ts new file mode 100644 index 00000000000..ac2e8149c9f --- /dev/null +++ b/types/formdataparser.d.ts @@ -0,0 +1,111 @@ +// @ts-check + +/// + +import { Writable, Readable } from 'stream' +import { IncomingHttpHeaders } from 'http' +import { Headers } from './fetch' + +export interface FormDataParserConfig { + headers: IncomingHttpHeaders | Headers + highWaterMark?: number + fileHwm?: number + defCharset?: string + defParamCharset?: string + preservePath?: boolean + limits?: { + fieldNameSize?: number + fieldSize?: number + fields?: number + fileSize?: number + files?: number + parts?: number + headerPairs?: number + } +} + +interface FormDataParserErrors { + file: ( + name: string, + stream: Readable, + info: { + fileName: string, + stream: FileStream, + info: { + filename: string, + encoding: string, + mimeType: string + } + } + ) => void + + field: ( + name: string, + value: string, + info: { + name: string, + data: string, + info: { + nameTruncated: boolean, + valueTruncated: boolean, + encoding: string, + mimeType: string + } + } + ) => void + + partsLimit: () => void + + filesLimit: () => void + + fieldsLimit: () => void + + limit: () => void + + error: (error: Error) => void + + close: () => void +} + +export declare class FileStream extends Readable {} + +export declare class FormDataParser extends Writable { + constructor (opts: FormDataParserConfig) + + private run (cb: (err?: Error) => void): void + + addListener< + T extends keyof FormDataParserErrors + >(event: T, listener: FormDataParserErrors[T]): this + addListener(event: string | symbol, listener: (...args: any[]) => void): this + + on< + T extends keyof FormDataParserErrors + >(event: T, listener: FormDataParserErrors[T]): this + on(event: string | symbol, listener: (...args: any[]) => void): this + + once< + T extends keyof FormDataParserErrors + >(event: T, listener: FormDataParserErrors[T]): this + once(event: string | symbol, listener: (...args: any[]) => void): this + + removeListener< + T extends keyof FormDataParserErrors + >(event: T, listener: FormDataParserErrors[T]): this + removeListener(event: string | symbol, listener: (...args: any[]) => void): this + + off< + T extends keyof FormDataParserErrors + >(event: T, listener: FormDataParserErrors[T]): this + off(event: string | symbol, listener: (...args: any[]) => void): this + + prependListener< + T extends keyof FormDataParserErrors + >(event: T, listener: FormDataParserErrors[T]): this + prependListener(event: string | symbol, listener: (...args: any[]) => void): this + + prependOnceListener< + T extends keyof FormDataParserErrors + >(event: T, listener: FormDataParserErrors[T]): this + prependOnceListener(event: string | symbol, listener: (...args: any[]) => void): this +} From 787b679897583305f79c903b080fe64060a25ccc Mon Sep 17 00:00:00 2001 From: Khafra <42794878+KhafraDev@users.noreply.github.com> Date: Tue, 10 Jan 2023 23:07:32 -0500 Subject: [PATCH 30/36] fix: speedup header parsing & bug fixes & nyc coverage --- lib/formdata/constants.js | 5 +- lib/formdata/parser.js | 153 +++++++++++++++------------- lib/formdata/textsearch.js | 47 +++++++++ lib/formdata/util.js | 18 +++- package.json | 1 + test/busboy/test-types-multipart.js | 2 - 6 files changed, 150 insertions(+), 76 deletions(-) create mode 100644 lib/formdata/textsearch.js diff --git a/lib/formdata/constants.js b/lib/formdata/constants.js index ac03c3839e2..ef8d2a80a26 100644 --- a/lib/formdata/constants.js +++ b/lib/formdata/constants.js @@ -27,6 +27,8 @@ const chars = { const emptyBuffer = Buffer.alloc(0) +const crlfBuffer = Buffer.from('\r\n') + const maxHeaderLength = 16 * 1024 module.exports = { @@ -34,5 +36,6 @@ module.exports = { chars, headerStates, emptyBuffer, - maxHeaderLength + maxHeaderLength, + crlfBuffer } diff --git a/lib/formdata/parser.js b/lib/formdata/parser.js index 7c2c3c0ff57..d4df1755d76 100644 --- a/lib/formdata/parser.js +++ b/lib/formdata/parser.js @@ -2,12 +2,15 @@ const { Writable } = require('stream') const { win32: { basename } } = require('path') -const { states, chars, headerStates, emptyBuffer, maxHeaderLength } = require('./constants') +const { states, chars, headerStates, emptyBuffer, maxHeaderLength, crlfBuffer } = require('./constants') const { FileStream } = require('./filestream') const { collectHTTPQuotedStringLenient, - findCharacterType + findCharacterType, + hasHeaderValue, + isWhitespace } = require('./util') +const { TextSearch } = require('./textsearch') const { parseMIMEType, collectASequenceOfCodePoints, @@ -43,13 +46,19 @@ class FormDataParser extends Writable { }, headers: { attributes: {} - } + }, + headerEndSearch: new TextSearch(Buffer.from('\r\n\r\n')), + headerBuffersChecked: 0 } #opts = {} constructor (opts) { - super({ highWaterMark: opts?.highWaterMark }) + super({ + highWaterMark: opts?.highWaterMark, + autoDestroy: true, + emitClose: true + }) const contentType = opts.headers.get?.('content-type') ?? opts.headers['content-type'] const mimeType = contentType ? parseMIMEType(contentType) : null @@ -114,6 +123,10 @@ class FormDataParser extends Writable { this.#info.stream = null } + if (this.#info.headerEndSearch.lookedAt) { + this.emit('error', new Error('Malformed part header')) + } + cb(err) } @@ -178,76 +191,60 @@ class FormDataParser extends Writable { // arbitrary length. Therefore it's easier to read the headers, and // then parse once we receive 2 CRLFs which marks the body's start. - if (this.#byteOffset < 4) { + if (this.#byteOffset < crlfBuffer.length * 2) { return callback() } - // number of bytes to remove from the last buffer - let bytesToRemove = 0 - - done: { // eslint-disable-line no-labels - this.#info.headerState = headerStates.DEFAULT - - for (let j = 0; j < this.#buffers.length; j++) { - const buffer = this.#buffers[j] - - for (let i = 0; i < buffer.length; i++) { - const byte = buffer[i] - const state = this.#info.headerState - - if (byte === chars.cr && state === headerStates.DEFAULT) { - this.#info.headerState = headerStates.FIRST - } else if (byte === chars.lf && state === headerStates.FIRST) { - this.#info.headerState = headerStates.SECOND - } else if (byte === chars.cr && state === headerStates.SECOND) { - this.#info.headerState = headerStates.THIRD - } else if (byte === chars.lf && state === headerStates.THIRD) { - // Got \r\n\r\n which marks the end of the headers. - this.#state = states.READ_BODY - bytesToRemove += i - - const removed = this.consume(bytesToRemove - 3) // remove \r\n\r - const headers = this.#info.headers - - // This is a very lazy implementation. - if ('content-type' in headers && headers['content-type'].length === 0) { - const mimeUnparsed = removed.toString(this.#opts.defCharset) - const mimeType = parseMIMEType(mimeUnparsed) - - if (mimeType !== 'failure') { - headers['content-type'] = mimeType.essence - } - } - - this.#parseRawHeaders(removed) - this.consume(4) - - break done // eslint-disable-line no-labels - } else { - this.#info.headerState = headerStates.DEFAULT - - if (state === headerStates.SECOND) { - const removed = this.consume(bytesToRemove + i - 2) - - if (this.#info.skipPart) { - // If we are meant to skip this part, we shouldn't - // parse the headers. We just need to consume the bytes. - continue - } - - this.#parseRawHeaders(removed) - this.consume(2) - bytesToRemove -= removed.length + 2 - } - } - } + let i = this.#info.headerBuffersChecked + + while (!this.#info.headerEndSearch.finished && i < this.#buffers.length) { + this.#info.headerEndSearch.write(this.#buffers[i++]) + this.#info.headerBuffersChecked++ - bytesToRemove += buffer.length + if (this.#info.headerEndSearch.lookedAt > maxHeaderLength) { + this.#info.skipPart = 'Malformed part header' + break } + } + if (!this.#info.headerEndSearch.finished) { return callback() } + const consumed = this.consume(this.#info.headerEndSearch.lookedAt) + + this.#info.headerBuffersChecked = 0 + this.#info.headerEndSearch.reset() + + let index = consumed.indexOf(crlfBuffer) + let start = 0 + + while (index !== -1 && start < index) { + const buffer = consumed.subarray(start, index) + const valueIndex = buffer.indexOf(':') + + if (valueIndex === -1) { + index = consumed.indexOf(crlfBuffer, index + buffer.length) + continue + } else if (!hasHeaderValue(buffer, valueIndex + 1)) { + // HTTP/1.1 header field values can be folded onto multiple lines if the + // continuation line begins with a space or horizontal tab. + index = consumed.indexOf(crlfBuffer, index + buffer.length) + continue + } + + this.#parseRawHeaders(buffer) + + start = index + 2 + index = consumed.indexOf(crlfBuffer, start) + + if (this.#info.skipPart) { + break + } + } + + this.#state = states.READ_BODY + const { attributes, ...headers } = this.#info.headers if ( @@ -257,6 +254,7 @@ class FormDataParser extends Writable { !attributes.name ) ) { + this.#info.headers = { attributes: {} } // https://www.rfc-editor.org/rfc/rfc7578#section-4.2 this.#info.skipPart = 'invalid content-disposition' } else if ( @@ -266,13 +264,18 @@ class FormDataParser extends Writable { ) { if (this.#info.limits.files >= this.#opts.limits.files) { this.#info.skipPart = 'file' + this.#info.headers = { attributes: {} } continue } // End the active stream if there is one this.#info.stream?.push(null) - this.#info.stream = new FileStream({ highWaterMark: this.#opts.fileHwm }) - this.#info.stream.truncated = false // TODO + this.#info.stream = new FileStream({ + highWaterMark: this.#opts.fileHwm, + autoDestroy: true, + emitClose: true + }) + this.#info.stream.truncated = false this.emit( 'file', @@ -284,6 +287,7 @@ class FormDataParser extends Writable { mimeType: headers['content-type'] || 'application/octet-stream' } ) + this.#info.headers = { attributes: {} } } } else if (this.#state === states.READ_BODY) { // A part's body can contain CRLFs so they cannot be used to @@ -432,6 +436,7 @@ class FormDataParser extends Writable { } ) + this.#info.headers = { attributes: {} } this.#info.limits.fields++ body.chunks.length = 0 body.length = 0 @@ -556,11 +561,15 @@ class FormDataParser extends Writable { continue } - if (buffer[position.position + 1] === chars[' ']) { - position.position += 2 // skip "; " - } else { - position.position++ // skip ":" - } + position.position++ // skip semicolon + + // Header values can contain leading whitespace, make sure + // we remove it all. + collectASequenceOfCodePoints( + (char) => isWhitespace(char), + buffer, + position + ) const value = collectASequenceOfCodePoints( (char) => { diff --git a/lib/formdata/textsearch.js b/lib/formdata/textsearch.js new file mode 100644 index 00000000000..bccc758ef44 --- /dev/null +++ b/lib/formdata/textsearch.js @@ -0,0 +1,47 @@ +'use strict' + +class TextSearch { + /** @param {Buffer} pattern */ + constructor (pattern) { + this.pattern = pattern + + this.back = 0 + this.lookedAt = 0 + } + + /** + * @param {Buffer} chunk + */ + write (chunk) { + if (this.finished) { + return true + } + + for (const byte of chunk) { + this.lookedAt++ + + if (byte !== this.pattern[this.back]) { + this.back = 0 + } else { + if (++this.back === this.pattern.length) { + return true + } + } + } + + return this.back === this.pattern.length + } + + reset () { + this.back = 0 + this.lookedAt = 0 + } + + get finished () { + return this.back === this.pattern.length + } +} + +module.exports = { + TextSearch +} diff --git a/lib/formdata/util.js b/lib/formdata/util.js index 56edb482688..34bb6562970 100644 --- a/lib/formdata/util.js +++ b/lib/formdata/util.js @@ -33,7 +33,23 @@ function findCharacterType (type) { } } +function isWhitespace (char) { + return char === 0x09 || char === 0x20 || char === 0x0D || char === 0x0A +} + +function hasHeaderValue (chunk, start) { + for (let i = start; i < chunk.length; i++) { + if (!isWhitespace(chunk[i])) { + return true + } + } + + return false +} + module.exports = { collectHTTPQuotedStringLenient, - findCharacterType + findCharacterType, + isWhitespace, + hasHeaderValue } diff --git a/package.json b/package.json index 37baa25665b..c32b0833e01 100644 --- a/package.json +++ b/package.json @@ -47,6 +47,7 @@ "lint": "standard | snazzy", "lint:fix": "standard --fix | snazzy", "test": "npm run test:tap && npm run test:node-fetch && npm run test:fetch && npm run test:cookies && npm run test:wpt && npm run test:websocket && npm run test:jest && tsd", + "test:busboy": "node test/busboy/test.js", "test:cookies": "node scripts/verifyVersion 16 || tap test/cookie/*.js", "test:node-fetch": "node scripts/verifyVersion.js 16 || mocha test/node-fetch", "test:fetch": "node scripts/verifyVersion.js 16 || (npm run build:node && tap test/fetch/*.js && tap test/webidl/*.js)", diff --git a/test/busboy/test-types-multipart.js b/test/busboy/test-types-multipart.js index bf2edd65fd8..0e142963fb1 100644 --- a/test/busboy/test-types-multipart.js +++ b/test/busboy/test-types-multipart.js @@ -781,8 +781,6 @@ const tests = [ what: 'Files limit' }, { - // Note: this test is very slow because we need to write > 64 KiB - // of data one byte at a time. source: [ ['-----------------------------paZqsnEHRufoShdX6fh0lUhXBP4k', 'Content-Disposition: form-data; ' + From 7b15177159e3d6aa7de0d1ca8961b140b4af631c Mon Sep 17 00:00:00 2001 From: Khafra <42794878+KhafraDev@users.noreply.github.com> Date: Tue, 10 Jan 2023 23:08:08 -0500 Subject: [PATCH 31/36] fix: add main test file --- test/busboy/test.js | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) create mode 100644 test/busboy/test.js diff --git a/test/busboy/test.js b/test/busboy/test.js new file mode 100644 index 00000000000..22c42d3fd6b --- /dev/null +++ b/test/busboy/test.js @@ -0,0 +1,20 @@ +'use strict' + +const { spawnSync } = require('child_process') +const { readdirSync } = require('fs') +const { join } = require('path') + +const files = readdirSync(__dirname).sort() +for (const filename of files) { + if (filename.startsWith('test-')) { + const path = join(__dirname, filename) + console.log(`> Running ${filename} ...`) + // Note: nyc changes process.argv0, process.execPath, etc. + const result = spawnSync(`node ${path}`, { + shell: true, + stdio: 'inherit', + windowsHide: true + }) + if (result.status !== 0) { process.exitCode = 1 } + } +} From ac815206257f1cfdaa0c9954ca522a23a9940aad Mon Sep 17 00:00:00 2001 From: Khafra <42794878+KhafraDev@users.noreply.github.com> Date: Tue, 10 Jan 2023 23:23:16 -0500 Subject: [PATCH 32/36] whoops, skip tests on < v18 --- package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/package.json b/package.json index c32b0833e01..bfe8bcb1dea 100644 --- a/package.json +++ b/package.json @@ -47,7 +47,7 @@ "lint": "standard | snazzy", "lint:fix": "standard --fix | snazzy", "test": "npm run test:tap && npm run test:node-fetch && npm run test:fetch && npm run test:cookies && npm run test:wpt && npm run test:websocket && npm run test:jest && tsd", - "test:busboy": "node test/busboy/test.js", + "test:busboy": "node scripts/verifyVersion 18 || node test/busboy/test.js", "test:cookies": "node scripts/verifyVersion 16 || tap test/cookie/*.js", "test:node-fetch": "node scripts/verifyVersion.js 16 || mocha test/node-fetch", "test:fetch": "node scripts/verifyVersion.js 16 || (npm run build:node && tap test/fetch/*.js && tap test/webidl/*.js)", From e9c8f93f76ebb9132d16505f479ea7f27410aa00 Mon Sep 17 00:00:00 2001 From: Khafra <42794878+KhafraDev@users.noreply.github.com> Date: Wed, 11 Jan 2023 11:14:13 -0500 Subject: [PATCH 33/36] apply suggestions --- lib/formdata/constants.js | 18 +++++++++--------- lib/formdata/util.js | 4 ++-- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/lib/formdata/constants.js b/lib/formdata/constants.js index ef8d2a80a26..12ffbbc7942 100644 --- a/lib/formdata/constants.js +++ b/lib/formdata/constants.js @@ -15,19 +15,19 @@ const headerStates = { } const chars = { - '-': '-'.charCodeAt(0), - cr: '\r'.charCodeAt(0), - lf: '\n'.charCodeAt(0), - ':': ':'.charCodeAt(0), - ' ': ' '.charCodeAt(0), // 0x20 - ';': ';'.charCodeAt(0), - '=': '='.charCodeAt(0), - '"': '"'.charCodeAt(0) + '-': 0x2D, + cr: 0x0D, + lf: 0x0A, + ':': 0x3A, + ' ': 0x20, + ';': 0x3B, + '=': 0x3D, + '"': 0x22 } const emptyBuffer = Buffer.alloc(0) -const crlfBuffer = Buffer.from('\r\n') +const crlfBuffer = Buffer.from([0x0D, 0x0A]) // \r\n const maxHeaderLength = 16 * 1024 diff --git a/lib/formdata/util.js b/lib/formdata/util.js index 34bb6562970..c9bfc469c9b 100644 --- a/lib/formdata/util.js +++ b/lib/formdata/util.js @@ -9,12 +9,12 @@ const { collectASequenceOfCodePoints } = require('../fetch/dataURL') * @param {string} str */ function collectHTTPQuotedStringLenient (str) { - if (str[0] !== '"') { + if (!str.startsWith('"')) { return str } return collectASequenceOfCodePoints( - (char) => char !== '"', + (char) => char.charCodeAt(0) !== 0x22, // " str, { position: 1 } ) From 7642b605b17c2f3e8fc195ce7a872fc1051ab9c8 Mon Sep 17 00:00:00 2001 From: Khafra <42794878+KhafraDev@users.noreply.github.com> Date: Wed, 11 Jan 2023 13:54:20 -0500 Subject: [PATCH 34/36] cleanup FileStream --- lib/formdata/parser.js | 3 +++ test/client-request.js | 2 +- test/fetch/client-fetch.js | 2 +- test/utils/formdata.js | 4 ++-- 4 files changed, 7 insertions(+), 4 deletions(-) diff --git a/lib/formdata/parser.js b/lib/formdata/parser.js index d4df1755d76..b34b5a22a51 100644 --- a/lib/formdata/parser.js +++ b/lib/formdata/parser.js @@ -270,6 +270,8 @@ class FormDataParser extends Writable { // End the active stream if there is one this.#info.stream?.push(null) + this.#info.stream?.destroy() + this.#info.stream = new FileStream({ highWaterMark: this.#opts.fileHwm, autoDestroy: true, @@ -380,6 +382,7 @@ class FormDataParser extends Writable { this.#info.limits.fileSize += length this.#info.stream.push(limitedChunk) + this.#info.stream.push(null) this.#info.stream.destroy() } else if ( this.#info.limits.fieldSize < this.#opts.limits.fieldSize && diff --git a/test/client-request.js b/test/client-request.js index ff564604d42..fbb1265ca4e 100644 --- a/test/client-request.js +++ b/test/client-request.js @@ -658,7 +658,7 @@ test('request text2', (t) => { }) }) -test('request with FormData body', { skip: nodeMajor < 16 }, (t) => { +test('request with FormData body', { skip: nodeMajor < 18 }, (t) => { const { FormData } = require('../') const { Blob } = require('buffer') diff --git a/test/fetch/client-fetch.js b/test/fetch/client-fetch.js index 7a0cb550f54..fea06a2b392 100644 --- a/test/fetch/client-fetch.js +++ b/test/fetch/client-fetch.js @@ -247,7 +247,7 @@ test('multipart fromdata non-ascii filed names', async (t) => { t.equal(form.get('fiŝo'), 'value1') }) -test('busboy emit error', async (t) => { +test('BodyMixin.formData rejects with mismatched boundary', async (t) => { t.plan(1) const formData = new FormData() formData.append('field1', 'value1') diff --git a/test/utils/formdata.js b/test/utils/formdata.js index 19dac3e84e5..24aee0a5c04 100644 --- a/test/utils/formdata.js +++ b/test/utils/formdata.js @@ -1,4 +1,4 @@ -const busboy = require('busboy') +const { FormDataParser } = require('../..') function parseFormDataString ( body, @@ -9,7 +9,7 @@ function parseFormDataString ( fields: [] } - const bb = busboy({ + const bb = new FormDataParser({ headers: { 'content-type': contentType } From 588d62da9292be634b7402e0fde5d34341669860 Mon Sep 17 00:00:00 2001 From: Khafra <42794878+KhafraDev@users.noreply.github.com> Date: Wed, 11 Jan 2023 15:57:55 -0500 Subject: [PATCH 35/36] fix: replace busboy in fetch --- lib/fetch/body.js | 22 +++++++++++----------- lib/formdata/parser.js | 2 ++ package.json | 4 +--- 3 files changed, 14 insertions(+), 14 deletions(-) diff --git a/lib/fetch/body.js b/lib/fetch/body.js index c291afa9368..6ebf9b8723e 100644 --- a/lib/fetch/body.js +++ b/lib/fetch/body.js @@ -1,6 +1,5 @@ 'use strict' -const Busboy = require('busboy') const util = require('../core/util') const { ReadableStreamFrom, @@ -21,6 +20,7 @@ const { isErrored } = require('../core/util') const { isUint8Array, isArrayBuffer } = require('util/types') const { File: UndiciFile } = require('./file') const { parseMIMEType, serializeAMimeType } = require('./dataURL') +const { FormDataParser } = require('../formdata/parser') let ReadableStream = globalThis.ReadableStream @@ -374,10 +374,10 @@ function bodyMixinMethods (instance) { const responseFormData = new FormData() - let busboy + let parser try { - busboy = Busboy({ + parser = new FormDataParser({ headers, defParamCharset: 'utf8' }) @@ -385,10 +385,10 @@ function bodyMixinMethods (instance) { throw new DOMException(`${err}`, 'AbortError') } - busboy.on('field', (name, value) => { + parser.on('field', (name, value) => { responseFormData.append(name, value) }) - busboy.on('file', (name, value, info) => { + parser.on('file', (name, value, info) => { const { filename, encoding, mimeType } = info const chunks = [] @@ -417,14 +417,14 @@ function bodyMixinMethods (instance) { } }) - const busboyResolve = new Promise((resolve, reject) => { - busboy.on('finish', resolve) - busboy.on('error', (err) => reject(new TypeError(err))) + const promise = new Promise((resolve, reject) => { + parser.on('close', () => resolve()) + parser.on('error', (err) => reject(new TypeError(err))) }) - if (this.body !== null) for await (const chunk of consumeBody(this[kState].body)) busboy.write(chunk) - busboy.end() - await busboyResolve + if (this.body !== null) for await (const chunk of consumeBody(this[kState].body)) parser.write(chunk) + parser.end() + await promise return responseFormData } else if (/application\/x-www-form-urlencoded/.test(contentType)) { diff --git a/lib/formdata/parser.js b/lib/formdata/parser.js index b34b5a22a51..910214f3a88 100644 --- a/lib/formdata/parser.js +++ b/lib/formdata/parser.js @@ -29,6 +29,7 @@ class FormDataParser extends Writable { #info = { headerState: headerStates.DEFAULT, + /** @type {FileStream|null} */ stream: null, body: { chunks: [], @@ -383,6 +384,7 @@ class FormDataParser extends Writable { this.#info.stream.push(limitedChunk) this.#info.stream.push(null) + this.#info.stream.read() // TODO: why is this needed!?!?! this.#info.stream.destroy() } else if ( this.#info.limits.fieldSize < this.#opts.limits.fieldSize && diff --git a/package.json b/package.json index bfe8bcb1dea..b74e71e7840 100644 --- a/package.json +++ b/package.json @@ -72,6 +72,7 @@ "@types/node": "^18.0.3", "abort-controller": "^3.0.0", "atomic-sleep": "^1.0.0", + "busboy": "^1.6.0", "chai": "^4.3.4", "chai-as-promised": "^7.1.1", "chai-iterator": "^3.0.2", @@ -130,8 +131,5 @@ "testMatch": [ "/test/jest/**" ] - }, - "dependencies": { - "busboy": "^1.6.0" } } From e21de0814ad3e8eb00a7e82570a0fba319893cb1 Mon Sep 17 00:00:00 2001 From: Khafra <42794878+KhafraDev@users.noreply.github.com> Date: Sat, 25 Feb 2023 23:03:21 -0500 Subject: [PATCH 36/36] perf: use collectASequenceOfCodePointsFast --- lib/fetch/dataURL.js | 4 +++- lib/formdata/parser.js | 13 +++++++------ lib/formdata/util.js | 6 +++--- package.json | 2 +- 4 files changed, 14 insertions(+), 11 deletions(-) diff --git a/lib/fetch/dataURL.js b/lib/fetch/dataURL.js index d287c5ad27e..f4e98592700 100644 --- a/lib/fetch/dataURL.js +++ b/lib/fetch/dataURL.js @@ -168,9 +168,11 @@ function collectASequenceOfCodePoints (condition, input, position) { /** * A faster collectASequenceOfCodePoints that only works when comparing a single character. + * @template {string|Buffer} T * @param {string} char - * @param {string} input + * @param {T} input * @param {{ position: number }} position + * @returns {T} */ function collectASequenceOfCodePointsFast (char, input, position) { const idx = input.indexOf(char, position.position) diff --git a/lib/formdata/parser.js b/lib/formdata/parser.js index 910214f3a88..19203f3a0ef 100644 --- a/lib/formdata/parser.js +++ b/lib/formdata/parser.js @@ -14,6 +14,7 @@ const { TextSearch } = require('./textsearch') const { parseMIMEType, collectASequenceOfCodePoints, + collectASequenceOfCodePointsFast, stringPercentDecode } = require('../fetch/dataURL') @@ -519,8 +520,8 @@ class FormDataParser extends Writable { const position = { position: 0 } while (position.position < buffer.length) { - const nameBuffer = collectASequenceOfCodePoints( - (char) => char !== chars[':'], + const nameBuffer = collectASequenceOfCodePointsFast( + chars[':'], buffer, position ) @@ -557,8 +558,8 @@ class FormDataParser extends Writable { // Go to the next header if one is available. Headers are split by \r\n, // so we skip all characters until the sequence is found. - collectASequenceOfCodePoints( - (char) => char !== chars.cr && buffer[position.position + 1] !== chars.lf, + collectASequenceOfCodePointsFast( + '\r\n', buffer, position ) @@ -595,8 +596,8 @@ class FormDataParser extends Writable { } if (name !== 'content-disposition') { - collectASequenceOfCodePoints( - (char) => char !== chars.cr && buffer[position.position + 1] !== chars.lf, + collectASequenceOfCodePointsFast( + '\r\n', buffer, position ) diff --git a/lib/formdata/util.js b/lib/formdata/util.js index c9bfc469c9b..e14d0ecae00 100644 --- a/lib/formdata/util.js +++ b/lib/formdata/util.js @@ -1,6 +1,6 @@ 'use strict' -const { collectASequenceOfCodePoints } = require('../fetch/dataURL') +const { collectASequenceOfCodePointsFast } = require('../fetch/dataURL') /** * Collect a string inside of quotes. Unlike @@ -13,8 +13,8 @@ function collectHTTPQuotedStringLenient (str) { return str } - return collectASequenceOfCodePoints( - (char) => char.charCodeAt(0) !== 0x22, // " + return collectASequenceOfCodePointsFast( + '"', str, { position: 1 } ) diff --git a/package.json b/package.json index b74e71e7840..4d667890de3 100644 --- a/package.json +++ b/package.json @@ -46,7 +46,7 @@ "build:wasm": "node build/wasm.js --docker", "lint": "standard | snazzy", "lint:fix": "standard --fix | snazzy", - "test": "npm run test:tap && npm run test:node-fetch && npm run test:fetch && npm run test:cookies && npm run test:wpt && npm run test:websocket && npm run test:jest && tsd", + "test": "npm run test:tap && npm run test:node-fetch && npm run test:fetch && npm run test:cookies && npm run test:busboy && npm run test:wpt && npm run test:websocket && npm run test:jest && tsd", "test:busboy": "node scripts/verifyVersion 18 || node test/busboy/test.js", "test:cookies": "node scripts/verifyVersion 16 || tap test/cookie/*.js", "test:node-fetch": "node scripts/verifyVersion.js 16 || mocha test/node-fetch",