Skip to content

Commit 00d3828

Browse files
committed
Remove parsing by references to improve code maintenance
1 parent f657448 commit 00d3828

File tree

2 files changed

+105
-78
lines changed

2 files changed

+105
-78
lines changed

src/InnerList.php

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
*/
1414
final class InnerList implements Countable, IteratorAggregate, StructuredField, SupportsParameters
1515
{
16-
/** @var array<Item> */
16+
/** @var array<Item> */
1717
private array $members;
1818

1919
public function __construct(private Parameters $parameters, Item ...$members)

src/Parser.php

Lines changed: 104 additions & 77 deletions
Original file line numberDiff line numberDiff line change
@@ -28,9 +28,9 @@ public static function parseList(string $httpValue): array
2828
$members = [];
2929
$remainder = ltrim($httpValue, ' ');
3030
while ('' !== $remainder) {
31-
$members[] = self::parseItemOrInnerList($remainder);
32-
$remainder = ltrim($remainder, " \t");
33-
31+
[$member, $offset] = self::parseItemOrInnerList($remainder);
32+
$members[] = $member;
33+
$remainder = ltrim(substr($remainder, $offset), " \t");
3434
if ('' === $remainder) {
3535
break;
3636
}
@@ -40,7 +40,6 @@ public static function parseList(string $httpValue): array
4040
}
4141

4242
$remainder = substr($remainder, strlen($found[1]));
43-
4443
if ('' === $remainder) {
4544
throw new SyntaxError("Unexpected end of line for The HTTP textual representation `$httpValue` for a list.");
4645
}
@@ -61,12 +60,17 @@ public static function parseDictionary(string $httpValue): array
6160
$members = [];
6261
$remainder = ltrim($httpValue, ' ');
6362
while ('' !== $remainder) {
64-
$key = self::parseKey($remainder);
63+
[$key, $offset] = self::parseKey($remainder);
64+
$remainder = substr($remainder, $offset);
6565
if ('' !== $remainder && $remainder[0] === '=') {
6666
$remainder = substr($remainder, 1);
67-
$members[$key] = self::parseItemOrInnerList($remainder);
67+
[$member, $offset] = self::parseItemOrInnerList($remainder);
68+
$members[$key] = $member;
69+
$remainder = substr($remainder, $offset);
6870
} else {
69-
$members[$key] = Item::from(true, self::parseParameters($remainder));
71+
[$parameters, $offset] = self::parseParameters($remainder);
72+
$members[$key] = Item::from(true, $parameters);
73+
$remainder = substr($remainder, $offset);
7074
}
7175

7276
$remainder = ltrim($remainder, " \t");
@@ -79,7 +83,6 @@ public static function parseDictionary(string $httpValue): array
7983
}
8084

8185
$remainder = substr($remainder, strlen($found[1]));
82-
8386
if ('' === $remainder) {
8487
throw new SyntaxError("Unexpected end of line for The HTTP textual representation `$httpValue` for a dictionary.");
8588
}
@@ -92,50 +95,70 @@ public static function parseDictionary(string $httpValue): array
9295
* Returns an Item or an InnerList value object from an HTTP textual representation.
9396
*
9497
* @see https://www.rfc-editor.org/rfc/rfc8941.html#section-4.2.1.1
98+
*
99+
* @return array{0: InnerList|Item, 1:int}
95100
*/
96-
private static function parseItemOrInnerList(string &$httpValue): InnerList|Item
101+
private static function parseItemOrInnerList(string $httpValue): array
97102
{
98103
if ($httpValue[0] === '(') {
99104
return self::parseInnerList($httpValue);
100105
}
101106

102-
return Item::from(self::parseBareItem($httpValue), self::parseParameters($httpValue));
107+
[$value, $offset] = self::parseBareItem($httpValue);
108+
$remainder = substr($httpValue, $offset);
109+
110+
[$parameters, $offset] = self::parseParameters($remainder);
111+
$remainder = substr($remainder, $offset);
112+
113+
return [Item::from($value, $parameters), strlen($httpValue) - strlen($remainder)];
103114
}
104115

105116
/**
106117
* Returns an InnerList value object from an HTTP textual representation.
107118
*
108119
* @see https://www.rfc-editor.org/rfc/rfc8941.html#section-4.2.1.2
120+
*
121+
* @return array{0:InnerList, 1:int}
109122
*/
110-
private static function parseInnerList(string &$httpValue): InnerList
123+
private static function parseInnerList(string $httpValue): array
111124
{
112125
$members = [];
113-
$httpValue = substr($httpValue, 1);
114-
while ('' !== $httpValue) {
115-
$httpValue = ltrim($httpValue, ' ');
126+
$remainder = substr($httpValue, 1);
127+
while ('' !== $remainder) {
128+
$remainder = ltrim($remainder, ' ');
116129

117-
if ($httpValue[0] === ')') {
118-
$httpValue = substr($httpValue, 1);
130+
if ($remainder[0] === ')') {
131+
$remainder = substr($remainder, 1);
132+
[$parameters, $offset] = self::parseParameters($remainder);
133+
$remainder = substr($remainder, $offset);
119134

120-
return InnerList::fromMembers($members, self::parseParameters($httpValue));
135+
return [InnerList::fromMembers($members, $parameters), strlen($httpValue) - strlen($remainder)];
121136
}
122137

123-
$members[] = Item::from(self::parseBareItem($httpValue), self::parseParameters($httpValue));
138+
[$value, $offset] = self::parseBareItem($remainder);
139+
$remainder = substr($remainder, $offset);
140+
141+
[$parameters, $offset] = self::parseParameters($remainder);
142+
$remainder = substr($remainder, $offset);
124143

125-
if ('' !== $httpValue && !in_array($httpValue[0], [' ', ')'], true)) {
126-
throw new SyntaxError("The HTTP textual representation `$httpValue` for a inner list is using invalid characters.");
144+
$members[] = Item::from($value, $parameters);
145+
146+
if ('' !== $remainder && !in_array($remainder[0], [' ', ')'], true)) {
147+
throw new SyntaxError("The HTTP textual representation `$remainder` for a inner list is using invalid characters.");
127148
}
128149
}
129150

130-
throw new SyntaxError("Unexpected end of line for The HTTP textual representation `$httpValue` for a inner list.");
151+
throw new SyntaxError("Unexpected end of line for The HTTP textual representation `$remainder` for a inner list.");
131152
}
132153

133154
/**
134155
* Returns a bare item value (boolean, number, string, Token or Byte Sequence) from an HTTP textual representation.
135156
*
136157
* @see https://www.rfc-editor.org/rfc/rfc8941.html#section-4.2.3.1
158+
*
159+
* @return array{0:bool|float|int|string|ByteSequence|Token, 1:int}
137160
*/
138-
private static function parseBareItem(string &$httpValue): bool|float|int|string|ByteSequence|Token
161+
private static function parseBareItem(string $httpValue): array
139162
{
140163
return match (true) {
141164
$httpValue === '' => throw new SyntaxError('Unexpected empty string for The HTTP textual representation of an item.'),
@@ -153,155 +176,159 @@ private static function parseBareItem(string &$httpValue): bool|float|int|string
153176
*
154177
* @see https://www.rfc-editor.org/rfc/rfc8941.html#section-4.2.3.2
155178
*
156-
* @return array<array-key, Item|Token|ByteSequence|float|int|bool|string>
179+
* @return array{0:array<array-key, Item|Token|ByteSequence|float|int|bool|string>, 1:int}
157180
*/
158-
private static function parseParameters(string &$httpValue): array
181+
private static function parseParameters(string $httpValue): array
159182
{
160183
$parameters = [];
184+
$remainder = $httpValue;
185+
while ('' !== $remainder && ';' === $remainder[0]) {
186+
$remainder = ltrim(substr($remainder, 1), ' ');
161187

162-
while ('' !== $httpValue && ';' === $httpValue[0]) {
163-
$httpValue = ltrim(substr($httpValue, 1), ' ');
164-
165-
$key = self::parseKey($httpValue);
188+
[$key, $keyOffset] = self::parseKey($remainder);
166189
$parameters[$key] = true;
167190

168-
if ('' !== $httpValue && '=' === $httpValue[0]) {
169-
$httpValue = substr($httpValue, 1);
170-
$parameters[$key] = self::parseBareItem($httpValue);
191+
$remainder = substr($remainder, $keyOffset);
192+
if ('' !== $remainder && '=' === $remainder[0]) {
193+
$remainder = substr($remainder, 1);
194+
195+
[$parameters[$key], $offset] = self::parseBareItem($remainder);
196+
$remainder = substr($remainder, $offset);
171197
}
172198
}
173199

174-
return $parameters;
200+
return [$parameters, strlen($httpValue) - strlen($remainder)];
175201
}
176202

177203
/**
178204
* Returns a Dictionary key or a Parameter key, as a string, from an HTTP textual representation.
179205
*
180206
* @see https://www.rfc-editor.org/rfc/rfc8941.html#section-4.2.3.3
207+
*
208+
* @return array{0:string, 1:int}
181209
*/
182-
private static function parseKey(string &$httpValue): string
210+
private static function parseKey(string $httpValue): array
183211
{
184212
if (1 !== preg_match('/^[a-z*][a-z0-9.*_-]*/', $httpValue, $matches)) {
185-
throw new SyntaxError('Invalid character in key');
213+
throw new SyntaxError("Invalid character in the HTTP textual representation of a key `$httpValue`.");
186214
}
187215

188-
$httpValue = substr($httpValue, strlen($matches[0]));
189-
190-
return $matches[0];
216+
return [$matches[0], strlen($matches[0])];
191217
}
192218

193219
/**
194220
* Returns a boolean from an HTTP textual representation.
195221
*
196222
* @see https://www.rfc-editor.org/rfc/rfc8941.html#section-4.2.8
223+
*
224+
* @return array{0:bool, 1:int}
197225
*/
198-
private static function parseBoolean(string &$httpValue): bool
226+
private static function parseBoolean(string $httpValue): array
199227
{
200228
if (1 !== preg_match('/^\?[01]/', $httpValue)) {
201-
throw new SyntaxError('Invalid character in boolean');
229+
throw new SyntaxError("Invalid character in the HTTP textual representation of a boolean value `$httpValue`.");
202230
}
203231

204-
$value = $httpValue[1] === '1';
205-
206-
$httpValue = substr($httpValue, 2);
207-
208-
return $value;
232+
return [$httpValue[1] === '1', 2];
209233
}
210234

211235
/**
212236
* Returns an int or a float from an HTTP textual representation.
213237
*
214238
* @see https://www.rfc-editor.org/rfc/rfc8941.html#section-4.2.4
239+
*
240+
* @return array{0:int|float, 1:int}
215241
*/
216-
private static function parseNumber(string &$httpValue): int|float
242+
private static function parseNumber(string $httpValue): array
217243
{
218-
if (1 !== preg_match('/^(-?\d+(?:\.\d+)?)(?:[^\d.]|$)/', $httpValue, $number_matches)) {
219-
throw new SyntaxError('Invalid number format');
244+
if (1 !== preg_match('/^(?<number>-?\d+(?:\.\d+)?)(?:[^\d.]|$)/', $httpValue, $found)) {
245+
throw new SyntaxError("Invalid number format in the HTTP textual representation of a number value `$httpValue`.");
220246
}
221247

222-
$input_number = $number_matches[1];
223-
$httpValue = substr($httpValue, strlen($input_number));
248+
$offset = strlen($found['number']);
224249

225250
return match (true) {
226-
1 === preg_match('/^-?\d{1,12}\.\d{1,3}$/', $input_number) => (float) $input_number,
227-
1 === preg_match('/^-?\d{1,15}$/', $input_number) => (int) $input_number,
228-
default => throw new SyntaxError('Number contains too many digits'),
251+
1 === preg_match('/^-?\d{1,12}\.\d{1,3}$/', $found['number']) => [(float) $found['number'], $offset],
252+
1 === preg_match('/^-?\d{1,15}$/', $found['number']) => [(int) $found['number'], $offset],
253+
default => throw new SyntaxError("The number format in the HTTP textual representation `$httpValue` contains too much digit."),
229254
};
230255
}
231256

232257
/**
233258
* Returns a string from an HTTP textual representation.
234259
*
235260
* @see https://www.rfc-editor.org/rfc/rfc8941.html#section-4.2.5
261+
*
262+
* @return array{0:string, 1:int}
236263
*/
237-
private static function parseString(string &$httpValue): string
264+
private static function parseString(string $httpValue): array
238265
{
239-
// parseString is only called if first character is a double quote, so
240-
// don't need to validate it here.
241-
$httpValue = substr($httpValue, 1);
242-
243-
$output_string = '';
266+
$offset = 1;
267+
$httpValue = substr($httpValue, $offset);
268+
$output = '';
244269

245270
while (strlen($httpValue)) {
246271
$char = $httpValue[0];
272+
$offset += 1;
247273
$httpValue = substr($httpValue, 1);
248274

249-
if ($char === '"') {
250-
return $output_string;
275+
if ('"' === $char) {
276+
return [$output, $offset];
251277
}
252278

253279
if (ord($char) <= 0x1f || ord($char) >= 0x7f) {
254-
throw new SyntaxError('Invalid character in string');
280+
throw new SyntaxError("Invalid character in the HTTP textual representation of a string `$httpValue`.");
255281
}
256282

257283
if ($char !== '\\') {
258-
$output_string .= $char;
284+
$output .= $char;
259285
continue;
260286
}
261287

262-
if ($httpValue === '') {
263-
throw new SyntaxError('Invalid end of string');
288+
if ('' === $httpValue) {
289+
throw new SyntaxError("Invalid end of string in the HTTP textual representation of a string `$httpValue`.");
264290
}
265291

266292
$char = $httpValue[0];
293+
$offset += 1;
267294
$httpValue = substr($httpValue, 1);
268295
if (!in_array($char, ['"', '\\'], true)) {
269-
throw new SyntaxError('Invalid escaped character in string');
296+
throw new SyntaxError("Invalid characters in the HTTP textual representation of a string `$httpValue`.");
270297
}
271298

272-
$output_string .= $char;
299+
$output .= $char;
273300
}
274301

275-
throw new SyntaxError('Invalid end of string');
302+
throw new SyntaxError("Invalid end of string in the HTTP textual representation of a string `$httpValue`.");
276303
}
277304

278305
/**
279306
* Returns a Token from an HTTP textual representation.
280307
*
281308
* @see https://www.rfc-editor.org/rfc/rfc8941.html#section-4.2.6
309+
*
310+
* @return array{0:Token, 1:int}
282311
*/
283-
private static function parseToken(string &$httpValue): Token
312+
private static function parseToken(string $httpValue): array
284313
{
285-
preg_match('/^([a-z*][a-z0-9:\/'.preg_quote("!#$%&'*+-.^_`|~").']*)/i', $httpValue, $matches);
314+
preg_match('/^(?<token>[a-z*][a-z0-9:\/'.preg_quote("!#$%&'*+-.^_`|~").']*)/i', $httpValue, $found);
286315

287-
$httpValue = substr($httpValue, strlen($matches[1]));
288-
289-
return new Token($matches[1]);
316+
return [new Token($found['token']), strlen($found['token'])];
290317
}
291318

292319
/**
293320
* Returns a Byte Sequence from an HTTP textual representation.
294321
*
295322
* @see https://www.rfc-editor.org/rfc/rfc8941.html#section-4.2.7
323+
*
324+
* @return array{0:ByteSequence, 1:int}
296325
*/
297-
private static function parseByteSequence(string &$httpValue): ByteSequence
326+
private static function parseByteSequence(string $httpValue): array
298327
{
299328
if (1 !== preg_match('/^:([a-z0-9+\/=]*):/i', $httpValue, $matches)) {
300-
throw new SyntaxError('Invalid character in byte sequence');
329+
throw new SyntaxError("Invalid characters in the HTTP textual representation of a Byte Sequence `$httpValue`.");
301330
}
302331

303-
$httpValue = substr($httpValue, strlen($matches[0]));
304-
305-
return ByteSequence::fromEncoded($matches[1]);
332+
return [ByteSequence::fromEncoded($matches[1]), strlen($matches[0])];
306333
}
307334
}

0 commit comments

Comments
 (0)