Skip to content

Commit 39f896b

Browse files
committed
Improve decoupling from Parser
1 parent 49a7cb0 commit 39f896b

File tree

7 files changed

+83
-50
lines changed

7 files changed

+83
-50
lines changed

src/Dictionary.php

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -92,7 +92,7 @@ public static function fromAssociative(iterable $members): self
9292
public static function fromPairs(iterable $pairs): self
9393
{
9494
if ($pairs instanceof MemberOrderedMap) {
95-
$pairs = $pairs->toPairs();
95+
return new self($pairs);
9696
}
9797

9898
return new self((function (iterable $pairs) {

src/InnerList.php

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -63,9 +63,9 @@ public static function fromHttpValue(Stringable|string $httpValue): self
6363
{
6464
[$membersList, $parameters] = Parser::parseInnerList($httpValue);
6565

66-
return self::fromAssociative(
66+
return new self(
6767
array_map(fn (array $member): Item => Item::fromAssociative(...$member), $membersList),
68-
$parameters
68+
Parameters::fromAssociative($parameters)
6969
);
7070
}
7171

src/Item.php

Lines changed: 1 addition & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -9,10 +9,6 @@
99
use DateTimeZone;
1010
use Stringable;
1111
use function count;
12-
use function preg_match;
13-
use function str_contains;
14-
use function substr;
15-
use function trim;
1612

1713
/**
1814
* @see https://www.rfc-editor.org/rfc/rfc8941.html#section-3.3
@@ -38,18 +34,7 @@ private function __construct(
3834
*/
3935
public static function fromHttpValue(Stringable|string $httpValue): self
4036
{
41-
$itemString = trim((string) $httpValue, ' ');
42-
if ('' === $itemString || 1 === preg_match("/[\r\t\n]|[^\x20-\x7E]/", $itemString)) {
43-
throw new SyntaxError('The HTTP textual representation "'.$httpValue.'" for an item contains invalid characters.');
44-
}
45-
46-
[$value, $offset] = Parser::parseBareItem($itemString);
47-
$remainder = substr($itemString, $offset);
48-
if ('' !== $remainder && !str_contains($remainder, ';')) {
49-
throw new SyntaxError('The HTTP textual representation "'.$httpValue.'" for an item contains invalid characters.');
50-
}
51-
52-
return new self(new Value($value), Parameters::fromHttpValue($remainder));
37+
return self::fromAssociative(...Parser::parseItem($httpValue));
5338
}
5439

5540
/**

src/OuterList.php

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,7 @@ private static function filterMember(mixed $member): object
6262
*/
6363
public static function fromHttpValue(Stringable|string $httpValue): self
6464
{
65-
return self::new(...(function (iterable $members) {
65+
return new self(...(function (iterable $members) {
6666
foreach ($members as $member) {
6767
if (!is_array($member[0])) {
6868
yield Item::fromAssociative(...$member);

src/Parameters.php

Lines changed: 2 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,6 @@
1414
use function count;
1515
use function implode;
1616
use function is_string;
17-
use function trim;
1817
use const ARRAY_FILTER_USE_KEY;
1918

2019
/**
@@ -90,7 +89,7 @@ public static function fromAssociative(iterable $members): self
9089
public static function fromPairs(iterable $pairs): self
9190
{
9291
if ($pairs instanceof MemberOrderedMap) {
93-
$pairs = $pairs->toPairs();
92+
return new self($pairs);
9493
}
9594

9695
return new self((function (iterable $pairs) {
@@ -109,13 +108,7 @@ public static function fromPairs(iterable $pairs): self
109108
*/
110109
public static function fromHttpValue(Stringable|string $httpValue): self
111110
{
112-
$httpValue = trim((string) $httpValue);
113-
[$parameters, $offset] = Parser::parseParameters($httpValue);
114-
if (strlen($httpValue) !== $offset) {
115-
throw new SyntaxError('The HTTP textual representation "'.$httpValue.'" for Parameters contains invalid characters.');
116-
}
117-
118-
return self::fromAssociative($parameters);
111+
return new self(Parser::parseParameters($httpValue));
119112
}
120113

121114
public function toHttpValue(): string

src/Parser.php

Lines changed: 75 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
use function in_array;
1010
use function ltrim;
1111
use function preg_match;
12+
use function str_contains;
1213
use function strlen;
1314
use function substr;
1415

@@ -28,6 +29,57 @@
2829
*/
2930
final class Parser
3031
{
32+
private const REGEXP_BYTE_SEQUENCE = '/^(?<sequence>:(?<byte>[a-z\d+\/=]*):)/i';
33+
private const REGEXP_BOOLEAN = '/^\?[01]/';
34+
private const REGEXP_DATE = '/^@(?<date>-?\d{1,15})(?:[^\d.]|$)/';
35+
private const REGEXP_DECIMAL = '/^-?\d{1,12}\.\d{1,3}$/';
36+
private const REGEXP_INTEGER = '/^-?\d{1,15}$/';
37+
private const REGEXP_TOKEN = "/^(?<token>[a-z*][a-z\d:\/!#\$%&'*+\-.^_`|~]*)/i";
38+
private const REGEXP_INVALID_CHARACTERS = "/[\r\t\n]|[^\x20-\x7E]/";
39+
private const REGEXP_VALID_NUMBER = '/^(?<number>-?\d+(?:\.\d+)?)(?:[^\d.]|$)/';
40+
private const REGEXP_VALID_SPACE = '/^(?<space>,[ \t]*)/';
41+
private const FIRST_CHARACTER_RANGE_NUMBER = '-1234567890';
42+
private const FIRST_CHARACTER_RANGE_TOKEN = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ*';
43+
44+
/**
45+
* @return array{0:SfType, 1:array<string, SfType>}
46+
*/
47+
public static function parseItem(Stringable|string $httpValue): array
48+
{
49+
$itemString = trim((string) $httpValue, ' ');
50+
if ('' === $itemString || 1 === preg_match(self::REGEXP_INVALID_CHARACTERS, $itemString)) {
51+
throw new SyntaxError('The HTTP textual representation "'.$httpValue.'" for an item contains invalid characters.');
52+
}
53+
54+
[$value, $offset] = Parser::parseBareItem($itemString);
55+
$remainder = substr($itemString, $offset);
56+
if ('' !== $remainder && !str_contains($remainder, ';')) {
57+
throw new SyntaxError('The HTTP textual representation "'.$httpValue.'" for an item contains invalid characters.');
58+
}
59+
60+
return [$value, self::parseParameters($remainder)];
61+
}
62+
63+
/**
64+
* Returns the parameters as a PHP associative array from an HTTP textual representation.
65+
*
66+
* @see https://www.rfc-editor.org/rfc/rfc8941.html#section-3.1.2
67+
*
68+
* @throws SyntaxError If the string is not a valid HTTP textual representation of parameters.
69+
*
70+
* @return array<string, SfType>
71+
*/
72+
public static function parseParameters(Stringable|string $httpValue): array
73+
{
74+
$httpValue = trim((string) $httpValue);
75+
[$parameters, $offset] = Parser::parseContainedParameters($httpValue);
76+
if (strlen($httpValue) !== $offset) {
77+
throw new SyntaxError('The HTTP textual representation "'.$httpValue.'" for Parameters contains invalid characters.');
78+
}
79+
80+
return $parameters;
81+
}
82+
3183
/**
3284
* Returns an ordered list represented as a PHP list array from an HTTP textual representation.
3385
*
@@ -107,7 +159,7 @@ private static function removeCommaSeparatedWhiteSpaces(string $httpValue, int $
107159
return $httpValue;
108160
}
109161

110-
if (1 !== preg_match('/^(?<space>,[ \t]*)/', $httpValue, $found)) {
162+
if (1 !== preg_match(self::REGEXP_VALID_SPACE, $httpValue, $found)) {
111163
throw new SyntaxError('The HTTP textual representation is missing an excepted comma.');
112164
}
113165

@@ -130,7 +182,7 @@ private static function removeOptionalWhiteSpaces(string $httpValue): string
130182
}
131183

132184
/**
133-
* Returns an Item value object or an inner list as a PHP list array from an HTTP textual representation.
185+
* Returns an item or an inner list as a PHP list array from an HTTP textual representation.
134186
*
135187
* @see https://www.rfc-editor.org/rfc/rfc8941.html#section-4.2.1.1
136188
*
@@ -142,7 +194,7 @@ private static function parseItemOrInnerList(string $httpValue): array
142194
return self::parseInnerListValue($httpValue);
143195
}
144196

145-
[$item, $remainder] = self::parseItem($httpValue);
197+
[$item, $remainder] = self::parseContainedItem($httpValue);
146198

147199
return [$item, strlen($httpValue) - strlen($remainder)];
148200
}
@@ -163,13 +215,13 @@ private static function parseInnerListValue(string $httpValue): array
163215

164216
if (')' === $remainder[0]) {
165217
$remainder = substr($remainder, 1);
166-
[$parameters, $offset] = self::parseParameters($remainder);
218+
[$parameters, $offset] = self::parseContainedParameters($remainder);
167219
$remainder = substr($remainder, $offset);
168220

169221
return [[$list, $parameters], strlen($httpValue) - strlen($remainder)];
170222
}
171223

172-
[$list[], $remainder] = self::parseItem($remainder);
224+
[$list[], $remainder] = self::parseContainedItem($remainder);
173225

174226
if ('' !== $remainder && !in_array($remainder[0], [' ', ')'], true)) {
175227
throw new SyntaxError("The HTTP textual representation \"$remainder\" for a inner list is using invalid characters.");
@@ -180,33 +232,35 @@ private static function parseInnerListValue(string $httpValue): array
180232
}
181233

182234
/**
235+
* Returns an item represented as a PHP array from an HTTP textual representation and the unparsed remainder of the string in a tuple.
236+
*
183237
* @return array{0:array{0:SfType, 1:array<string, SfType>}, 1:string}
184238
*/
185-
private static function parseItem(string $remainder): array
239+
private static function parseContainedItem(string $remainder): array
186240
{
187241
[$value, $offset] = self::parseBareItem($remainder);
188242
$remainder = substr($remainder, $offset);
189-
[$parameters, $offset] = self::parseParameters($remainder);
243+
[$parameters, $offset] = self::parseContainedParameters($remainder);
190244

191245
return [[$value, $parameters], substr($remainder, $offset)];
192246
}
193247

194248
/**
195-
* Returns an Item value from an HTTP textual representation and the consumed offset in a tuple.
249+
* Returns an item value from an HTTP textual representation and the consumed offset in a tuple.
196250
*
197251
* @see https://www.rfc-editor.org/rfc/rfc8941.html#section-4.2.3.1
198252
*
199253
* @return array{0:SfType, 1:int}
200254
*/
201-
public static function parseBareItem(string $httpValue): array
255+
private static function parseBareItem(string $httpValue): array
202256
{
203257
return match (true) {
204258
'"' === $httpValue[0] => self::parseString($httpValue),
205259
':' === $httpValue[0] => self::parseByteSequence($httpValue),
206260
'?' === $httpValue[0] => self::parseBoolean($httpValue),
207261
'@' === $httpValue[0] => self::parseDate($httpValue),
208-
1 === preg_match('/^(-|\d)/', $httpValue) => self::parseNumber($httpValue),
209-
1 === preg_match('/^([a-z*])/i', $httpValue) => self::parseToken($httpValue),
262+
str_contains(self::FIRST_CHARACTER_RANGE_NUMBER, $httpValue[0]) => self::parseNumber($httpValue),
263+
str_contains(self::FIRST_CHARACTER_RANGE_TOKEN, $httpValue[0]) => self::parseToken($httpValue),
210264
default => throw new SyntaxError("The HTTP textual representation \"$httpValue\" for an Item is unknown or unsupported."),
211265
};
212266
}
@@ -218,9 +272,10 @@ public static function parseBareItem(string $httpValue): array
218272
*
219273
* @return array{0:array<string, SfType>, 1:int}
220274
*/
221-
public static function parseParameters(string $httpValue): array
275+
private static function parseContainedParameters(Stringable|string $httpValue): array
222276
{
223277
$map = [];
278+
$httpValue = (string) $httpValue;
224279
$remainder = $httpValue;
225280
while ('' !== $remainder && ';' === $remainder[0]) {
226281
$remainder = ltrim(substr($remainder, 1), ' ');
@@ -249,7 +304,7 @@ public static function parseParameters(string $httpValue): array
249304
*/
250305
private static function parseBoolean(string $httpValue): array
251306
{
252-
if (1 !== preg_match('/^\?[01]/', $httpValue)) {
307+
if (1 !== preg_match(self::REGEXP_BOOLEAN, $httpValue)) {
253308
throw new SyntaxError("The HTTP textual representation \"$httpValue\" for a Boolean contains invalid characters.");
254309
}
255310

@@ -265,13 +320,13 @@ private static function parseBoolean(string $httpValue): array
265320
*/
266321
private static function parseNumber(string $httpValue): array
267322
{
268-
if (1 !== preg_match('/^(?<number>-?\d+(?:\.\d+)?)(?:[^\d.]|$)/', $httpValue, $found)) {
323+
if (1 !== preg_match(self::REGEXP_VALID_NUMBER, $httpValue, $found)) {
269324
throw new SyntaxError("The HTTP textual representation \"$httpValue\" for a Number contains invalid characters.");
270325
}
271326

272327
return match (true) {
273-
1 === preg_match('/^-?\d{1,12}\.\d{1,3}$/', $found['number']) => [(float) $found['number'], strlen($found['number'])],
274-
1 === preg_match('/^-?\d{1,15}$/', $found['number']) => [(int) $found['number'], strlen($found['number'])],
328+
1 === preg_match(self::REGEXP_DECIMAL, $found['number']) => [(float) $found['number'], strlen($found['number'])],
329+
1 === preg_match(self::REGEXP_INTEGER, $found['number']) => [(int) $found['number'], strlen($found['number'])],
275330
default => throw new SyntaxError("The HTTP textual representation \"$httpValue\" for a Number contains too much digit."),
276331
};
277332
}
@@ -285,7 +340,7 @@ private static function parseNumber(string $httpValue): array
285340
*/
286341
private static function parseDate(string $httpValue): array
287342
{
288-
if (1 !== preg_match('/^@(?<date>-?\d{1,15})(?:[^\d.]|$)/', $httpValue, $found)) {
343+
if (1 !== preg_match(self::REGEXP_DATE, $httpValue, $found)) {
289344
throw new SyntaxError("The HTTP textual representation \"$httpValue\" for a Date contains invalid characters.");
290345
}
291346

@@ -314,7 +369,7 @@ private static function parseString(string $httpValue): array
314369
return [$output, $offset];
315370
}
316371

317-
if (1 === preg_match("/[^\x20-\x7E]/", $char)) {
372+
if (1 === preg_match(self::REGEXP_INVALID_CHARACTERS, $char)) {
318373
throw new SyntaxError("The HTTP textual representation \"$originalHttpValue\" for a String contains an invalid end string.");
319374
}
320375

@@ -348,7 +403,7 @@ private static function parseString(string $httpValue): array
348403
*/
349404
private static function parseToken(string $httpValue): array
350405
{
351-
preg_match("/^(?<token>[a-z*][a-z\d:\/!#\$%&'*+\-.^_`|~]*)/i", $httpValue, $found);
406+
preg_match(self::REGEXP_TOKEN, $httpValue, $found);
352407

353408
return [Token::fromString($found['token']), strlen($found['token'])];
354409
}
@@ -362,7 +417,7 @@ private static function parseToken(string $httpValue): array
362417
*/
363418
private static function parseByteSequence(string $httpValue): array
364419
{
365-
if (1 !== preg_match('/^(?<sequence>:(?<byte>[a-z\d+\/=]*):)/i', $httpValue, $matches)) {
420+
if (1 !== preg_match(self::REGEXP_BYTE_SEQUENCE, $httpValue, $matches)) {
366421
throw new SyntaxError("The HTTP textual representation \"$httpValue\" for a Byte Sequence contains invalid characters.");
367422
}
368423

src/ParserBench.php

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ public function benchParsingAnItemFormAnHTTPHeaderValue(): void
3232
#[Bench\Assert('mode(variant.mem.peak) < 2097152'), Bench\Assert('mode(variant.time.avg) < 10000000')]
3333
public function benchParsingAnDictionaryFormAnHTTPHeaderValue(): void
3434
{
35-
$httpValue = 'lang="en-US", type=42.0; expires=@1623233894; samesite=Strict; secure';
35+
$httpValue = 'lang="en-US"; samesite=Strict; secure, type=42.0; expires=@1623233894';
3636
for ($i = 0; $i < 100_000; $i++) {
3737
Parser::parseDictionary($httpValue);
3838
}

0 commit comments

Comments
 (0)