Skip to content

Remove newlines from tokens #63

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Apr 12, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion composer.json
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,8 @@
"phpstan/phpstan": "^2.0",
"phpstan/extension-installer": "^1.4.3",
"illuminate/support": "^11.30",
"symfony/process": "^7.2"
"symfony/process": "^7.2",
"rogervila/array-diff-multidimensional": "^2.1"
},
"config": {
"allow-plugins": {
Expand Down
57 changes: 54 additions & 3 deletions meta/sample.php
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,13 @@
set_time_limit(2);

use Phiki\Environment\Environment;
use Phiki\Grammar\DefaultGrammars;
use Phiki\Phiki;
use Phiki\Theme\Theme;
use Phiki\Token\Token;
use Symfony\Component\Process\Process;

require_once __DIR__.'/../vendor/autoload.php';
require_once __DIR__ . '/../vendor/autoload.php';

set_error_handler(function ($severity, $message, $file, $line) {
throw new ErrorException($message, 0, $severity, $file, $line);
Expand All @@ -20,10 +23,43 @@
$grammars = $repository->getAllGrammarNames();
natsort($grammars);

$sample = file_get_contents(__DIR__.'/../resources/samples/'.$grammar.'.sample');
$sample = file_get_contents($samplePath = __DIR__ . '/../resources/samples/' . $grammar . '.sample');
$tokens = (new Phiki($environment))->codeToTokens($sample, $grammar);
$html = (new Phiki($environment))->codeToHtml($sample, $grammar, ['light' => Theme::GithubLight, 'dark' => Theme::GithubDark], $withGutter);

$process = new Process(
[
'node',
__DIR__ . '/../tests/Fixtures/vscode-textmate-compliance.js',
$samplePath,
array_flip(DefaultGrammars::SCOPES_TO_NAMES)[$grammar],
json_encode(collect(DefaultGrammars::SCOPES_TO_NAMES)
->mapWithKeys(fn(string $name, string $scope) => [$scope => DefaultGrammars::NAMES_TO_PATHS[$name]])
->all()),
],
);

$process->run();

if (! $process->isSuccessful()) {
throw new RuntimeException($process->getErrorOutput() . ':' . PHP_EOL . $process->getOutput());
}

$vscodeTextmateOutput = array_map(
fn(array $lineTokens) => array_map(
fn(array $token) => new Token(
scopes: $token['scopes'],
text: $token['text'],
start: $token['start'],
end: $token['end'],
),
$lineTokens
),
json_decode($process->getOutput(), true),
);

$tokenDiff = array_diff_multidimensional($tokens, $vscodeTextmateOutput, false);

?>

<!DOCTYPE html>
Expand Down Expand Up @@ -102,7 +138,22 @@ class="flex items-center gap-x-4">

<?= $html ?>

<?php dump($tokens); ?>
<div class="grid grid-cols-2 gap-10">
<div>
<p class="text-xl text-white mb-4">Phiki tokens:</p>
<?php dump($tokens); ?>
</div>

<div>
<p class="text-xl text-white mb-4">vscode-textmate tokens:</p>
<?php dump($vscodeTextmateOutput); ?>
</div>

<div>
<p class="text-xl text-white mb-4">Differences:</p>
<?php dump($tokenDiff); ?>
</div>
</div>
</main>
</body>

Expand Down
2 changes: 1 addition & 1 deletion resources/samples/actionscript-3.sample
Original file line number Diff line number Diff line change
Expand Up @@ -24,4 +24,4 @@ private function createParticles( count ):void{
}
}

// From https://code.tutsplus.com/tutorials/actionscript-30-optimization-a-practical-example--active-11295
// From https://code.tutsplus.com/tutorials/actionscript-30-optimization-a-practical-example--active-11295
2 changes: 1 addition & 1 deletion src/Generators/HtmlGenerator.php
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ private function buildLine(array $line, int $index): string
$output[] = $this->buildToken($token);
}

return '<span class="line">'.implode($output).'</span>';
return '<span class="line">'.implode($output)."</span>\n";
}

private function buildLineNumber(int $lineNumber): string
Expand Down
2 changes: 2 additions & 0 deletions src/Generators/TerminalGenerator.php
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@ public function generate(array $tokens): string
$output .= Color::ANSI_RESET;
}
}

$output .= PHP_EOL;
}

return $output;
Expand Down
21 changes: 20 additions & 1 deletion src/Tokenizer.php
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,26 @@ public function tokenize(string $input): array
$this->state->resetAnchorPositions();

$this->tokenizeLine($line, $lineText."\n");

// If the last token added has a newline character, we need to remove it.
if (isset($this->tokens[$line]) && count($this->tokens[$line]) > 0) {
$lastToken = end($this->tokens[$line]);

// If we have a newline token and the line has more than just that newline token,
// we need to remove it from the tokens array.
if ($lastToken->text === "\n" && count($this->tokens[$line]) > 1) {
array_pop($this->tokens[$line]);
// Otherwise, if that newline token is the only token on the line, we keep the token but empty its text.
} elseif ($lastToken->text === "\n") {
$lastToken->text = '';
$lastToken->end = $lastToken->start + 1;
// And if the last token ends with a newline character, we need to remove that from the token text
// and update the end position accordingly.
} elseif (str_ends_with($lastToken->text, "\n")) {
$lastToken->text = substr($lastToken->text, 0, -1);
$lastToken->end = $lastToken->start + strlen($lastToken->text) + 1;
}
}
}

return $this->tokens;
Expand All @@ -63,7 +83,6 @@ public function tokenizeLine(int $line, string $lineText): void
$this->checkWhileConditions($line, $lineText);

while ($this->state->getLinePosition() < strlen($lineText)) {
$remainingText = substr($lineText, $this->state->getLinePosition());
$root = $this->state->getPattern();
$matched = $this->match($lineText);
$endIsMatched = false;
Expand Down
2 changes: 1 addition & 1 deletion tests/Integration/VscodeTextmateTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,6 @@
$expected = vscodeTextmateTokenize($samplePath, $grammar);
$actual = (new Phiki)->codeToTokens(file_get_contents($samplePath), $grammar);

expect($actual)->toEqualCanonicalizing($expected);
expect($actual)->toEqualCanonicalizing($expected, 'Phiki produced different tokens than vscode-textmate.');
})
->with('grammars');
1 change: 0 additions & 1 deletion tests/Languages/CppTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@
new Token(['source.cpp', 'comment.block.cpp', 'punctuation.definition.comment.begin.cpp'], '/*', 10, 12),
new Token(['source.cpp', 'comment.block.cpp'], ' comment ', 12, 21),
new Token(['source.cpp', 'comment.block.cpp', 'punctuation.definition.comment.end.cpp'], '*/', 21, 23),
new Token(['source.cpp'], "\n", 23, 23),
],
]);
})->issue(57);
4 changes: 0 additions & 4 deletions tests/Languages/HtmlTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@
new Token(['text.html.basic', 'meta.tag.structure.div.end.html', 'punctuation.definition.tag.begin.html'], '</', 5, 7),
new Token(['text.html.basic', 'meta.tag.structure.div.end.html', 'entity.name.tag.html'], 'div', 7, 10),
new Token(['text.html.basic', 'meta.tag.structure.div.end.html', 'punctuation.definition.tag.end.html'], '>', 10, 11),
new Token(['text.html.basic'], "\n", 11, 11),
],
]);
});
Expand All @@ -32,7 +31,6 @@
new Token(['text.html.basic', 'meta.tag.structure.h1.end.html', 'punctuation.definition.tag.begin.html'], '</', 17, 19),
new Token(['text.html.basic', 'meta.tag.structure.h1.end.html', 'entity.name.tag.html'], 'h1', 19, 21),
new Token(['text.html.basic', 'meta.tag.structure.h1.end.html', 'punctuation.definition.tag.end.html'], '>', 21, 22),
new Token(['text.html.basic'], "\n", 22, 22),
],
]);
});
Expand All @@ -50,7 +48,6 @@
new Token(['text.html.basic', 'meta.tag.structure.h1.end.html', 'punctuation.definition.tag.begin.html'], '</', 11, 13),
new Token(['text.html.basic', 'meta.tag.structure.h1.end.html', 'entity.name.tag.html'], 'h1', 13, 15),
new Token(['text.html.basic', 'meta.tag.structure.h1.end.html', 'punctuation.definition.tag.end.html'], '>', 15, 16),
new Token(['text.html.basic'], "\n", 16, 16),
],
]);
});
Expand All @@ -72,7 +69,6 @@
new Token(['text.html.basic', 'meta.tag.structure.h1.end.html', 'punctuation.definition.tag.begin.html'], '</', 16, 18),
new Token(['text.html.basic', 'meta.tag.structure.h1.end.html', 'entity.name.tag.html'], 'h1', 18, 20),
new Token(['text.html.basic', 'meta.tag.structure.h1.end.html', 'punctuation.definition.tag.end.html'], '>', 20, 21),
new Token(['text.html.basic'], "\n", 21, 21),
],
]);
});
Expand Down
1 change: 0 additions & 1 deletion tests/Languages/JavascriptTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@
[
new Token(['source.js', 'comment.line.double-slash.js', 'punctuation.definition.comment.js'], '//', 0, 2),
new Token(['source.js', 'comment.line.double-slash.js'], ' This is a comment.', 2, 21),
new Token(['source.js'], "\n", 21, 21),
],
]);
});
Expand Down
9 changes: 0 additions & 9 deletions tests/Languages/PhpTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
new Token(['source.php', 'string.quoted.double.php', 'punctuation.definition.string.begin.php'], '"', 0, 1),
new Token(['source.php', 'string.quoted.double.php'], 'Hello, world!', 1, 14),
new Token(['source.php', 'string.quoted.double.php', 'punctuation.definition.string.end.php'], '"', 14, 15),
new Token(['source.php'], "\n", 15, 15),
],
]);
});
Expand All @@ -24,7 +23,6 @@
[
new Token(['source.php', 'variable.other.php', 'punctuation.definition.variable.php'], '$', 0, 1),
new Token(['source.php', 'variable.other.php'], 'name', 1, 5),
new Token(['source.php'], "\n", 5, 5),
],
]);
});
Expand All @@ -42,7 +40,6 @@
new Token(['source.php', 'string.quoted.double.php', 'punctuation.definition.variable.php'], '}', 14, 15),
new Token(['source.php', 'string.quoted.double.php'], '!', 15, 16),
new Token(['source.php', 'string.quoted.double.php', 'punctuation.definition.string.end.php'], '"', 16, 17),
new Token(['source.php'], "\n", 17, 17),
],
]);
});
Expand All @@ -62,7 +59,6 @@
new Token(['source.php', 'meta.class.php'], ' ', 17, 18),
new Token(['source.php', 'meta.class.php', 'punctuation.definition.class.begin.bracket.curly.php'], '{', 18, 19),
new Token(['source.php', 'meta.class.php', 'punctuation.definition.class.end.bracket.curly.php'], '}', 19, 20),
new Token(['source.php'], "\n", 20, 20),
],
]);
});
Expand All @@ -76,7 +72,6 @@
new Token(['source.php', 'meta.use.php'], ' ', 3, 4),
new Token(['source.php', 'meta.use.php', 'support.class.php'], 'A', 4, 5),
new Token(['source.php', 'punctuation.terminator.expression.php'], ';', 5, 6),
new Token(['source.php'], "\n", 6, 6),
],
]);
});
Expand All @@ -94,7 +89,6 @@
new Token(['source.php', 'meta.use.php', 'support.other.namespace.php', 'punctuation.separator.inheritance.php'], '\\', 7, 8),
new Token(['source.php', 'meta.use.php', 'support.class.php'], 'C', 8, 9),
new Token(['source.php', 'punctuation.terminator.expression.php'], ';', 9, 10),
new Token(['source.php'], "\n", 10, 10),
],
]);
});
Expand All @@ -116,7 +110,6 @@
new Token(['source.php'], ' ', 16, 17),
new Token(['source.php', 'punctuation.definition.begin.bracket.curly.php'], '{', 17, 18),
new Token(['source.php', 'punctuation.definition.end.bracket.curly.php'], '}', 18, 19),
new Token(['source.php'], "\n", 19, 19),
],
]);
});
Expand All @@ -140,7 +133,6 @@
new Token(['source.php'], ' ', 18, 19),
new Token(['source.php', 'punctuation.definition.begin.bracket.curly.php'], '{', 19, 20),
new Token(['source.php', 'punctuation.definition.end.bracket.curly.php'], '}', 20, 21),
new Token(['source.php'], "\n", 21, 21),
],
]);
});
Expand All @@ -164,7 +156,6 @@
new Token(['source.php'], ' ', 24, 25),
new Token(['source.php', 'punctuation.definition.begin.bracket.curly.php'], '{', 25, 26),
new Token(['source.php', 'punctuation.definition.end.bracket.curly.php'], '}', 26, 27),
new Token(['source.php'], "\n", 27, 27),
],
]);
});
Expand Down
2 changes: 0 additions & 2 deletions tests/Languages/TomlTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
new Token(['source.toml', 'meta.group.toml', 'punctuation.definition.section.begin.toml'], '[', 0, 1),
new Token(['source.toml', 'meta.group.toml', 'entity.name.section.toml'], 'group', 1, 6),
new Token(['source.toml', 'meta.group.toml', 'punctuation.definition.section.begin.toml'], ']', 6, 7),
new Token(['source.toml'], "\n", 7, 7),
],
]);
});
Expand All @@ -27,7 +26,6 @@
new Token(['source.toml', 'meta.group.toml'], '.', 6, 7),
new Token(['source.toml', 'meta.group.toml', 'entity.name.section.toml'], 'subgroup', 7, 15),
new Token(['source.toml', 'meta.group.toml', 'punctuation.definition.section.begin.toml'], ']', 15, 16),
new Token(['source.toml'], "\n", 16, 16),
],
]);
});
Expand Down
1 change: 0 additions & 1 deletion tests/Languages/YamlTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@
new Token(['source.yaml', 'string.quoted.double.yaml', 'punctuation.definition.string.begin.yaml'], '"', 6, 7),
new Token(['source.yaml', 'string.quoted.double.yaml'], 'Hello, world', 7, 19),
new Token(['source.yaml', 'string.quoted.double.yaml', 'punctuation.definition.string.end.yaml'], '"', 19, 20),
new Token(['source.yaml'], "\n", 20, 20),
],
]);
});
Expand Down
Loading