From 3c7a28de29a98118f251bd74a6d34be9c8e4e9ff Mon Sep 17 00:00:00 2001 From: Mikhail Katychev Date: Fri, 6 Jun 2025 19:12:16 -0500 Subject: [PATCH 1/6] updated languages --- lib/linguist/languages.yml | 6 ++ samples/Tree-sitter Query/asm_highlights.scm | 66 ++++++++++++++++++++ samples/Tree-sitter Query/asm_injections.scm | 5 ++ 3 files changed, 77 insertions(+) create mode 100644 samples/Tree-sitter Query/asm_highlights.scm create mode 100644 samples/Tree-sitter Query/asm_injections.scm diff --git a/lib/linguist/languages.yml b/lib/linguist/languages.yml index fa9225b705..29ea514c2a 100644 --- a/lib/linguist/languages.yml +++ b/lib/linguist/languages.yml @@ -7704,6 +7704,12 @@ Tree-sitter Query: - tsq extensions: - ".scm" + filenames: + - highlights.scm + - injections.scm + - folds.scm + - indents.scm + - locals.scm tm_scope: source.scm ace_mode: text language_id: 436081647 diff --git a/samples/Tree-sitter Query/asm_highlights.scm b/samples/Tree-sitter Query/asm_highlights.scm new file mode 100644 index 0000000000..eccf9c9967 --- /dev/null +++ b/samples/Tree-sitter Query/asm_highlights.scm @@ -0,0 +1,66 @@ +; General +(label + [ + (ident) + (word) + ] @label) + +(reg) @variable.builtin + +(meta + kind: (_) @function.builtin) + +(instruction + kind: (_) @function.builtin) + +(const + name: (word) @constant) + +; Comments +[ + (line_comment) + (block_comment) +] @comment @spell + +; Literals +(int) @number + +(float) @number.float + +(string) @string + +; Keywords +[ + "byte" + "word" + "dword" + "qword" + "ptr" + "rel" + "label" + "const" +] @keyword + +; Operators & Punctuation +[ + "+" + "-" + "*" + "/" + "%" + "|" + "^" + "&" +] @operator + +[ + "(" + ")" + "[" + "]" +] @punctuation.bracket + +[ + "," + ":" +] @punctuation.delimiter diff --git a/samples/Tree-sitter Query/asm_injections.scm b/samples/Tree-sitter Query/asm_injections.scm new file mode 100644 index 0000000000..3cd6aac8e4 --- /dev/null +++ b/samples/Tree-sitter Query/asm_injections.scm @@ -0,0 +1,5 @@ +([ + (line_comment) + (block_comment) +] @injection.content + (#set! injection.language "comment")) From 7dd0beeb4d43f76ce26ceb4624c221e6bee3076c Mon Sep 17 00:00:00 2001 From: Mikhail Katychev Date: Fri, 6 Jun 2025 19:35:36 -0500 Subject: [PATCH 2/6] updated scheme and tsq heuristics --- lib/linguist/heuristics.yml | 2 + samples/Tree-sitter Query/func_highlights.scm | 167 ++++++++ .../Tree-sitter Query/tlaplus_highlights.scm | 380 ++++++++++++++++++ 3 files changed, 549 insertions(+) create mode 100644 samples/Tree-sitter Query/func_highlights.scm create mode 100644 samples/Tree-sitter Query/tlaplus_highlights.scm diff --git a/lib/linguist/heuristics.yml b/lib/linguist/heuristics.yml index 9f46e28c0e..d90057d6f0 100644 --- a/lib/linguist/heuristics.yml +++ b/lib/linguist/heuristics.yml @@ -744,12 +744,14 @@ disambiguations: negative_pattern: - '\(#[\w-]+[!\?]' - '[\)\]"_]\s*(?:[\*\+\?]|@\w)' + - '@[\w.-]+(?:\s|$)' - language: Tree-sitter Query pattern: - '\(#[\w-]+[!\?]' - '[\)\]"_]\s*(?:[\*\+\?]|@\w)' - '(?:^\s*\w+:\s*[\(\[\"])' - '\(#(?:set!|(?:not-)?(?:any-of|match)\?)' + - '@[\w.-]+(?:\s|$)' negative_pattern: - '\([+\-:<>\/=~\)]' - extensions: ['.sol'] diff --git a/samples/Tree-sitter Query/func_highlights.scm b/samples/Tree-sitter Query/func_highlights.scm new file mode 100644 index 0000000000..9fd6dd82db --- /dev/null +++ b/samples/Tree-sitter Query/func_highlights.scm @@ -0,0 +1,167 @@ +; Include +"#include" @keyword.import + +(include_path) @string + +; Preproc +"#pragma" @keyword.directive + +(pragma_directive + [ + "version" + "not-version" + "test-version-set" + ] @keyword.directive) + +; Keywords +[ + "asm" + "impure" + "inline" + "inline_ref" + "method_id" + "type" +] @keyword + +"return" @keyword.return + +; Conditionals +[ + "if" + "ifnot" + "else" + "elseif" + "elseifnot" + "until" +] @keyword.conditional + +; Exceptions +[ + "try" + "catch" +] @keyword.exception + +; Repeats +[ + "do" + "forall" + "repeat" + "while" +] @keyword.repeat + +; Qualifiers +[ + "const" + "global" + (var) +] @keyword.modifier + +; Variables +(identifier) @variable + +; Constants +(const_var_declarations + name: (identifier) @constant) + +; Functions/Methods +(function_definition + name: (function_name) @function) + +(function_application + function: (identifier) @function) + +(method_call + method_name: (identifier) @function.method.call) + +; Parameters +(parameter) @variable.parameter + +; Types +(type_identifier) @type + +(primitive_type) @type.builtin + +; Operators +[ + "=" + "+=" + "-=" + "*=" + "/=" + "~/=" + "^/=" + "%=" + "~%=" + "^%=" + "<<=" + ">>=" + "~>>=" + "^>>=" + "&=" + "|=" + "^=" + "==" + "<" + ">" + "<=" + ">=" + "!=" + "<=>" + "<<" + ">>" + "~>>" + "^>>" + "-" + "+" + "|" + "^" + "*" + "/" + "%" + "~/" + "^/" + "~%" + "^%" + "/%" + "&" + "~" +] @operator + +; Literals +[ + (string) + (asm_instruction) +] @string + +[ + (string_type) + (underscore) +] @character.special + +(number) @number + +; Punctuation +[ + "{" + "}" +] @punctuation.bracket + +[ + "(" + ")" + "()" +] @punctuation.bracket + +[ + "[" + "]" +] @punctuation.bracket + +[ + ";" + "," + "->" +] @punctuation.delimiter + +; Comments +(comment) @comment @spell diff --git a/samples/Tree-sitter Query/tlaplus_highlights.scm b/samples/Tree-sitter Query/tlaplus_highlights.scm new file mode 100644 index 0000000000..2eca927e70 --- /dev/null +++ b/samples/Tree-sitter Query/tlaplus_highlights.scm @@ -0,0 +1,380 @@ +; Keywords +[ + "ACTION" + "ASSUME" + "ASSUMPTION" + "AXIOM" + "BY" + "CASE" + "CHOOSE" + "CONSTANT" + "CONSTANTS" + "COROLLARY" + "DEF" + "DEFINE" + "DEFS" + "ELSE" + "EXCEPT" + "EXTENDS" + "HAVE" + "HIDE" + "IF" + "IN" + "INSTANCE" + "LAMBDA" + "LEMMA" + "LET" + "LOCAL" + "MODULE" + "NEW" + "OBVIOUS" + "OMITTED" + "ONLY" + "OTHER" + "PICK" + "PROOF" + "PROPOSITION" + "PROVE" + "QED" + "RECURSIVE" + "SF_" + "STATE" + "SUFFICES" + "TAKE" + "TEMPORAL" + "THEN" + "THEOREM" + "USE" + "VARIABLE" + "VARIABLES" + "WF_" + "WITH" + "WITNESS" + (address) + (all_map_to) + (assign) + (case_arrow) + (case_box) + (def_eq) + (exists) + (forall) + (gets) + (label_as) + (maps_to) + (set_in) + (temporal_exists) + (temporal_forall) +] @keyword + +; Pluscal keywords +[ + (pcal_algorithm_start) + "algorithm" + "assert" + "begin" + "call" + "define" + "end" + "fair" + "goto" + "macro" + "or" + "procedure" + "process" + (pcal_skip) + "variable" + "variables" + "when" + "with" +] @keyword + +"await" @keyword.coroutine + +(pcal_with + "=" @keyword) + +(pcal_process + "=" @keyword) + +[ + "if" + "then" + "else" + "elsif" + (pcal_end_if) + "either" + (pcal_end_either) +] @keyword.conditional + +[ + "while" + "do" + (pcal_end_while) + "with" + (pcal_end_with) +] @keyword.repeat + +(pcal_return) @keyword.return + +"print" @function.macro + +; Literals +(binary_number + (format) @keyword) + +(binary_number + (value) @number) + +(boolean) @boolean + +(boolean_set) @type + +(hex_number + (format) @keyword) + +(hex_number + (value) @number) + +(int_number_set) @type + +(nat_number) @number + +(nat_number_set) @type + +(octal_number + (format) @keyword) + +(octal_number + (value) @number) + +(real_number) @number + +(real_number_set) @type + +(string) @string + +(escape_char) @string.escape + +(string_set) @type + +; Namespaces +(extends + (identifier_ref) @module) + +(instance + (identifier_ref) @module) + +(module + name: (identifier) @module) + +(pcal_algorithm + name: (identifier) @module) + +; Operators, functions, and macros +(bound_infix_op + symbol: (_) @operator) + +(bound_nonfix_op + symbol: (_) @operator) + +(bound_postfix_op + symbol: (_) @operator) + +(bound_prefix_op + symbol: (_) @operator) + +(prefix_op_symbol) @operator + +(infix_op_symbol) @operator + +(postfix_op_symbol) @operator + +(function_definition + name: (identifier) @function) + +(module_definition + name: (_) @keyword.import) + +(operator_definition + name: (_) @function.macro) + +(pcal_macro_decl + name: (identifier) @function.macro) + +(pcal_macro_call + name: (identifier) @function.macro) + +(pcal_proc_decl + name: (identifier) @function.macro) + +(pcal_process + name: (identifier) @function) + +(recursive_declaration + (identifier) @function.macro) + +(recursive_declaration + (operator_declaration + name: (_) @function.macro)) + +; Constants and variables +(constant_declaration + (identifier) @constant) + +(constant_declaration + (operator_declaration + name: (_) @constant)) + +(pcal_var_decl + (identifier) @variable) + +(pcal_with + (identifier) @variable.parameter) + +("." + . + (identifier) @attribute) + +(record_literal + (identifier) @attribute) + +(set_of_records + (identifier) @attribute) + +(variable_declaration + (identifier) @variable) + +; Parameters +(choose + (identifier) @variable.parameter) + +(choose + (tuple_of_identifiers + (identifier) @variable.parameter)) + +(lambda + (identifier) @variable.parameter) + +(module_definition + (operator_declaration + name: (_) @variable.parameter)) + +(module_definition + parameter: (identifier) @variable.parameter) + +(operator_definition + (operator_declaration + name: (_) @variable.parameter)) + +(operator_definition + parameter: (identifier) @variable.parameter) + +(pcal_macro_decl + parameter: (identifier) @variable.parameter) + +(pcal_proc_var_decl + (identifier) @variable.parameter) + +(quantifier_bound + (identifier) @variable.parameter) + +(quantifier_bound + (tuple_of_identifiers + (identifier) @variable.parameter)) + +(unbounded_quantification + (identifier) @variable.parameter) + +; Delimiters +[ + (langle_bracket) + (rangle_bracket) + (rangle_bracket_sub) + "{" + "}" + "[" + "]" + "]_" + "(" + ")" +] @punctuation.bracket + +[ + "," + ":" + "." + "!" + ";" + (bullet_conj) + (bullet_disj) + (prev_func_val) + (placeholder) +] @punctuation.delimiter + +; Proofs +(assume_prove + (new + (identifier) @variable.parameter)) + +(assume_prove + (new + (operator_declaration + name: (_) @variable.parameter))) + +(assumption + name: (identifier) @constant) + +(pick_proof_step + (identifier) @variable.parameter) + +(proof_step_id + "<" @punctuation.bracket) + +(proof_step_id + (level) @label) + +(proof_step_id + (name) @label) + +(proof_step_id + ">" @punctuation.bracket) + +(proof_step_ref + "<" @punctuation.bracket) + +(proof_step_ref + (level) @label) + +(proof_step_ref + (name) @label) + +(proof_step_ref + ">" @punctuation.bracket) + +(take_proof_step + (identifier) @variable.parameter) + +(theorem + name: (identifier) @constant) + +; Comments and tags +(block_comment + "(*" @comment) + +(block_comment + "*)" @comment) + +(block_comment_text) @comment @spell + +(comment) @comment @spell + +(single_line) @comment + +(_ + label: (identifier) @label) + +(label + name: (_) @label) + +(pcal_goto + statement: (identifier) @label) From a6cb45dd78b6a3def58cc07de5cf6462dcccbd10 Mon Sep 17 00:00:00 2001 From: Mikhail Katychev Date: Fri, 6 Jun 2025 19:39:36 -0500 Subject: [PATCH 3/6] Tree-sitter Query moved above scheme --- lib/linguist/heuristics.yml | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/lib/linguist/heuristics.yml b/lib/linguist/heuristics.yml index d90057d6f0..3727735737 100644 --- a/lib/linguist/heuristics.yml +++ b/lib/linguist/heuristics.yml @@ -737,14 +737,6 @@ disambiguations: pattern: '^#+\s+(NAME|SYNOPSIS|DESCRIPTION)' - extensions: ['.scm'] rules: - - language: Scheme - pattern: - - '(?:''[\(\*#]|\w->\w|\.\.\.[\s\)]|\([+\-:<>\/=~\)]|~>|[#`]\(|#:\w)' - - '^\s*\((?:define\*?|import|library|lambda)' - negative_pattern: - - '\(#[\w-]+[!\?]' - - '[\)\]"_]\s*(?:[\*\+\?]|@\w)' - - '@[\w.-]+(?:\s|$)' - language: Tree-sitter Query pattern: - '\(#[\w-]+[!\?]' @@ -754,6 +746,14 @@ disambiguations: - '@[\w.-]+(?:\s|$)' negative_pattern: - '\([+\-:<>\/=~\)]' + - language: Scheme + pattern: + - '(?:''[\(\*#]|\w->\w|\.\.\.[\s\)]|\([+\-:<>\/=~\)]|~>|[#`]\(|#:\w)' + - '^\s*\((?:define\*?|import|library|lambda)' + negative_pattern: + - '\(#[\w-]+[!\?]' + - '[\)\]"_]\s*(?:[\*\+\?]|@\w)' + - '@[\w.-]+(?:\s|$)' - extensions: ['.sol'] rules: - language: Solidity From b3a5184e0ea894115510577b786365a4c9ec2202 Mon Sep 17 00:00:00 2001 From: Mikhail Katychev Date: Fri, 6 Jun 2025 22:17:15 -0500 Subject: [PATCH 4/6] remove @w tail --- lib/linguist/heuristics.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/linguist/heuristics.yml b/lib/linguist/heuristics.yml index 3727735737..7f2b1c7554 100644 --- a/lib/linguist/heuristics.yml +++ b/lib/linguist/heuristics.yml @@ -740,7 +740,7 @@ disambiguations: - language: Tree-sitter Query pattern: - '\(#[\w-]+[!\?]' - - '[\)\]"_]\s*(?:[\*\+\?]|@\w)' + - '(?:[\)\]]\s*[\*\+\?](?:\s|$))' - '(?:^\s*\w+:\s*[\(\[\"])' - '\(#(?:set!|(?:not-)?(?:any-of|match)\?)' - '@[\w.-]+(?:\s|$)' @@ -752,7 +752,7 @@ disambiguations: - '^\s*\((?:define\*?|import|library|lambda)' negative_pattern: - '\(#[\w-]+[!\?]' - - '[\)\]"_]\s*(?:[\*\+\?]|@\w)' + - '(?:[\)\]]\s*[\*\+\?](?:\s|$))' - '@[\w.-]+(?:\s|$)' - extensions: ['.sol'] rules: From 081827e7065d271e80fe9cdce617d96123c67be4 Mon Sep 17 00:00:00 2001 From: Mikhail Katychev Date: Fri, 6 Jun 2025 22:27:56 -0500 Subject: [PATCH 5/6] sort filenames --- lib/linguist/languages.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/linguist/languages.yml b/lib/linguist/languages.yml index 29ea514c2a..811d5aa5a7 100644 --- a/lib/linguist/languages.yml +++ b/lib/linguist/languages.yml @@ -7705,10 +7705,10 @@ Tree-sitter Query: extensions: - ".scm" filenames: - - highlights.scm - - injections.scm - folds.scm + - highlights.scm - indents.scm + - injections.scm - locals.scm tm_scope: source.scm ace_mode: text From 3545170d4fe5447c8cf78ff8d4863518d0cc0ca3 Mon Sep 17 00:00:00 2001 From: Mikhail Katychev Date: Tue, 17 Jun 2025 10:10:37 -0500 Subject: [PATCH 6/6] added closing parentheses to TSQ tag heuristic --- lib/linguist/heuristics.yml | 4 ++-- lib/linguist/languages.yml | 6 ------ 2 files changed, 2 insertions(+), 8 deletions(-) diff --git a/lib/linguist/heuristics.yml b/lib/linguist/heuristics.yml index a2f9a90caa..9af3c1a4bf 100644 --- a/lib/linguist/heuristics.yml +++ b/lib/linguist/heuristics.yml @@ -754,7 +754,7 @@ disambiguations: - '(?:[\)\]]\s*[\*\+\?](?:\s|$))' - '(?:^\s*\w+:\s*[\(\[\"])' - '\(#(?:set!|(?:not-)?(?:any-of|match)\?)' - - '@[\w.-]+(?:\s|$)' + - '@[\w.-]+(?:\)\s|$)' negative_pattern: - '\([+\-:<>\/=~\)]' - language: Scheme @@ -764,7 +764,7 @@ disambiguations: negative_pattern: - '\(#[\w-]+[!\?]' - '(?:[\)\]]\s*[\*\+\?](?:\s|$))' - - '@[\w.-]+(?:\s|$)' + - '@[\w.-]+(?:\)\s|$)' - extensions: ['.sol'] rules: - language: Solidity diff --git a/lib/linguist/languages.yml b/lib/linguist/languages.yml index 016df016ad..99f619db7b 100644 --- a/lib/linguist/languages.yml +++ b/lib/linguist/languages.yml @@ -7712,12 +7712,6 @@ Tree-sitter Query: - tsq extensions: - ".scm" - filenames: - - folds.scm - - highlights.scm - - indents.scm - - injections.scm - - locals.scm tm_scope: source.scm ace_mode: text language_id: 436081647