diff --git a/.github/copilot-instructions.md b/.github/copilot-instructions.md index c499a5c6b..f4942f25c 100644 --- a/.github/copilot-instructions.md +++ b/.github/copilot-instructions.md @@ -11,3 +11,11 @@ Use fully qualified result types (`anyhow::Result`) instead of importing them. When writing tests, prefer simple assertion macros without custom error messages: - Use `assert_eq!(actual, expected);` instead of `assert_eq!(actual, expected, "custom message");` - Use `assert!(condition);` instead of `assert!(condition, "custom message");` + +Tests are run with `just test`, not `cargo test`. + +When you extract code in a function (or move things around) that function goes +_below_ the calling function. A general goal is to be able to read linearly from +top to bottom with the relevant context and main logic first. The code should be +organised like a call stack. Of course that's not always possible, use best +judgement to produce the clearest code organization. diff --git a/crates/ark/src/lsp/snapshots/ark__lsp__symbols__tests__symbol_assignment_function_nested_section.snap b/crates/ark/src/lsp/snapshots/ark__lsp__symbols__tests__symbol_assignment_function_nested_section.snap index ddef8b970..5b593b3c5 100644 --- a/crates/ark/src/lsp/snapshots/ark__lsp__symbols__tests__symbol_assignment_function_nested_section.snap +++ b/crates/ark/src/lsp/snapshots/ark__lsp__symbols__tests__symbol_assignment_function_nested_section.snap @@ -15,8 +15,8 @@ expression: "test_symbol(\"\n## title0 ----\nfoo <- function() {\n # title1 --- character: 0, }, end: Position { - line: 1, - character: 14, + line: 7, + character: 1, }, }, selection_range: Range { @@ -25,8 +25,8 @@ expression: "test_symbol(\"\n## title0 ----\nfoo <- function() {\n # title1 --- character: 0, }, end: Position { - line: 1, - character: 14, + line: 7, + character: 1, }, }, children: Some( @@ -73,8 +73,8 @@ expression: "test_symbol(\"\n## title0 ----\nfoo <- function() {\n # title1 --- character: 2, }, end: Position { - line: 3, - character: 15, + line: 5, + character: 16, }, }, selection_range: Range { @@ -83,8 +83,8 @@ expression: "test_symbol(\"\n## title0 ----\nfoo <- function() {\n # title1 --- character: 2, }, end: Position { - line: 3, - character: 15, + line: 5, + character: 16, }, }, children: Some( diff --git a/crates/ark/src/lsp/symbols.rs b/crates/ark/src/lsp/symbols.rs index bdd31f13b..26e23ab19 100644 --- a/crates/ark/src/lsp/symbols.rs +++ b/crates/ark/src/lsp/symbols.rs @@ -9,7 +9,6 @@ use std::result::Result::Ok; -use anyhow::anyhow; use ropey::Rope; use stdext::unwrap::IntoResult; use tower_lsp::lsp_types::DocumentSymbol; @@ -28,12 +27,11 @@ use crate::lsp::indexer::IndexEntryData; use crate::lsp::state::WorldState; use crate::lsp::traits::rope::RopeExt; use crate::lsp::traits::string::StringExt; +use crate::treesitter::point_end_of_previous_row; use crate::treesitter::BinaryOperatorType; use crate::treesitter::NodeType; use crate::treesitter::NodeTypeExt; -type StoreStack = Vec<(usize, Option, Vec)>; - fn new_symbol(name: String, kind: SymbolKind, range: Range) -> DocumentSymbol { DocumentSymbol { name, @@ -114,6 +112,16 @@ pub fn symbols(params: &WorkspaceSymbolParams) -> anyhow::Result, + children: Vec, +} + pub(crate) fn document_symbols( state: &WorldState, params: &DocumentSymbolParams, @@ -123,158 +131,133 @@ pub(crate) fn document_symbols( let ast = &document.ast; let contents = &document.contents; - let node = ast.root_node(); + // Start walking from the root node + let root_node = ast.root_node(); + let mut result = Vec::new(); - // Index from the root - match index_node(&node, vec![], &contents) { - Ok(children) => Ok(children), - Err(err) => { - log::error!("Error indexing node: {err:?}"); - return Ok(Vec::new()); - }, + // Extract and process all symbols from the AST + if let Err(err) = collect_symbols(&root_node, contents, 0, &mut result) { + log::error!("Failed to collect symbols: {err:?}"); + return Ok(Vec::new()); } + + Ok(result) } -fn index_node( +/// Collect all document symbols from a node recursively +fn collect_symbols( node: &Node, - store: Vec, contents: &Rope, -) -> anyhow::Result> { - Ok(match node.node_type() { - // Handle comment sections in expression lists + current_level: usize, + symbols: &mut Vec, +) -> anyhow::Result<()> { + match node.node_type() { NodeType::Program | NodeType::BracedExpression => { - index_expression_list(&node, store, contents)? + collect_sections(node, contents, current_level, symbols)?; }, - // Index assignments as object or function symbols + NodeType::BinaryOperator(BinaryOperatorType::LeftAssignment) | NodeType::BinaryOperator(BinaryOperatorType::EqualsAssignment) => { - index_assignment(&node, store, contents)? + collect_assignment(node, contents, symbols)?; }, - // Nothing to index. FIXME: We should handle argument lists, e.g. to - // index inside functions passed as arguments, or inside `test_that()` - // blocks. - _ => store, - }) + + // For all other node types, no symbols need to be added + _ => {}, + } + + Ok(()) } -// Handles root node and braced lists -fn index_expression_list( +fn collect_sections( node: &Node, - store: Vec, contents: &Rope, -) -> anyhow::Result> { + current_level: usize, + symbols: &mut Vec, +) -> anyhow::Result<()> { + // In lists of expressions we track and collect section comments, then + // collect symbols from children nodes + let mut cursor = node.walk(); - // This is a stack of section levels and associated stores for comments of - // the type `# title ----`. It contains all currently active sections. - // The top-level section is the current store and has level 0. It should - // always be in the stack and popping it before we have finished indexing - // the whole expression list is a logic error. - let mut store_stack: StoreStack = vec![(0, None, store)]; + // Track active sections at each level + let mut active_sections: Vec
= Vec::new(); for child in node.children(&mut cursor) { if let NodeType::Comment = child.node_type() { - store_stack = index_comments(&child, store_stack, contents)?; + let comment_text = contents.node_slice(&child)?.to_string(); + + // If we have a section comment, add it to our stack and close any sections if needed + if let Some((level, title)) = parse_comment_as_section(&comment_text) { + let absolute_level = current_level + level; + + // Close any sections with equal or higher level + while !active_sections.is_empty() && + active_sections.last().unwrap().level >= absolute_level + { + // Set end position for the section being closed + if let Some(section) = active_sections.last_mut() { + let pos = point_end_of_previous_row(child.start_position(), contents); + section.end_position = Some(pos); + } + finalize_section(&mut active_sections, symbols, contents)?; + } + + let section = Section { + title, + level: absolute_level, + start_position: child.start_position(), + end_position: None, + children: Vec::new(), + }; + active_sections.push(section); + } + continue; } - // Get the current store to index the child subtree with. - // We restore the store in the stack right after that. - let Some((level, symbol, store)) = store_stack.pop() else { - return Err(anyhow!( - "Internal error: Store stack must have at least one element" - )); - }; - let store = index_node(&child, store, contents)?; - store_stack.push((level, symbol, store)); - } - - // Pop all sections from the stack, assigning their childrens and their - // parents along the way - while store_stack.len() > 0 { - if let Some(store) = store_stack_pop(&mut store_stack)? { - return Ok(store); + // If we get to this point, `child` is not a section comment. + // Recurse into child. + + if active_sections.is_empty() { + // If no active section, extend current vector of symbols + collect_symbols(&child, contents, current_level, symbols)?; + } else { + // Otherwise create new store of symbols for the current section + let mut child_symbols = Vec::new(); + collect_symbols(&child, contents, current_level, &mut child_symbols)?; + + // Nest them inside last section + if !child_symbols.is_empty() { + active_sections + .last_mut() + .unwrap() + .children + .extend(child_symbols); + } } } - Err(anyhow!( - "Internal error: Store stack must have at least one element" - )) -} - -// Pop store from the stack, recording its children and adding it as child to -// its parent (which becomes the last element in the stack). -fn store_stack_pop(store_stack: &mut StoreStack) -> anyhow::Result>> { - let Some((_, symbol, last)) = store_stack.pop() else { - return Ok(None); - }; - - if let Some(mut sym) = symbol { - // Assign children to symbol - sym.children = Some(last); - - let Some((_, _, ref mut parent_store)) = store_stack.last_mut() else { - return Err(anyhow!( - "Internal error: Store stack must have at least one element" - )); - }; - - // Store symbol as child of the last symbol on the stack - parent_store.push(sym); - - Ok(None) - } else { - Ok(Some(last)) - } -} - -fn index_comments( - node: &Node, - mut store_stack: StoreStack, - contents: &Rope, -) -> anyhow::Result { - let comment_text = contents.node_slice(&node)?.to_string(); - - // Check if the comment starts with one or more '#' followed by any text and ends with 4+ punctuations - let Some((level, title)) = parse_comment_as_section(&comment_text) else { - return Ok(store_stack); - }; - - // Create a section symbol based on the parsed comment - let start = convert_point_to_position(contents, node.start_position()); - let end = convert_point_to_position(contents, node.end_position()); - let symbol = new_symbol(title, SymbolKind::STRING, Range { start, end }); - - // Now pop all sections still on the stack that have a higher or equal - // level. Because we pop sections with equal levels, i.e. siblings, we - // ensure that there is only one active section per level on the stack. - // That simplifies things because we need to assign popped sections to their - // parents and we can assume the relevant parent is always the next on the - // stack. - loop { - let Some((last_level, _, _)) = store_stack.last() else { - return Err(anyhow!("Unexpectedly reached the end of the store stack")); - }; - - if *last_level >= level { - if store_stack_pop(&mut store_stack)?.is_some() { - return Err(anyhow!("Unexpectedly reached the end of the store stack")); + // Close any remaining active sections + while !active_sections.is_empty() { + // Set end position to the parent node's end for remaining sections + if let Some(section) = active_sections.last_mut() { + let mut pos = node.end_position(); + if pos.row > section.start_position.row { + pos = point_end_of_previous_row(pos, contents); } - continue; + section.end_position = Some(pos); } - - break; + finalize_section(&mut active_sections, symbols, contents)?; } - store_stack.push((level, Some(symbol), vec![])); - Ok(store_stack) + Ok(()) } -fn index_assignment( +fn collect_assignment( node: &Node, - mut store: Vec, contents: &Rope, -) -> anyhow::Result> { + symbols: &mut Vec, +) -> anyhow::Result<()> { // Check for assignment matches!( node.node_type(), @@ -291,7 +274,7 @@ fn index_assignment( let function = lhs.is_identifier_or_string() && rhs.is_function_definition(); if function { - return index_assignment_with_function(node, store, contents); + return collect_assignment_with_function(node, contents, symbols); } // otherwise, just index as generic object @@ -301,16 +284,16 @@ fn index_assignment( let end = convert_point_to_position(contents, lhs.end_position()); let symbol = new_symbol(name, SymbolKind::VARIABLE, Range { start, end }); - store.push(symbol); + symbols.push(symbol); - Ok(store) + Ok(()) } -fn index_assignment_with_function( +fn collect_assignment_with_function( node: &Node, - mut store: Vec, contents: &Rope, -) -> anyhow::Result> { + symbols: &mut Vec, +) -> anyhow::Result<()> { // check for lhs, rhs let lhs = node.child_by_field_name("lhs").into_result()?; let rhs = node.child_by_field_name("rhs").into_result()?; @@ -336,15 +319,45 @@ fn index_assignment_with_function( let body = rhs.child_by_field_name("body").into_result()?; - // At this point we increase the nesting level. Recurse into the function - // node with a new store of children nodes. - let children = index_node(&body, vec![], contents)?; + // Process the function body to extract child symbols + let mut children = Vec::new(); + collect_symbols(&body, contents, 0, &mut children)?; let mut symbol = new_symbol_node(name, SymbolKind::FUNCTION, range, children); symbol.detail = Some(detail); - store.push(symbol); + symbols.push(symbol); + + Ok(()) +} + +/// Finalize a section by creating a symbol and adding it to the parent section or output +fn finalize_section( + active_sections: &mut Vec
, + symbols: &mut Vec, + contents: &Rope, +) -> anyhow::Result<()> { + if let Some(section) = active_sections.pop() { + let start_pos = section.start_position; + let end_pos = section.end_position.unwrap_or(section.start_position); + + let range = Range { + start: convert_point_to_position(contents, start_pos), + end: convert_point_to_position(contents, end_pos), + }; + + let symbol = new_symbol(section.title, SymbolKind::STRING, range); + + let mut final_symbol = symbol; + final_symbol.children = Some(section.children); + + if let Some(parent) = active_sections.last_mut() { + parent.children.push(final_symbol); + } else { + symbols.push(final_symbol); + } + } - Ok(store) + Ok(()) } // Function to parse a comment and return the section level and title @@ -374,7 +387,9 @@ mod tests { let doc = Document::new(code, None); let node = doc.ast.root_node(); - index_node(&node, vec![], &doc.contents).unwrap() + let mut symbols = Vec::new(); + collect_symbols(&node, &doc.contents, 0, &mut symbols).unwrap(); + symbols } #[test] @@ -511,4 +526,65 @@ foo <- function() { assert_eq!(test_symbol("{ foo <- 1 }"), vec![foo]); } + + #[test] + fn test_symbol_section_ranges_extend() { + let symbols = test_symbol( + "# Section 1 ---- +x <- 1 +y <- 2 +# Section 2 ---- +z <- 3", + ); + + assert_eq!(symbols.len(), 2); + + // First section should extend from line 0 to line 2 (start of next section) + let section1 = &symbols[0]; + assert_eq!(section1.name, "Section 1"); + assert_eq!(section1.range.start.line, 0); + assert_eq!(section1.range.end.line, 2); + + // Second section should extend from line 3 to end of file + let section2 = &symbols[1]; + assert_eq!(section2.name, "Section 2"); + assert_eq!(section2.range.start.line, 3); + assert_eq!(section2.range.end.line, 3); + } + + #[test] + fn test_symbol_section_ranges_in_function() { + let symbols = test_symbol( + "foo <- function() { + # Section A ---- + x <- 1 + y <- 2 + # Section B ---- + z <- 3 +}", + ); + + assert_eq!(symbols.len(), 1); + + // Should have one function symbol + let function = &symbols[0]; + assert_eq!(function.name, "foo"); + assert_eq!(function.kind, SymbolKind::FUNCTION); + + // Function should have two section children + let children = function.children.as_ref().unwrap(); + assert_eq!(children.len(), 2); + + // First section should extend from line 1 to line 3 (start of next section) + let section_a = &children[0]; + assert_eq!(section_a.name, "Section A"); + assert_eq!(section_a.range.start.line, 1); + assert_eq!(section_a.range.end.line, 3); + + // Second section should extend from line 4 to end of function body + let section_b = &children[1]; + assert_eq!(section_b.name, "Section B"); + assert_eq!(section_b.range.start.line, 4); + assert_eq!(section_b.range.end.line, 5); // End of function body + } } diff --git a/crates/ark/src/treesitter.rs b/crates/ark/src/treesitter.rs index 93e82c70c..d8ef0e1e3 100644 --- a/crates/ark/src/treesitter.rs +++ b/crates/ark/src/treesitter.rs @@ -571,3 +571,72 @@ pub(crate) fn node_find_containing_call<'tree>(node: Node<'tree>) -> Option tree_sitter::Point { + if point.row > 0 { + let prev_row = point.row - 1; + let line = contents.line(prev_row as usize); + let line_len = line.len_chars().saturating_sub(1); // Subtract 1 for newline + tree_sitter::Point { + row: prev_row, + column: line_len, + } + } else { + // We're at the very beginning of the document, can't go back further + point.column = 0; + point + } +} + +#[cfg(test)] +mod tests { + use ropey::Rope; + use tree_sitter::Point; + + use super::*; + + #[test] + fn test_point_end_of_previous_row() { + let contents = Rope::from_str("hello world\nfoo bar\nbaz"); + let point = Point { row: 2, column: 1 }; + let result = point_end_of_previous_row(point, &contents); + assert_eq!(result, Point { row: 1, column: 7 }); + } + + #[test] + fn test_point_end_of_previous_row_first_row() { + let contents = Rope::from_str("hello world\nfoo bar\nbaz"); + let point = Point { row: 0, column: 5 }; + let result = point_end_of_previous_row(point, &contents); + assert_eq!(result, Point { row: 0, column: 0 }); + } + + #[test] + fn test_point_end_of_previous_row_empty_previous_line() { + let contents = Rope::from_str("hello\n\nworld"); + + let point = Point { row: 2, column: 1 }; + let result = point_end_of_previous_row(point, &contents); + assert_eq!(result, Point { row: 1, column: 0 }); + + let point = Point { row: 1, column: 1 }; + let result = point_end_of_previous_row(point, &contents); + assert_eq!(result, Point { row: 0, column: 5 }); + } + + #[test] + fn test_point_end_of_previous_row_single_line() { + let contents = Rope::from_str("hello world"); + + let point = Point { row: 0, column: 0 }; + let result = point_end_of_previous_row(point, &contents); + assert_eq!(result, Point { row: 0, column: 0 }); + + let point = Point { row: 0, column: 5 }; + let result = point_end_of_previous_row(point, &contents); + assert_eq!(result, Point { row: 0, column: 0 }); + } +}