diff --git a/crates/ark/src/lsp/diagnostics.rs b/crates/ark/src/lsp/diagnostics.rs index c9296bc99..8dee0c2f1 100644 --- a/crates/ark/src/lsp/diagnostics.rs +++ b/crates/ark/src/lsp/diagnostics.rs @@ -5,8 +5,10 @@ // // +use std::collections::BTreeMap; use std::collections::HashMap; use std::collections::HashSet; +use std::sync::Arc; use anyhow::bail; use anyhow::Result; @@ -17,14 +19,19 @@ use stdext::*; use tower_lsp::lsp_types::Diagnostic; use tower_lsp::lsp_types::DiagnosticSeverity; use tree_sitter::Node; +use tree_sitter::Point; use tree_sitter::Range; +use crate::lsp; use crate::lsp::declarations::top_level_declare; use crate::lsp::diagnostics_syntax::syntax_diagnostics; use crate::lsp::documents::Document; use crate::lsp::encoding::convert_tree_sitter_range_to_lsp_range; use crate::lsp::indexer; +use crate::lsp::inputs::library::Library; +use crate::lsp::inputs::package::Package; use crate::lsp::state::WorldState; +use crate::lsp::traits::node::NodeExt; use crate::lsp::traits::rope::RopeExt; use crate::treesitter::node_has_error_or_missing; use crate::treesitter::BinaryOperatorType; @@ -55,6 +62,14 @@ pub struct DiagnosticContext<'a> { // The set of packages that are currently installed. pub installed_packages: HashSet, + /// Reference to the library for looking up package exports. + pub library: &'a Library, + + /// The symbols exported by packages loaded via `library()` calls in this + /// document. Currently global. TODO: Store individual exports in a BTreeMap + /// sorted by position in the source? + pub library_symbols: BTreeMap>, + // Whether or not we're inside of a formula. pub in_formula: bool, @@ -69,13 +84,15 @@ impl Default for DiagnosticsConfig { } impl<'a> DiagnosticContext<'a> { - pub fn new(contents: &'a Rope) -> Self { + pub fn new(contents: &'a Rope, library: &'a Library) -> Self { Self { contents, document_symbols: Vec::new(), session_symbols: HashSet::new(), workspace_symbols: HashSet::new(), installed_packages: HashSet::new(), + library, + library_symbols: BTreeMap::new(), in_formula: false, in_call_like_arguments: false, } @@ -86,20 +103,30 @@ impl<'a> DiagnosticContext<'a> { symbols.insert(name.to_string(), location); } - pub fn has_definition(&mut self, name: &str) -> bool { - // First, check document symbols. - for symbols in self.document_symbols.iter() { + pub fn has_definition(&self, name: &str, start_position: Point) -> bool { + // Check document symbols + for symbols in &self.document_symbols { if symbols.contains_key(name) { return true; } } - // Next, check workspace symbols. + // Check workspace symbols if self.workspace_symbols.contains(name) { return true; } - // Finally, check session symbols. + // Check all symbols exported by `library()` calls before the given position + for (library_position, exports) in self.library_symbols.iter() { + if *library_position > start_position { + break; + } + if exports.contains(name) { + return true; + } + } + + // Finally, check session symbols self.session_symbols.contains(name) } } @@ -118,7 +145,7 @@ pub(crate) fn generate_diagnostics(doc: Document, state: WorldState) -> Vec recurse_call_like_arguments_default(node, context, diagnostics)?, + "library" | "require" => { + // Track symbols exported by `library()` or `require()` calls + if let Err(err) = handle_package_attach_call(node, context) { + lsp::log_warn!("Can't handle attach call: {err:?}"); + } + }, + _ => {}, }; + // Continue with default recursion to handle any other arguments + recurse_call_like_arguments_default(node, context, diagnostics)?; + ().ok() } +fn handle_package_attach_call(node: Node, context: &mut DiagnosticContext) -> anyhow::Result<()> { + // Find the first argument (package name). Positionally for now, no attempt + // at argument matching whatsoever. + let Some(package_node) = node.arguments_values().nth(0) else { + return Err(anyhow::anyhow!("Can't unpack attached package argument")); + }; + + // Just bail if `character.only` is passed, even if it's actually `FALSE`. + // We'll do better when we have a more capable argument inspection + // infrastructure. + if let Some(_) = node + .arguments_names_as_string(context.contents) + .find(|n| n == "character.only") + { + return Ok(()); + } + + let package_name = package_node.get_identifier_or_string_text(context.contents)?; + let attach_pos = node.end_position(); + + let package = insert_package_exports(&package_name, attach_pos, context)?; + + // Also attach packages from `Depends` field + let mut attach_dependencies = package.description.depends.clone(); + + // Special handling for the tidyverse and tidymodels packages. Hard-coded + // for now but in the future, this should probably be expressed as a + // `DESCRIPTION` field like `Config/Needs/attach`. + let attach_field = match package.description.name.as_str() { + // https://github.com/tidyverse/tidyverse/blob/0231aafb/R/attach.R#L1 + "tidyverse" => { + vec![ + "dplyr", + "readr", + "forcats", + "stringr", + "ggplot2", + "tibble", + "lubridate", + "tidyr", + "purrr", + ] + }, + // https://github.com/tidymodels/tidymodels/blob/aa3f82cf/R/attach.R#L1 + "tidymodels" => { + vec![ + "broom", + "dials", + "dplyr", + "ggplot2", + "infer", + "modeldata", + "parsnip", + "purrr", + "recipes", + "rsample", + "tibble", + "tidyr", + "tune", + "workflows", + "workflowsets", + "yardstick", + ] + }, + _ => vec![], + }; + attach_dependencies.extend(attach_field.into_iter().map(String::from)); + + for package_name in attach_dependencies { + insert_package_exports(&package_name, attach_pos, context)?; + } + + Ok(()) +} + +fn insert_package_exports( + package_name: &str, + attach_pos: Point, + context: &mut DiagnosticContext, +) -> anyhow::Result> { + let Some(package) = context.library.get(package_name) else { + return Err(anyhow::anyhow!( + "Can't get exports from package {package_name} because it is not installed." + )); + }; + + context + .library_symbols + .entry(attach_pos) + .or_default() + .extend(package.namespace.exports.iter().cloned()); + + Ok(package) +} + fn recurse_subset_or_subset2( node: Node, context: &mut DiagnosticContext, @@ -945,7 +1075,7 @@ fn check_symbol_in_scope( // Skip if a symbol with this name is in scope. let name = context.contents.node_slice(&node)?.to_string(); - if context.has_definition(name.as_str()) { + if context.has_definition(name.as_str(), node.start_position()) { return false.ok(); } @@ -963,6 +1093,8 @@ fn check_symbol_in_scope( #[cfg(test)] mod tests { + use std::path::PathBuf; + use harp::eval::RParseEvalOptions; use once_cell::sync::Lazy; use tower_lsp::lsp_types::Position; @@ -970,6 +1102,11 @@ mod tests { use crate::interface::console_inputs; use crate::lsp::diagnostics::generate_diagnostics; use crate::lsp::documents::Document; + use crate::lsp::inputs::library::Library; + use crate::lsp::inputs::package::Package; + use crate::lsp::inputs::package_description::Dcf; + use crate::lsp::inputs::package_description::Description; + use crate::lsp::inputs::package_namespace::Namespace; use crate::lsp::state::WorldState; use crate::r_task; @@ -1455,4 +1592,227 @@ foo ); }) } + + #[test] + fn test_library_static_exports() { + r_task(|| { + // `mockpkg` exports `foo` and `bar` + let namespace = Namespace { + exports: vec!["foo".to_string(), "bar".to_string()], + imports: vec![], + bulk_imports: vec![], + }; + let description = Description { + name: "mockpkg".to_string(), + version: "1.0.0".to_string(), + depends: vec![], + fields: Dcf::new(), + }; + let package = Package { + path: PathBuf::from("/mock/path"), + description, + namespace, + }; + + // Create a library with `mockpkg` installed + let library = Library::new(vec![]).insert("mockpkg", package); + + // Simulate a search path with `library` in scope + let console_scopes = vec![vec!["library".to_string()]]; + + // Whereas `DEFAULT_STATE` contains base package attached, this world state + // only contains `mockpkg` as installed package and `library()` on + // the search path. + let state = WorldState { + library, + console_scopes, + ..Default::default() + }; + + // Test that exported symbols are recognized + let code = " + library(mockpkg) + foo() + bar + "; + let document = Document::new(code, None); + let diagnostics = generate_diagnostics(document, state.clone()); + + assert_eq!(diagnostics.len(), 0); + + // Test that non-exported symbols still generate diagnostics + let code = " + library(mockpkg) + undefined() + also_undefined + "; + let document = Document::new(code, None); + + let diagnostics = generate_diagnostics(document, state.clone()); + assert_eq!(diagnostics.len(), 2); + + assert!(diagnostics + .get(0) + .unwrap() + .message + .contains("No symbol named 'undefined' in scope")); + assert!(diagnostics + .get(1) + .unwrap() + .message + .contains("No symbol named 'also_undefined' in scope")); + + // Test duplicate call + let code = " + library(mockpkg) + library(mockpkg) # duplicate is fine + foo() + bar + "; + let document = Document::new(code, None); + let diagnostics = generate_diagnostics(document, state.clone()); + assert_eq!(diagnostics.len(), 0); + + // If the library call includes the `character.only` argument, we bail + let code = r#" + library(mockpkg, character.only = TRUE) + foo() + "#; + let document = Document::new(code, None); + let diagnostics = generate_diagnostics(document, state.clone()); + assert_eq!(diagnostics.len(), 1); + + // Same if passed `FALSE`, we're not trying to be smart (yet) + let code = r#" + library(mockpkg, character.only = FALSE) + foo() + "#; + let document = Document::new(code, None); + let diagnostics = generate_diagnostics(document, state); + assert_eq!(diagnostics.len(), 1); + }); + } + + #[test] + fn test_library_static_exports_multiple_packages() { + r_task(|| { + // pkg1 exports `foo` and `bar` + let namespace1 = Namespace { + exports: vec!["foo".to_string(), "bar".to_string()], + imports: vec![], + bulk_imports: vec![], + }; + let description1 = Description { + name: "pkg1".to_string(), + version: "1.0.0".to_string(), + depends: vec![], + fields: Dcf::new(), + }; + let package1 = Package { + path: PathBuf::from("/mock/path1"), + description: description1, + namespace: namespace1, + }; + + // pkg2 exports `bar` and `baz` + let namespace2 = Namespace { + exports: vec!["bar".to_string(), "baz".to_string()], + imports: vec![], + bulk_imports: vec![], + }; + let description2 = Description { + name: "pkg2".to_string(), + version: "1.0.0".to_string(), + depends: vec![], + fields: Dcf::new(), + }; + let package2 = Package { + path: PathBuf::from("/mock/path2"), + description: description2, + namespace: namespace2, + }; + + let library = Library::new(vec![]) + .insert("pkg1", package1) + .insert("pkg2", package2); + + let console_scopes = vec![vec!["library".to_string()]]; + let state = WorldState { + library, + console_scopes, + ..Default::default() + }; + + // Code with two library calls at different points + let code = " + foo # not in scope + bar # not in scope + baz # not in scope + + library(pkg1) + foo # in scope + bar # in scope + baz # not in scope + + library(pkg2) + foo # in scope + bar # in scope + baz # in scope + "; + let document = Document::new(code, None); + let diagnostics = generate_diagnostics(document, state.clone()); + + let messages: Vec<_> = diagnostics.iter().map(|d| d.message.clone()).collect(); + assert!(messages.iter().any(|m| m.contains("No symbol named 'foo'"))); + assert!(messages.iter().any(|m| m.contains("No symbol named 'bar'"))); + assert!(messages.iter().any(|m| m.contains("No symbol named 'baz'"))); + assert!(messages.iter().any(|m| m.contains("No symbol named 'baz'"))); + assert_eq!(messages.len(), 4); + }); + } + + #[test] + fn test_library_static_exports_require() { + r_task(|| { + // `pkg` exports `foo` and `bar` + let namespace = Namespace { + exports: vec!["foo".to_string(), "bar".to_string()], + imports: vec![], + bulk_imports: vec![], + }; + let description = Description { + name: "pkg".to_string(), + version: "1.0.0".to_string(), + depends: vec![], + fields: Dcf::new(), + }; + let package = Package { + path: PathBuf::from("/mock/path"), + description, + namespace, + }; + + let library = Library::new(vec![]).insert("pkg", package); + + let console_scopes = vec![vec!["require".to_string()]]; + let state = WorldState { + library, + console_scopes, + ..Default::default() + }; + + let code = " + foo() + require(pkg) + bar + foo() + "; + let document = Document::new(code, None); + let diagnostics = generate_diagnostics(document, state.clone()); + assert!(diagnostics + .iter() + .any(|d| d.message.contains("No symbol named 'foo'"))); + assert_eq!(diagnostics.len(), 1); + }); + } } diff --git a/crates/ark/src/lsp/diagnostics_syntax.rs b/crates/ark/src/lsp/diagnostics_syntax.rs index 508bf8f96..aa9bef605 100644 --- a/crates/ark/src/lsp/diagnostics_syntax.rs +++ b/crates/ark/src/lsp/diagnostics_syntax.rs @@ -309,10 +309,12 @@ mod tests { use crate::lsp::diagnostics::DiagnosticContext; use crate::lsp::diagnostics_syntax::syntax_diagnostics; use crate::lsp::documents::Document; + use crate::lsp::inputs::library::Library; fn text_diagnostics(text: &str) -> Vec { let document = Document::new(text, None); - let context = DiagnosticContext::new(&document.contents); + let library = Library::default(); + let context = DiagnosticContext::new(&document.contents, &library); let diagnostics = syntax_diagnostics(document.ast.root_node(), &context).unwrap(); diagnostics } diff --git a/crates/ark/src/lsp/inputs/library.rs b/crates/ark/src/lsp/inputs/library.rs new file mode 100644 index 000000000..40d600477 --- /dev/null +++ b/crates/ark/src/lsp/inputs/library.rs @@ -0,0 +1,148 @@ +// +// library.rs +// +// Copyright (C) 2025 by Posit Software, PBC +// + +use std::collections::HashMap; +use std::path::PathBuf; +use std::sync::Arc; +use std::sync::RwLock; + +use super::package::Package; +use crate::lsp; + +/// Lazily manages a list of known R packages by name +#[derive(Default, Clone, Debug)] +pub struct Library { + /// Paths to library directories, i.e. what `base::libPaths()` returns. + pub library_paths: Arc>, + + packages: Arc>>>>, +} + +impl Library { + pub fn new(library_paths: Vec) -> Self { + Self { + packages: Arc::new(RwLock::new(HashMap::new())), + library_paths: Arc::new(library_paths), + } + } + + /// Get a package by name, loading and caching it if necessary. + /// Returns `None` if the package can't be found or loaded. + pub fn get(&self, name: &str) -> Option> { + // Try to get from cache first (could be `None` if we already tried to + // load a non-existent or broken package) + if let Some(entry) = self.packages.read().unwrap().get(name) { + return entry.clone(); + } + + // Not cached, try to load + let pkg = match self.load_package(name) { + Ok(Some(pkg)) => Some(Arc::new(pkg)), + Ok(None) => None, + Err(err) => { + lsp::log_error!("Can't load R package: {err:?}"); + None + }, + }; + + self.packages + .write() + .unwrap() + .insert(name.to_string(), pkg.clone()); + + pkg + } + + /// Insert a package in the library for testing purposes. + #[cfg(test)] + pub fn insert(self, name: &str, package: Package) -> Self { + self.packages + .write() + .unwrap() + .insert(name.to_string(), Some(Arc::new(package))); + self + } + + fn load_package(&self, name: &str) -> anyhow::Result> { + for lib_path in self.library_paths.iter() { + match Package::load(&lib_path, name) { + Ok(Some(pkg)) => return Ok(Some(pkg)), + Ok(None) => (), + Err(err) => lsp::log_warn!("Can't load package: {err:?}"), + } + } + + Ok(None) + } +} + +#[cfg(test)] +mod tests { + use std::fs::File; + use std::fs::{self}; + use std::io::Write; + + use tempfile::TempDir; + + use super::*; + + // Helper to create a temporary package directory with DESCRIPTION and NAMESPACE + fn create_temp_package( + pkg_name: &str, + description: &str, + namespace: &str, + ) -> (TempDir, PathBuf) { + let temp_dir = TempDir::new().unwrap(); + let pkg_dir = temp_dir.path().join(pkg_name); + fs::create_dir(&pkg_dir).unwrap(); + + let desc_path = pkg_dir.join("DESCRIPTION"); + let mut desc_file = File::create(&desc_path).unwrap(); + desc_file.write_all(description.as_bytes()).unwrap(); + + let ns_path = pkg_dir.join("NAMESPACE"); + let mut ns_file = File::create(&ns_path).unwrap(); + ns_file.write_all(namespace.as_bytes()).unwrap(); + + (temp_dir, pkg_dir) + } + + #[test] + fn test_load_and_cache_package() { + let pkg_name = "mypkg"; + let description = r#" +Package: mypkg +Version: 1.0 + "#; + let namespace = r#" +export(foo) +export(bar) +importFrom(pkg, baz) + "#; + + let (temp_dir, _pkg_dir) = create_temp_package(pkg_name, description, namespace); + + // Library should point to the temp_dir as its only library path + let lib = Library::new(vec![temp_dir.path().to_path_buf()]); + + // First access loads from disk + let pkg = lib.get(pkg_name).unwrap(); + assert_eq!(pkg.description.name, "mypkg"); + + // Second access uses cache (note that we aren't testing that we are + // indeed caching, just exercising the cache code path) + assert!(lib.get(pkg_name).is_some()); + + // Negative cache: missing package + assert!(lib.get("notapkg").is_none()); + // Now cached as absent + assert!(lib.get("notapkg").is_none()); + + // Namespace is parsed + assert_eq!(pkg.namespace.exports, vec!["bar", "foo"]); + assert_eq!(pkg.namespace.imports, vec!["baz"]); + } +} diff --git a/crates/ark/src/lsp/inputs/mod.rs b/crates/ark/src/lsp/inputs/mod.rs new file mode 100644 index 000000000..cfe7854fa --- /dev/null +++ b/crates/ark/src/lsp/inputs/mod.rs @@ -0,0 +1,12 @@ +// +// mod.rs +// +// Copyright (C) 2025 by Posit Software, PBC +// +// + +pub mod library; +pub mod package; +pub mod package_description; +pub mod package_namespace; +pub mod source_root; diff --git a/crates/ark/src/lsp/inputs/package.rs b/crates/ark/src/lsp/inputs/package.rs new file mode 100644 index 000000000..5dd9c2d69 --- /dev/null +++ b/crates/ark/src/lsp/inputs/package.rs @@ -0,0 +1,59 @@ +// +// package.rs +// +// Copyright (C) 2025 by Posit Software, PBC +// +// + +use std::fs; +use std::path::PathBuf; + +use crate::lsp::inputs::package_description::Description; +use crate::lsp::inputs::package_namespace::Namespace; + +/// Represents an R package and its metadata relevant for static analysis. +#[derive(Clone, Debug)] +pub struct Package { + /// Path to the directory that contains `DESCRIPTION`. Could be an installed + /// package, or a package source. + pub path: PathBuf, + + pub description: Description, + pub namespace: Namespace, +} + +impl Package { + /// Attempts to load a package from the given path and name. + pub fn load(lib_path: &std::path::Path, name: &str) -> anyhow::Result> { + let package_path = lib_path.join(name); + + let description_path = package_path.join("DESCRIPTION"); + let namespace_path = package_path.join("NAMESPACE"); + + // Only consider libraries that have a folder named after the + // requested package and that contains a description file + if !description_path.is_file() { + return Ok(None); + } + + // This fails if there is no `Package` field, so we're never loading + // folders like bookdown projects as package + let description_contents = fs::read_to_string(&description_path)?; + let description = Description::parse(&description_contents)?; + + if description.name != name { + return Err(anyhow::anyhow!( + "`Package` field in `DESCRIPTION` doesn't match folder name '{name}'" + )); + } + + let namespace_contents = fs::read_to_string(&namespace_path)?; + let namespace = Namespace::parse(&namespace_contents)?; + + Ok(Some(Package { + path: package_path.to_path_buf(), + description, + namespace, + })) + } +} diff --git a/crates/ark/src/lsp/inputs/package_description.rs b/crates/ark/src/lsp/inputs/package_description.rs new file mode 100644 index 000000000..1d42f223f --- /dev/null +++ b/crates/ark/src/lsp/inputs/package_description.rs @@ -0,0 +1,236 @@ +// +// package_description.rs +// +// Copyright (C) 2025 by Posit Software, PBC +// + +use std::collections::HashMap; + +use anyhow; + +/// Parsed DCF file (Debian Control File, e.g. DESCRIPTION). Simple wrapper +/// around the map of fields whose `get()` method returns a `&str` that's easier +/// to work with. +#[derive(Clone, Debug, Default, PartialEq, Eq)] +pub struct Dcf { + pub fields: HashMap, +} + +impl Dcf { + pub fn new() -> Self { + Dcf { + fields: HashMap::new(), + } + } + + pub fn parse(input: &str) -> Self { + Dcf { + fields: parse_dcf(input), + } + } + + /// Get a field value by key + pub fn get(&self, key: &str) -> Option<&str> { + self.fields.get(key).map(|s| s.as_str()) + } +} + +/// Parsed DESCRIPTION file +#[derive(Clone, Debug)] +pub struct Description { + pub name: String, + pub version: String, + + /// `Depends` field. Currently doesn't contain versions. + pub depends: Vec, + + /// Raw DCF fields + pub fields: Dcf, +} + +impl Description { + /// Parse a DESCRIPTION file in DCF format + pub fn parse(contents: &str) -> anyhow::Result { + let fields = Dcf::parse(contents); + + let name = fields + .get("Package") + .ok_or_else(|| anyhow::anyhow!("Missing Package field in DESCRIPTION"))? + .to_string(); + + let version = fields + .get("Version") + .ok_or_else(|| anyhow::anyhow!("Missing Version field in DESCRIPTION"))? + .to_string(); + + let depends = fields + .get("Depends") + .and_then(|deps| { + let mut pkgs = parse_comma_separated(deps); + + // Remove dependency on R. In the future we will record it to a field with + // the minimum version the package depends on. + pkgs.retain(|pkg| pkg != "R"); + + Some(pkgs) + }) + .unwrap_or_default(); + + Ok(Description { + name, + version, + depends, + fields, + }) + } +} + +/// Parse a DCF (Debian Control File) format string into a key-value map. +/// https://www.debian.org/doc/debian-policy/ch-controlfields.html +fn parse_dcf(input: &str) -> std::collections::HashMap { + use std::collections::HashMap; + + let mut fields = HashMap::new(); + let mut current_key: Option = None; + let mut current_value = String::new(); + + for line in input.lines() { + // Indented line: This is a continuation, even if empty + if line.starts_with(char::is_whitespace) { + current_value.push_str(line); + current_value.push('\n'); + continue; + } + + // Non-whitespace at start and contains a colon: This is a new field + if !line.is_empty() && line.contains(':') { + // Save previous field + if let Some(key) = current_key.take() { + fields.insert(key, current_value.trim_end().to_string()); + } + + let idx = line.find(':').unwrap(); + let key = line[..idx].trim().to_string(); + let value = line[idx + 1..].trim_start(); + + current_key = Some(key); + + current_value.clear(); + current_value.push_str(value); + current_value.push('\n'); + + continue; + } + } + + // Finish last field + if let Some(key) = current_key { + fields.insert(key, current_value.trim_end().to_string()); + } + + fields +} + +/// Parse a comma-separated list of package dependencies +fn parse_comma_separated(input: &str) -> Vec { + input + .split(',') + .map(|s| s.trim()) + .filter(|s| !s.is_empty()) + .map(|s| { + // Remove version constraints like "R (>= 3.5.0)" + if let Some(idx) = s.find('(') { + s[..idx].trim().to_string() + } else { + s.to_string() + } + }) + .collect() +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn parses_description_basic() { + let desc = r#"Package: mypackage +Version: 1.0.0 +Title: My Package +Description: A simple package for testing."#; + let parsed = Description::parse(desc).unwrap(); + assert_eq!(parsed.name, "mypackage"); + assert_eq!(parsed.version, "1.0.0"); + assert!(parsed.depends.is_empty()); + } + + #[test] + fn parses_description_with_depends() { + let desc = r#"Package: mypackage +Version: 1.0.0 +Depends: R (>= 3.5.0), utils, stats +Title: My Package"#; + let parsed = Description::parse(desc).unwrap(); + assert_eq!(parsed.name, "mypackage"); + assert_eq!(parsed.version, "1.0.0"); + assert_eq!(parsed.depends, vec!["utils", "stats"]); + } + + #[test] + fn parses_description_with_multiline_field() { + let desc = r#"Package: mypackage +Version: 1.0.0 +Description: This is a long description + that spans multiple lines + and should be preserved correctly."#; + let parsed = Description::parse(desc).unwrap(); + assert_eq!(parsed.name, "mypackage"); + assert_eq!(parsed.version, "1.0.0"); + } + + #[test] + fn parses_dcf_basic() { + let dcf = r#"Package: mypackage +Version: 1.0.0 +Title: My Package +Description: A simple package for testing."#; + let parsed = Dcf::parse(dcf); + assert_eq!(parsed.get("Package"), Some("mypackage")); + assert_eq!(parsed.get("Version"), Some("1.0.0")); + assert_eq!(parsed.get("Title"), Some("My Package")); + assert_eq!( + parsed.get("Description"), + Some("A simple package for testing.") + ); + } + + #[test] + fn parses_dcf_multiline_field() { + let dcf = r#"Package: mypackage +Version: 1.0.0 +Description: This is a long description + that spans multiple lines + and should be preserved correctly."#; + let parsed = Dcf::parse(dcf); + assert_eq!( + parsed.get("Description"), + Some("This is a long description\n that spans multiple lines\n and should be preserved correctly.") + ); + } + + // Empty lines are ignored in DCF files. They are supported via a dot + // notation (` .` represents an empty line) bug we don't support that. + #[test] + fn parses_dcf_empty_continuation_line() { + let dcf = r#"Package: mypackage +Description: First line + second line + + third line"#; + let parsed = Dcf::parse(dcf); + assert_eq!( + parsed.get("Description"), + Some("First line\n second line\n third line") + ); + } +} diff --git a/crates/ark/src/lsp/inputs/package_namespace.rs b/crates/ark/src/lsp/inputs/package_namespace.rs new file mode 100644 index 000000000..64b7e8e5d --- /dev/null +++ b/crates/ark/src/lsp/inputs/package_namespace.rs @@ -0,0 +1,159 @@ +// +// package_namespace.rs +// +// Copyright (C) 2025 by Posit Software, PBC +// + +use tree_sitter::Parser; + +use crate::treesitter::TSQuery; + +/// Parsed NAMESPACE file +#[derive(Clone, Debug)] +pub struct Namespace { + /// Names of objects exported with `export()` + pub exports: Vec, + /// Names of objects imported with `importFrom()` + pub imports: Vec, + /// Names of packages bulk-imported with `import()` + pub bulk_imports: Vec, +} + +impl Namespace { + /// Parse a NAMESPACE file using tree-sitter to extract exports and imports. + pub fn parse(contents: &str) -> anyhow::Result { + let mut parser = Parser::new(); + parser + .set_language(&tree_sitter_r::LANGUAGE.into()) + .map_err(|err| anyhow::anyhow!("Failed to set tree-sitter language: {err:?}"))?; + + let tree = parser + .parse(contents, None) + .ok_or_else(|| anyhow::anyhow!("Failed to parse NAMESPACE file"))?; + let root_node = tree.root_node(); + + let query_str = r#" + (call + function: (identifier) @fn_name + arguments: (arguments (argument value: (identifier) @exported)) + (#eq? @fn_name "export") + ) + (call + function: (identifier) @fn_name + arguments: (arguments (argument value: (identifier) @pkg) (argument value: (identifier) @imported)) + (#eq? @fn_name "importFrom") + ) + (call + function: (identifier) @fn_name + arguments: (arguments (argument value: (identifier) @bulk_imported)) + (#eq? @fn_name "import") + ) + "#; + let mut ts_query = TSQuery::new(query_str)?; + + let mut exports = Vec::new(); + for capture in ts_query.captures_for(root_node, "exported", contents.as_bytes()) { + let symbol = capture + .utf8_text(contents.as_bytes()) + .unwrap_or("") + .to_string(); + exports.push(symbol); + } + + let mut imports = Vec::new(); + for capture in ts_query.captures_for(root_node, "imported", contents.as_bytes()) { + let symbol = capture + .utf8_text(contents.as_bytes()) + .unwrap_or("") + .to_string(); + imports.push(symbol); + } + + let mut bulk_imports = Vec::new(); + for capture in ts_query.captures_for(root_node, "bulk_imported", contents.as_bytes()) { + let symbol = capture + .utf8_text(contents.as_bytes()) + .unwrap_or("") + .to_string(); + bulk_imports.push(symbol); + } + + // Take unique values of imports and exports. In the future we'll lint + // this but for now just be defensive. + exports.sort(); + exports.dedup(); + imports.sort(); + imports.dedup(); + bulk_imports.sort(); + bulk_imports.dedup(); + + Ok(Namespace { + imports, + exports, + bulk_imports, + }) + } + + /// TODO: Take a `Library` and incorporate bulk imports + pub(crate) fn _resolve_imports(&self) -> &Vec { + &self.imports + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn parses_exports() { + let ns = r#" + export(foo) + export(bar) + exports(baz) # typo + "#; + let parsed = Namespace::parse(ns).unwrap(); + assert_eq!(parsed.exports, vec!["bar", "foo"]); + assert!(parsed.imports.is_empty()); + } + + #[test] + fn parses_importfrom() { + let ns = r#" + importFrom(stats, median) + importFrom(utils, head) + importsFrom(utils, tail) # typo + "#; + let parsed = Namespace::parse(ns).unwrap(); + assert_eq!(parsed.imports, vec!["head", "median"]); + assert!(parsed.exports.is_empty()); + } + + #[test] + fn parses_mixed_namespace_with_duplicates() { + let ns = r#" + export(foo) + importFrom(stats, median) + export(bar) + importFrom(utils, head) + importFrom(utils, median) + "#; + let parsed = Namespace::parse(ns).unwrap(); + assert_eq!(parsed.exports, vec!["bar", "foo"]); + assert_eq!(parsed.imports, vec!["head", "median"]); + } + + #[test] + fn parses_bulk_imports() { + let ns = r#" + import(rlang) + import(utils) + export(foo) + import(utils) + importFrom(stats, median) + "#; + let parsed = Namespace::parse(ns).unwrap(); + assert_eq!(parsed.bulk_imports, vec!["rlang", "utils"]); + assert_eq!(parsed.exports, vec!["foo"]); + assert_eq!(parsed.imports, vec!["median"]); + } +} diff --git a/crates/ark/src/lsp/inputs/source_root.rs b/crates/ark/src/lsp/inputs/source_root.rs new file mode 100644 index 000000000..f3805c9db --- /dev/null +++ b/crates/ark/src/lsp/inputs/source_root.rs @@ -0,0 +1,16 @@ +// +// source_root.rs +// +// Copyright (C) 2025 by Posit Software, PBC +// +// + +use super::package::Package; + +/// The root of a source tree. +/// Currently only supports packages, but can be extended to scripts. +#[derive(Clone, Debug)] +pub enum SourceRoot { + Package(Package), + // Scripts(Vec