diff --git a/crates/ark/src/lsp/goto_definition.rs b/crates/ark/src/lsp/goto_definition.rs index 9ed8b2331..382c9dea5 100644 --- a/crates/ark/src/lsp/goto_definition.rs +++ b/crates/ark/src/lsp/goto_definition.rs @@ -23,16 +23,10 @@ pub(crate) fn goto_definition( document.position_encoding, )?; - let index = document.semantic_index(); + let (index, file_scope) = state.file_analysis(&uri, document); let root = document.syntax(); - let targets = oak_ide::goto_definition( - offset, - &uri, - &root, - &index, - &state.file_scope(&uri), - &state.library, - ); + let targets = + oak_ide::goto_definition(offset, &uri, &root, &index, &file_scope, &state.library); if targets.is_empty() { return Ok(None); @@ -504,4 +498,659 @@ mod tests { // `is.null` is missing from the INDEX-based export list. assert_eq!(result, None); } + + // --- source() directive in scripts --- + + #[test] + fn test_script_source_resolves_from_workspace_root() { + // source() paths are resolved relative to the workspace root, + // not the sourcing file's directory. Here script.R is in a + // subdirectory but sources "helpers.R" which lives at the root. 
+ let dir = tempfile::tempdir().unwrap(); + let subdir = dir.path().join("subdir"); + std::fs::create_dir(&subdir).unwrap(); + + std::fs::write(dir.path().join("helpers.R"), "helper <- function() 1\n").unwrap(); + + let script_doc = Document::new("source(\"helpers.R\")\nhelper\n", None); + let script_uri = lsp_types::Url::from_file_path(subdir.join("script.R")).unwrap(); + + let helpers_uri = lsp_types::Url::from_file_path(dir.path().join("helpers.R")).unwrap(); + + let mut state = WorldState::default(); + state + .documents + .insert(script_uri.clone(), script_doc.clone()); + state.workspace.folders = vec![lsp_types::Url::from_directory_path(dir.path()).unwrap()]; + + let params = make_params(script_uri, 1, 0); + assert_matches!( + goto_definition(&script_doc, params, &state).unwrap(), + Some(GotoDefinitionResponse::Link(ref links)) => { + assert_eq!(links[0].target_uri, helpers_uri); + } + ); + } + + #[test] + fn test_script_source_directive_resolves() { + // script.R has `source("helpers.R")` then uses `helper`. + // WorldState::file_analysis() should resolve the source() directive + // and make helpers.R's exports visible via the search path. 
+ let script_dir = std::env::temp_dir().join("test_script_source"); + + let helpers_doc = Document::new("helper <- function() 1\n", None); + let helpers_uri = lsp_types::Url::from_file_path(script_dir.join("helpers.R")).unwrap(); + + let script_doc = Document::new("source(\"helpers.R\")\nhelper\n", None); + let script_uri = lsp_types::Url::from_file_path(script_dir.join("script.R")).unwrap(); + + let mut state = WorldState::default(); + state.documents.insert(helpers_uri.clone(), helpers_doc); + state + .documents + .insert(script_uri.clone(), script_doc.clone()); + + // Cursor on `helper` (line 1, col 0) + let params = make_params(script_uri, 1, 0); + assert_matches!( + goto_definition(&script_doc, params, &state).unwrap(), + Some(GotoDefinitionResponse::Link(ref links)) => { + assert_eq!(links[0].target_uri, helpers_uri); + assert_eq!( + links[0].target_range, + lsp_types::Range { + start: lsp_types::Position::new(0, 0), + end: lsp_types::Position::new(0, 6), + } + ); + } + ); + } + + #[test] + fn test_script_source_directive_resolves_nested_library() { + // helpers.R has `library(dplyr)` and defines `helper`. + // script.R sources helpers.R then uses `mutate` (from dplyr). + // The nested library() directive should be visible through + // the source() resolution. 
+ let Some(library) = r_library() else { + eprintln!("skipping: R not found"); + return; + }; + + let script_dir = std::env::temp_dir().join("test_script_source_nested"); + + let helpers_doc = Document::new("library(dplyr)\nhelper <- function() 1\n", None); + let helpers_uri = lsp_types::Url::from_file_path(script_dir.join("helpers.R")).unwrap(); + + let script_doc = Document::new("source(\"helpers.R\")\nmutate\nhelper\n", None); + let script_uri = lsp_types::Url::from_file_path(script_dir.join("script.R")).unwrap(); + + let mut state = make_state(&script_uri, &script_doc); + state.library = library; + state.documents.insert(helpers_uri.clone(), helpers_doc); + + // `mutate` (line 1) resolves via dplyr, attached by helpers.R's library() call. + // Package symbol, no NavigationTarget. + let params = make_params(script_uri.clone(), 1, 0); + let result = goto_definition(&script_doc, params, &state).unwrap(); + assert_eq!(result, None); + + // `helper` (line 2) resolves to helpers.R's definition + let params = make_params(script_uri, 2, 0); + assert_matches!( + goto_definition(&script_doc, params, &state).unwrap(), + Some(GotoDefinitionResponse::Link(ref links)) => { + assert_eq!(links[0].target_uri, helpers_uri); + assert_eq!( + links[0].target_range, + lsp_types::Range { + start: lsp_types::Position::new(1, 0), + end: lsp_types::Position::new(1, 6), + } + ); + } + ); + } + + #[test] + fn test_script_source_resolves_from_disk() { + // helpers.R exists on disk but is NOT in state.documents. + // script.R sources it. The resolver should read from disk. 
+ let script_dir = tempfile::tempdir().unwrap(); + + let helpers_path = script_dir.path().join("helpers.R"); + std::fs::write(&helpers_path, "helper <- function() 1\n").unwrap(); + let helpers_uri = lsp_types::Url::from_file_path(&helpers_path).unwrap(); + + let script_doc = Document::new("source(\"helpers.R\")\nhelper\n", None); + let script_uri = + lsp_types::Url::from_file_path(script_dir.path().join("script.R")).unwrap(); + + let mut state = WorldState::default(); + state + .documents + .insert(script_uri.clone(), script_doc.clone()); + // helpers.R is intentionally NOT inserted into state.documents + + let params = make_params(script_uri, 1, 0); + assert_matches!( + goto_definition(&script_doc, params, &state).unwrap(), + Some(GotoDefinitionResponse::Link(ref links)) => { + assert_eq!(links[0].target_uri, helpers_uri); + assert_eq!( + links[0].target_range, + lsp_types::Range { + start: lsp_types::Position::new(0, 0), + end: lsp_types::Position::new(0, 6), + } + ); + } + ); + } + + #[test] + fn test_script_file_scope_from_disk() { + // The script itself is on disk, not in state.documents. + // file_scope should still read it and resolve its directives. 
+ let script_dir = tempfile::tempdir().unwrap(); + + let helpers_path = script_dir.path().join("helpers.R"); + std::fs::write(&helpers_path, "helper <- function() 1\n").unwrap(); + + let script_path = script_dir.path().join("script.R"); + std::fs::write(&script_path, "source(\"helpers.R\")\nhelper\n").unwrap(); + let script_uri = lsp_types::Url::from_file_path(&script_path).unwrap(); + + let script_doc = Document::new("source(\"helpers.R\")\nhelper\n", None); + + // Neither file is in state.documents + let state = WorldState::default(); + + let params = make_params(script_uri, 1, 0); + assert_matches!( + goto_definition(&script_doc, params, &state).unwrap(), + Some(GotoDefinitionResponse::Link(ref links)) => { + assert_eq!( + links[0].target_range, + lsp_types::Range { + start: lsp_types::Position::new(0, 0), + end: lsp_types::Position::new(0, 6), + } + ); + } + ); + } + + #[test] + fn test_script_source_transitive() { + // script.R sources a.R, a.R sources b.R. + // script.R should see b.R's exports transitively. 
+ let dir = tempfile::tempdir().unwrap(); + + std::fs::write( + dir.path().join("b.R"), + "library(dplyr)\nfrom_b <- function() 1\n", + ) + .unwrap(); + std::fs::write( + dir.path().join("a.R"), + "source(\"b.R\")\nfrom_a <- function() 2\n", + ) + .unwrap(); + + let script_doc = Document::new("source(\"a.R\")\nfrom_a\nfrom_b\nmutate\n", None); + let script_uri = lsp_types::Url::from_file_path(dir.path().join("script.R")).unwrap(); + + let Some(library) = r_library() else { + eprintln!("skipping: R not found"); + return; + }; + + let mut state = make_state(&script_uri, &script_doc); + state.library = library; + + // `from_a` (line 1) — defined in a.R + let a_uri = lsp_types::Url::from_file_path(dir.path().join("a.R")).unwrap(); + let params = make_params(script_uri.clone(), 1, 0); + assert_matches!( + goto_definition(&script_doc, params, &state).unwrap(), + Some(GotoDefinitionResponse::Link(ref links)) => { + assert_eq!(links[0].target_uri, a_uri); + } + ); + + // `from_b` (line 2) — defined in b.R, reachable transitively + let b_uri = lsp_types::Url::from_file_path(dir.path().join("b.R")).unwrap(); + let params = make_params(script_uri.clone(), 2, 0); + assert_matches!( + goto_definition(&script_doc, params, &state).unwrap(), + Some(GotoDefinitionResponse::Link(ref links)) => { + assert_eq!(links[0].target_uri, b_uri); + } + ); + + // `mutate` (line 3) — from dplyr, attached by b.R's library() call. + // Package symbol, no NavigationTarget. + let params = make_params(script_uri, 3, 0); + let result = goto_definition(&script_doc, params, &state).unwrap(); + assert_eq!(result, None); + } + + #[test] + fn test_script_source_cycle_does_not_hang() { + // a.R sources b.R, b.R sources a.R. Should not recurse infinitely. 
+ let dir = tempfile::tempdir().unwrap(); + + std::fs::write(dir.path().join("a.R"), "source(\"b.R\")\nfrom_a <- 1\n").unwrap(); + std::fs::write(dir.path().join("b.R"), "source(\"a.R\")\nfrom_b <- 2\n").unwrap(); + + let script_doc = Document::new("source(\"a.R\")\nfrom_a\nfrom_b\n", None); + let script_uri = lsp_types::Url::from_file_path(dir.path().join("script.R")).unwrap(); + + let mut state = WorldState::default(); + state + .documents + .insert(script_uri.clone(), script_doc.clone()); + + // Should resolve without hanging. Both symbols are reachable + // because a.R is visited first (gets its exports + b.R's exports), + // and b.R's attempt to re-source a.R is a no-op due to cycle detection. + let a_uri = lsp_types::Url::from_file_path(dir.path().join("a.R")).unwrap(); + let params = make_params(script_uri.clone(), 1, 0); + assert_matches!( + goto_definition(&script_doc, params, &state).unwrap(), + Some(GotoDefinitionResponse::Link(ref links)) => { + assert_eq!(links[0].target_uri, a_uri); + } + ); + + let b_uri = lsp_types::Url::from_file_path(dir.path().join("b.R")).unwrap(); + let params = make_params(script_uri, 2, 0); + assert_matches!( + goto_definition(&script_doc, params, &state).unwrap(), + Some(GotoDefinitionResponse::Link(ref links)) => { + assert_eq!(links[0].target_uri, b_uri); + } + ); + } + + #[test] + fn test_script_source_in_function_scoping() { + // `source(local = FALSE)` inside a function scopes directives to the + // function scope, so definitions are NOT visible at file scope. 
+ let dir = tempfile::tempdir().unwrap(); + + std::fs::write(dir.path().join("helpers.R"), "helper <- function() 1\n").unwrap(); + + // Line 0: "f <- function() {\n" + // Line 1: " source(\"helpers.R\")\n" + // Line 2: " helper\n" <- inside f, after source() + // Line 3: " function() helper\n" <- nested scope inside f + // Line 4: "}\n" + // Line 5: "helper\n" <- outside f + let script_source = + "f <- function() {\n source(\"helpers.R\")\n helper\n function() helper\n}\nhelper\n"; + let script_doc = Document::new(script_source, None); + let script_uri = lsp_types::Url::from_file_path(dir.path().join("script.R")).unwrap(); + + let mut state = WorldState::default(); + state + .documents + .insert(script_uri.clone(), script_doc.clone()); + + let helpers_uri = lsp_types::Url::from_file_path(dir.path().join("helpers.R")).unwrap(); + + // `helper` on line 2 (inside f, after source()) — should resolve + let params = make_params(script_uri.clone(), 2, 2); + assert_matches!( + goto_definition(&script_doc, params, &state).unwrap(), + Some(GotoDefinitionResponse::Link(ref links)) => { + assert_eq!(links[0].target_uri, helpers_uri); + } + ); + + // `helper` on line 3 (nested function inside f) — should resolve + let params = make_params(script_uri.clone(), 3, 14); + assert_matches!( + goto_definition(&script_doc, params, &state).unwrap(), + Some(GotoDefinitionResponse::Link(ref links)) => { + assert_eq!(links[0].target_uri, helpers_uri); + } + ); + + // `helper` on line 5 (outside f) — NOT visible + let params = make_params(script_uri, 5, 0); + let result = goto_definition(&script_doc, params, &state).unwrap(); + assert_eq!(result, None); + } + + #[test] + fn test_script_source_does_not_shadow_local_def() { + // A local definition should take precedence over a sourced one. + // `source()` at file scope defines `foo`, but a subsequent local + // `foo <- "local"` should shadow it. 
+ let dir = tempfile::tempdir().unwrap(); + + std::fs::write(dir.path().join("helpers.R"), "foo <- function() 1\n").unwrap(); + + // Line 0: "source(\"helpers.R\")\n" + // Line 1: "foo <- \"local\"\n" + // Line 2: "foo\n" + let script_source = "source(\"helpers.R\")\nfoo <- \"local\"\nfoo\n"; + let script_doc = Document::new(script_source, None); + let script_uri = lsp_types::Url::from_file_path(dir.path().join("script.R")).unwrap(); + + let mut state = WorldState::default(); + state + .documents + .insert(script_uri.clone(), script_doc.clone()); + + // `foo` on line 2 should resolve to the LOCAL definition on line 1, + // not to the sourced one from helpers.R. + let params = make_params(script_uri.clone(), 2, 0); + assert_matches!( + goto_definition(&script_doc, params, &state).unwrap(), + Some(GotoDefinitionResponse::Link(ref links)) => { + assert_eq!(links[0].target_uri, script_uri); + assert_eq!( + links[0].target_range, + lsp_types::Range { + start: lsp_types::Position::new(1, 0), + end: lsp_types::Position::new(1, 3), + } + ); + } + ); + } + + #[test] + fn test_script_source_diamond_dependency() { + // Diamond: a.R and b.R both source helpers.R. + // script.R sources both a.R and b.R. + // `helper` (from helpers.R) should be visible — the grey-set + // cycle detection must not prevent re-resolving a shared dependency. 
+ let dir = tempfile::tempdir().unwrap(); + + std::fs::write(dir.path().join("helpers.R"), "helper <- function() 1\n").unwrap(); + std::fs::write( + dir.path().join("a.R"), + "source(\"helpers.R\")\nfrom_a <- 1\n", + ) + .unwrap(); + std::fs::write( + dir.path().join("b.R"), + "source(\"helpers.R\")\nfrom_b <- 2\n", + ) + .unwrap(); + + let script_doc = Document::new( + "source(\"a.R\")\nsource(\"b.R\")\nhelper\nfrom_a\nfrom_b\n", + None, + ); + let script_uri = lsp_types::Url::from_file_path(dir.path().join("script.R")).unwrap(); + + let mut state = WorldState::default(); + state + .documents + .insert(script_uri.clone(), script_doc.clone()); + + let helpers_uri = lsp_types::Url::from_file_path(dir.path().join("helpers.R")).unwrap(); + let a_uri = lsp_types::Url::from_file_path(dir.path().join("a.R")).unwrap(); + let b_uri = lsp_types::Url::from_file_path(dir.path().join("b.R")).unwrap(); + + // `helper` (line 2) — from helpers.R, reachable through both a.R and b.R + let params = make_params(script_uri.clone(), 2, 0); + assert_matches!( + goto_definition(&script_doc, params, &state).unwrap(), + Some(GotoDefinitionResponse::Link(ref links)) => { + assert_eq!(links[0].target_uri, helpers_uri); + } + ); + + // `from_a` (line 3) — from a.R + let params = make_params(script_uri.clone(), 3, 0); + assert_matches!( + goto_definition(&script_doc, params, &state).unwrap(), + Some(GotoDefinitionResponse::Link(ref links)) => { + assert_eq!(links[0].target_uri, a_uri); + } + ); + + // `from_b` (line 4) — from b.R + let params = make_params(script_uri, 4, 0); + assert_matches!( + goto_definition(&script_doc, params, &state).unwrap(), + Some(GotoDefinitionResponse::Link(ref links)) => { + assert_eq!(links[0].target_uri, b_uri); + } + ); + } + + #[test] + fn test_script_source_self_reference() { + // script.R sources itself. The grey-set pre-seeds the current file + // so the self-reference is a no-op and doesn't create duplicate + // definitions. 
+ let dir = tempfile::tempdir().unwrap(); + + let script_path = dir.path().join("script.R"); + std::fs::write(&script_path, "source(\"script.R\")\nmy_var <- 1\nmy_var\n").unwrap(); + let script_uri = lsp_types::Url::from_file_path(&script_path).unwrap(); + + let script_doc = Document::new("source(\"script.R\")\nmy_var <- 1\nmy_var\n", None); + + let mut state = WorldState::default(); + state + .documents + .insert(script_uri.clone(), script_doc.clone()); + + // `my_var` (line 2) should resolve to its own definition on line 1, + // not to a Sourced duplicate. + let params = make_params(script_uri.clone(), 2, 0); + assert_matches!( + goto_definition(&script_doc, params, &state).unwrap(), + Some(GotoDefinitionResponse::Link(ref links)) => { + assert_eq!(links[0].target_uri, script_uri); + assert_eq!( + links[0].target_range, + lsp_types::Range { + start: lsp_types::Position::new(1, 0), + end: lsp_types::Position::new(1, 6), + } + ); + } + ); + } + + #[test] + fn test_script_source_in_function_packages_scoped() { + // `source(local = FALSE)` inside a function scopes package directives + // to the function scope, so they are NOT visible at file scope. 
+ let dir = tempfile::tempdir().unwrap(); + + std::fs::write( + dir.path().join("helpers.R"), + "library(dplyr)\nhelper <- function() 1\n", + ) + .unwrap(); + + // "mutate\n" offset 0 + // "f <- function() {\n" offset 7 + // " source(\"helpers.R\")\n" offset 25 + // " mutate\n" offset 46 + // "}\n" offset 55 + // "mutate\n" offset 57 + let script_source = + "mutate\nf <- function() {\n source(\"helpers.R\")\n mutate\n}\nmutate\n"; + let script_doc = Document::new(script_source, None); + let script_uri = lsp_types::Url::from_file_path(dir.path().join("script.R")).unwrap(); + + let mut state = WorldState::default(); + state + .documents + .insert(script_uri.clone(), script_doc.clone()); + + let (index, file_scope) = state.file_analysis(&script_uri, &script_doc); + + let has_dplyr = |layers: &[oak_index::external::BindingSource]| -> bool { + layers.iter().any(|l| matches!(l, oak_index::external::BindingSource::PackageExports(pkg) if pkg == "dplyr")) + }; + + // Before f (offset 0, on "mutate"): dplyr is NOT visible because the + // directive's offset is the `source()` call site inside f. + let before_offset = biome_rowan::TextSize::from(0); + let before_chain = file_scope.at(&index, before_offset); + assert!(!has_dplyr(&before_chain)); + + // Inside f (offset 48, on "mutate"): dplyr should be in the scope chain + let inner_offset = biome_rowan::TextSize::from(48); + let inner_chain = file_scope.at(&index, inner_offset); + assert!(has_dplyr(&inner_chain)); + + // After f (offset 57, on "mutate"): dplyr is NOT visible + let outer_offset = biome_rowan::TextSize::from(57); + let outer_chain = file_scope.at(&index, outer_offset); + assert!(!has_dplyr(&outer_chain)); + } + + #[test] + fn test_script_source_later_shadows_earlier() { + // When two sourced files define the same name, the later + // source() call shadows the earlier one. 
+ let dir = tempfile::tempdir().unwrap(); + + std::fs::write(dir.path().join("a.R"), "foo <- 1\n").unwrap(); + std::fs::write(dir.path().join("b.R"), "foo <- 2\n").unwrap(); + + let script_doc = Document::new("source(\"a.R\")\nsource(\"b.R\")\nfoo\n", None); + let script_uri = lsp_types::Url::from_file_path(dir.path().join("script.R")).unwrap(); + + let mut state = WorldState::default(); + state + .documents + .insert(script_uri.clone(), script_doc.clone()); + + let b_uri = lsp_types::Url::from_file_path(dir.path().join("b.R")).unwrap(); + + // `foo` (line 2) should resolve to b.R (later source shadows earlier) + let params = make_params(script_uri, 2, 0); + assert_matches!( + goto_definition(&script_doc, params, &state).unwrap(), + Some(GotoDefinitionResponse::Link(ref links)) => { + assert_eq!(links[0].target_uri, b_uri); + } + ); + } + + #[test] + fn test_script_source_local_true_in_function_scoping() { + // `source(local = TRUE)` injects definitions into the function + // scope, so `helper` is visible inside f but not outside. 
+ let dir = tempfile::tempdir().unwrap(); + + std::fs::write(dir.path().join("helpers.R"), "helper <- function() 1\n").unwrap(); + + let script_source = + "f <- function() {\n source(\"helpers.R\", local = TRUE)\n helper\n}\nhelper\n"; + let script_doc = Document::new(script_source, None); + let script_uri = lsp_types::Url::from_file_path(dir.path().join("script.R")).unwrap(); + + let mut state = WorldState::default(); + state + .documents + .insert(script_uri.clone(), script_doc.clone()); + + let helpers_uri = lsp_types::Url::from_file_path(dir.path().join("helpers.R")).unwrap(); + + // `helper` on line 2 (inside f, after source(local = TRUE)) — resolves + let params = make_params(script_uri.clone(), 2, 2); + assert_matches!( + goto_definition(&script_doc, params, &state).unwrap(), + Some(GotoDefinitionResponse::Link(ref links)) => { + assert_eq!(links[0].target_uri, helpers_uri); + } + ); + + // `helper` on line 4 (outside f) — does NOT resolve + let params = make_params(script_uri, 4, 0); + let result = goto_definition(&script_doc, params, &state).unwrap(); + assert_eq!(result, None); + } + + #[test] + fn test_script_source_local_true_shadows_local_def() { + // `source(local = TRUE)` injects into the use-def map and + // shadows a prior local binding. 
+ let dir = tempfile::tempdir().unwrap(); + + std::fs::write(dir.path().join("helpers.R"), "foo <- function() 1\n").unwrap(); + + // Line 0: "f <- function() {\n" + // Line 1: " foo <- \"local\"\n" + // Line 2: " source(\"helpers.R\", local = TRUE)\n" + // Line 3: " foo\n" + // Line 4: "}\n" + let script_source = + "f <- function() {\n foo <- \"local\"\n source(\"helpers.R\", local = TRUE)\n foo\n}\n"; + let script_doc = Document::new(script_source, None); + let script_uri = lsp_types::Url::from_file_path(dir.path().join("script.R")).unwrap(); + + let mut state = WorldState::default(); + state + .documents + .insert(script_uri.clone(), script_doc.clone()); + + let helpers_uri = lsp_types::Url::from_file_path(dir.path().join("helpers.R")).unwrap(); + + // `foo` on line 3 resolves to helpers.R (sourced def shadows local) + let params = make_params(script_uri, 3, 2); + assert_matches!( + goto_definition(&script_doc, params, &state).unwrap(), + Some(GotoDefinitionResponse::Link(ref links)) => { + assert_eq!(links[0].target_uri, helpers_uri); + } + ); + } + + #[test] + fn test_script_source_local_false_does_not_shadow_local_def() { + // `source()` (default `local = FALSE`) in a function scope does + // not shadow a prior local binding. 
+ let dir = tempfile::tempdir().unwrap(); + + std::fs::write(dir.path().join("helpers.R"), "foo <- function() 1\n").unwrap(); + + // Line 0: "f <- function() {\n" + // Line 1: " foo <- \"local\"\n" + // Line 2: " source(\"helpers.R\")\n" + // Line 3: " foo\n" + // Line 4: "}\n" + let script_source = + "f <- function() {\n foo <- \"local\"\n source(\"helpers.R\")\n foo\n}\n"; + let script_doc = Document::new(script_source, None); + let script_uri = lsp_types::Url::from_file_path(dir.path().join("script.R")).unwrap(); + + let mut state = WorldState::default(); + state + .documents + .insert(script_uri.clone(), script_doc.clone()); + + // `foo` on line 3 resolves to the local definition on line 1 + let params = make_params(script_uri.clone(), 3, 2); + assert_matches!( + goto_definition(&script_doc, params, &state).unwrap(), + Some(GotoDefinitionResponse::Link(ref links)) => { + assert_eq!(links[0].target_uri, script_uri); + assert_eq!( + links[0].target_range, + lsp_types::Range { + start: lsp_types::Position::new(1, 2), + end: lsp_types::Position::new(1, 5), + } + ); + } + ); + } } diff --git a/crates/ark/src/lsp/state.rs b/crates/ark/src/lsp/state.rs index 83b972c6d..e47d2eae0 100644 --- a/crates/ark/src/lsp/state.rs +++ b/crates/ark/src/lsp/state.rs @@ -3,11 +3,16 @@ use std::collections::HashSet; use std::path::Path; use anyhow::anyhow; +use biome_rowan::TextRange; use oak_core::file::list_r_files; use oak_ide::FileScope; +use oak_index::external::directive_layers; use oak_index::external::file_layers; use oak_index::external::package_root_layers; use oak_index::external::BindingSource; +use oak_index::semantic_index::SemanticIndex; +use oak_index::semantic_index_with_source_resolver; +use oak_index::SourceResolution; use oak_package::collation::collation_order; use oak_package::library::Library; use stdext::result::ResultExt; @@ -89,15 +94,39 @@ impl WorldState { } } - /// Create a scope chain for a particular file, taking into account the - /// current project 
type. For packages, this creates a scope containing - /// imports and top-level definitions in other files, respecting the - /// collation order. - pub(crate) fn file_scope(&self, file: &Url) -> FileScope { - let Some(SourceRoot::Package(ref pkg)) = self.root else { - return FileScope::search_path(default_search_path()); - }; + /// Look up a document by URL: returns an open document if available, + /// otherwise reads from disk. + /// + /// TODO: Replace with a proper VFS so non-opened workspace documents + /// are cached rather than re-read on every query. + fn workspace_document(&self, uri: &Url) -> Option { + if let Some(doc) = self.documents.get(uri) { + return Some(doc.clone()); + } + let path = uri.to_file_path().log_err()?; + let contents = std::fs::read_to_string(&path).log_err()?; + Some(Document::new(&contents, None)) + } + /// Create the semantic index and scope chain for a particular file. + /// + /// For scripts, the index is built with a source resolver so that + /// `source()` directives carry the sourced file's exports. + /// For packages, cross-file visibility comes from NAMESPACE imports and + /// collation ordering. + pub(crate) fn file_analysis(&self, file: &Url, doc: &Document) -> (SemanticIndex, FileScope) { + match self.root { + Some(SourceRoot::Package(ref pkg)) => self.package_file_analysis(file, doc, pkg), + _ => self.script_file_analysis(file, doc), + } + } + + fn package_file_analysis( + &self, + file: &Url, + doc: &Document, + pkg: &oak_package::package::Package, + ) -> (SemanticIndex, FileScope) { let root_layers = package_root_layers(&pkg.namespace); // Collect R source filenames from open documents and disk. Open @@ -149,16 +178,8 @@ impl WorldState { let Some(uri) = Url::from_file_path(&path).log_err() else { continue; }; - - // Use the open document if available, otherwise read from disk. - // TODO: Store non-opened workspace documents in VFS. 
- let doc = if let Some(open) = self.documents.get(&uri) { - open - } else { - let Ok(contents) = std::fs::read_to_string(&path) else { - continue; - }; - &Document::new(&contents, None) + let Some(doc) = self.workspace_document(&uri) else { + continue; }; let layers = file_layers(uri, &doc.semantic_index()); @@ -173,7 +194,98 @@ impl WorldState { lazy.extend(root_layers); lazy.push(BindingSource::PackageExports("base".to_string())); - FileScope::package(top_level, lazy) + (doc.semantic_index(), FileScope::package(top_level, lazy)) + } + + fn script_file_analysis(&self, file: &Url, doc: &Document) -> (SemanticIndex, FileScope) { + // Resolve `source()` paths relative to the workspace root, + // matching RStudio's behaviour of setting the working directory + // to the project root. Fall back to the file's own directory + // when no workspace folder is open. + let file_dir = file + .to_file_path() + .ok() + .and_then(|p| p.parent().map(|d| d.to_path_buf())); + let source_root = self + .workspace + .folders + .first() + .and_then(|url| url.to_file_path().ok()) + .or(file_dir); + + let mut stack = HashSet::new(); + stack.insert(file.clone()); + + let index = semantic_index_with_source_resolver(&doc.parse.tree(), |path| { + let dir = source_root.as_ref()?; + self.resolve_source(dir, path, &mut stack) + }); + + let directives = directive_layers(index.file_directives()); + ( + index, + FileScope::search_path(directives, default_search_path()), + ) + } + + /// Resolve a `source()` call into a [`SourceResolution`] containing the + /// sourced file's exported definitions and `library()` package attachments. + /// + /// `stack` tracks files currently being resolved (grey set) to break + /// cycles. A file is added when resolution starts and removed when it + /// finishes, so shared dependencies (diamond patterns) are resolved + /// independently for each parent. 
+ fn resolve_source( + &self, + base_dir: &Path, + path: &str, + stack: &mut HashSet, + ) -> Option { + let resolved = base_dir.join(path); + let url = Url::from_file_path(&resolved).log_err()?; + + if !stack.insert(url.clone()) { + return None; + } + + let sourced_doc = self.workspace_document(&url)?; + + // Build the sourced file's index with a nested resolver so that + // transitive `source()` calls are also resolved. The base + // directory stays the same (workspace root) throughout the chain. + let index = semantic_index_with_source_resolver(&sourced_doc.parse.tree(), |nested_path| { + self.resolve_source(base_dir, nested_path, stack) + }); + + let mut definitions: Vec<(String, Url, TextRange)> = index + .file_all_definitions(&url) + .into_iter() + .map(|(name, file, range)| (name.to_string(), file, range)) + .collect(); + + let mut packages = Vec::new(); + for d in index.file_directives() { + match d.kind() { + oak_index::semantic_index::DirectiveKind::Attach(pkg) => { + packages.push(pkg.clone()); + }, + oak_index::semantic_index::DirectiveKind::Source { + file: source_file, + exports, + } => { + for (name, range) in exports { + definitions.push((name.clone(), source_file.clone(), *range)); + } + }, + } + } + + stack.remove(&url); + + Some(SourceResolution { + definitions, + packages, + }) } } diff --git a/crates/oak_core/src/declaration.rs b/crates/oak_core/src/declaration.rs new file mode 100644 index 000000000..0a9880cec --- /dev/null +++ b/crates/oak_core/src/declaration.rs @@ -0,0 +1,134 @@ +//! Helpers for detecting `declare()` annotations in R source code. +//! +//! `declare()` is a no-op function in R (>= 4.5) meant to hold static +//! annotations. The compat syntax uses `~declare(...)` (a formula, also a +//! no-op) for older R versions. +//! +//! This module recognises the `declare()` wrapper and returns its arguments for +//! the caller to interpret. 
+ +use aether_syntax::AnyRExpression; +use aether_syntax::RCall; +use aether_syntax::RCallArguments; +use aether_syntax::RSyntaxKind; + +use crate::syntax_ext::RIdentifierExt; + +/// If `expr` is `declare(...)` or `~declare(...)`, return the arguments +/// of the `declare()` call. Returns `None` if the expression doesn't +/// match either pattern. +pub fn as_declare_args(expr: &AnyRExpression) -> Option { + let call = as_declare_call(expr)?; + call.arguments().ok() +} + +/// Unwrap `declare(...)` or `~declare(...)` to get the `declare` call node. +fn as_declare_call(expr: &AnyRExpression) -> Option { + match expr { + AnyRExpression::RCall(call) if is_declare(call) => Some(call.clone()), + + AnyRExpression::RUnaryExpression(unary) => { + let op = unary.operator().ok()?; + if op.kind() != RSyntaxKind::TILDE { + return None; + } + let AnyRExpression::RCall(call) = unary.argument().ok()? else { + return None; + }; + if is_declare(&call) { + Some(call) + } else { + None + } + }, + + _ => None, + } +} + +fn is_declare(call: &RCall) -> bool { + let Ok(AnyRExpression::RIdentifier(ident)) = call.function() else { + return false; + }; + ident.name_text() == "declare" +} + +#[cfg(test)] +mod tests { + use aether_parser::RParserOptions; + use aether_syntax::AnyRExpression; + use biome_rowan::AstNode; + use biome_rowan::AstNodeList; + use biome_rowan::AstSeparatedList; + + use super::*; + + fn parse_single_expr(code: &str) -> AnyRExpression { + let parsed = aether_parser::parse(code, RParserOptions::default()); + parsed.tree().expressions().iter().next().unwrap() + } + + fn declare_arg_values(code: &str) -> Option> { + let expr = parse_single_expr(code); + let args = as_declare_args(&expr)?; + Some( + args.items() + .iter() + .filter_map(|arg| { + let arg = arg.ok()?; + Some(arg.value()?.syntax().text_trimmed().to_string()) + }) + .collect(), + ) + } + + #[test] + fn test_declare_returns_arguments() { + let values = declare_arg_values("declare(source(\"helpers.R\"))"); + 
assert_eq!(values, Some(vec!["source(\"helpers.R\")".to_string()])); + } + + #[test] + fn test_tilde_declare_returns_arguments() { + let values = declare_arg_values("~declare(source(\"helpers.R\"))"); + assert_eq!(values, Some(vec!["source(\"helpers.R\")".to_string()])); + } + + #[test] + fn test_bare_call_not_declare() { + let values = declare_arg_values("source(\"helpers.R\")"); + assert_eq!(values, None); + } + + #[test] + fn test_tilde_not_declare() { + let values = declare_arg_values("~other(source(\"helpers.R\"))"); + assert_eq!(values, None); + } + + #[test] + fn test_declare_no_args() { + let values = declare_arg_values("declare()"); + assert_eq!(values, Some(vec![])); + } + + #[test] + fn test_declare_multiple_args() { + let values = declare_arg_values("declare(source(\"a.R\"), source(\"b.R\"))"); + assert_eq!( + values, + Some(vec![ + "source(\"a.R\")".to_string(), + "source(\"b.R\")".to_string(), + ]) + ); + } + + #[test] + fn test_declare_preserves_named_args() { + let expr = parse_single_expr("declare(foo = source(\"a.R\"))"); + let args = as_declare_args(&expr).unwrap(); + let arg = args.items().iter().next().unwrap().unwrap(); + assert!(arg.name_clause().is_some()); + } +} diff --git a/crates/oak_core/src/lib.rs b/crates/oak_core/src/lib.rs index 3e367a1a6..113bb73ae 100644 --- a/crates/oak_core/src/lib.rs +++ b/crates/oak_core/src/lib.rs @@ -1,2 +1,3 @@ +pub mod declaration; pub mod file; pub mod syntax_ext; diff --git a/crates/oak_ide/src/goto_definition.rs b/crates/oak_ide/src/goto_definition.rs index 3b92b4259..9b52bd768 100644 --- a/crates/oak_ide/src/goto_definition.rs +++ b/crates/oak_ide/src/goto_definition.rs @@ -4,6 +4,7 @@ use oak_index::external::resolve_external_name; use oak_index::external::resolve_in_package; use oak_index::external::BindingSource; use oak_index::external::ExternalDefinition; +use oak_index::semantic_index::DefinitionKind; use oak_index::semantic_index::SemanticIndex; use oak_index::DefinitionId; use 
oak_index::ScopeId; @@ -60,7 +61,7 @@ pub fn goto_definition( }, Identifier::Use { scope_id, use_id } => { let scope_chain = scope.at(index, offset); - resolve_use(scope_id, use_id, file, index, scope_chain, library) + resolve_use(scope_id, use_id, file, index, &scope_chain, library) }, Identifier::NamespaceAccess { ref package, @@ -90,8 +91,12 @@ fn resolve_use( defs.iter() .map(|&def_id| { let def = &index.definitions(scope)[def_id]; + let target_file = match def.kind() { + DefinitionKind::Sourced { file: source_file } => source_file.clone(), + _ => file.clone(), + }; NavigationTarget { - file: file.clone(), + file: target_file, name: symbol_name.to_string(), full_range: def.range(), focus_range: def.range(), diff --git a/crates/oak_ide/src/lib.rs b/crates/oak_ide/src/lib.rs index 74915ffda..3a20b8113 100644 --- a/crates/oak_ide/src/lib.rs +++ b/crates/oak_ide/src/lib.rs @@ -1,11 +1,14 @@ mod goto_definition; mod identifier; +use std::borrow::Cow; + use biome_rowan::TextRange; use biome_rowan::TextSize; pub use goto_definition::goto_definition; pub use identifier::Identifier; use oak_index::external::BindingSource; +use oak_index::semantic_index::ScopeId; use oak_index::semantic_index::ScopeKind; use oak_index::semantic_index::SemanticIndex; use url::Url; @@ -25,14 +28,27 @@ pub enum FileScope { }, /// Script or file outside a package. The scope chain is the R - /// search path: `library()` attachments from the file itself, + /// search path: `library()` and `source()` directives from the file + /// itself (position-stamped, only active after their call site), /// default packages (stats, graphics, etc.), and base. - SearchPath(Vec), + SearchPath { + /// Layers from the file's own top-level directives, each stamped with + /// the offset of the directive that produced them. Only layers with + /// offset <= cursor position are active. 
+ directive_layers: Vec<(TextSize, ScopeId, BindingSource)>, + // FIXME: Redundant with `ScopeId(0)` check but avoids importing + // the index type at construction time. Remove once we have salsa. + /// Always-visible base layers (default packages, base). + base: Vec, + }, } impl Default for FileScope { fn default() -> Self { - Self::SearchPath(Vec::new()) + Self::SearchPath { + directive_layers: Vec::new(), + base: Vec::new(), + } } } @@ -41,33 +57,74 @@ impl FileScope { Self::Package { top_level, lazy } } - pub fn search_path(layers: Vec) -> Self { - Self::SearchPath(layers) + pub fn search_path( + directive_layers: Vec<(TextSize, ScopeId, BindingSource)>, + base: Vec, + ) -> Self { + Self::SearchPath { + directive_layers, + base, + } } /// Return the scope chain appropriate for the given offset. For /// packages, top-level scope uses predecessors only while lazy - /// (function) scopes see all files. For scripts, the same search - /// path applies everywhere. - pub fn at(&self, index: &SemanticIndex, offset: TextSize) -> &[BindingSource] { + /// (function) scopes see all files. For scripts, only directives + /// whose scope is an ancestor of the cursor's scope and whose + /// offset <= cursor position are visible, plus the base layers. + pub fn at(&self, index: &SemanticIndex, offset: TextSize) -> Cow<'_, [BindingSource]> { match self { Self::Package { top_level, lazy } => { let scope = index.scope_at(offset); match index.scope(scope).kind() { - ScopeKind::File => top_level, - ScopeKind::Function => lazy, + ScopeKind::File => Cow::Borrowed(top_level), + ScopeKind::Function => Cow::Borrowed(lazy), } }, - Self::SearchPath(layers) => layers, + Self::SearchPath { + directive_layers, + base, + } => { + let cursor_scope = index.scope_at(offset); + // Reverse so later directives are searched first, matching + // R's LIFO search path (last `library()` or `source()` + // wins when two layers define the same name). 
+ let mut layers: Vec = directive_layers + .iter() + .rev() + .filter(|(dir_offset, dir_scope, _)| { + *dir_offset <= offset && + index.ancestor_scopes(cursor_scope).any(|s| s == *dir_scope) + }) + .map(|(_, _, layer)| layer.clone()) + .collect(); + layers.extend(base.iter().cloned()); + Cow::Owned(layers) + }, } } /// The full scope for lazy contexts. Useful for features that don't /// have a cursor position (e.g. completions, workspace symbols). - pub fn lazy(&self) -> &[BindingSource] { + /// Only file-scope directives are included since we don't know which + /// function the cursor is in. + pub fn lazy(&self) -> Cow<'_, [BindingSource]> { match self { - Self::Package { lazy, .. } => lazy, - Self::SearchPath(layers) => layers, + Self::Package { lazy, .. } => Cow::Borrowed(lazy), + Self::SearchPath { + directive_layers, + base, + } => { + let file_scope = ScopeId::from(0); + let mut layers: Vec = directive_layers + .iter() + .rev() + .filter(|(_, scope, _)| *scope == file_scope) + .map(|(_, _, l)| l.clone()) + .collect(); + layers.extend(base.iter().cloned()); + Cow::Owned(layers) + }, } } } diff --git a/crates/oak_ide/tests/goto_definition.rs b/crates/oak_ide/tests/goto_definition.rs index ed70a26a5..b71bfe77d 100644 --- a/crates/oak_ide/tests/goto_definition.rs +++ b/crates/oak_ide/tests/goto_definition.rs @@ -9,10 +9,15 @@ use biome_rowan::TextSize; use oak_ide::goto_definition; use oak_ide::FileScope; use oak_ide::NavigationTarget; +use oak_index::external::directive_layers; use oak_index::external::file_layers; use oak_index::external::BindingSource; use oak_index::semantic_index; +use oak_index::semantic_index::DirectiveKind; use oak_index::semantic_index::SemanticIndex; +use oak_index::semantic_index_with_source_resolver; +use oak_index::ScopeId; +use oak_index::SourceResolution; use oak_package::library::Library; use oak_package::package::Package; use oak_package::package_description::Description; @@ -1067,3 +1072,355 @@ fn 
test_namespace_classify_string_selectors() { }) ); } + +// --- source() directive --- + +#[test] +fn test_source_directive_resolves_to_sourced_file() { + // script.R has `source("helpers.R")` then uses `helper`. + // The builder resolves source() via the callback and injects the + // sourced file's exports, enabling goto-definition. + let helpers_source = "helper <- function() 1\n"; + let (_helpers_root, helpers_idx) = parse_source(helpers_source); + let helpers_url = file_url("helpers.R"); + + let script_source = "source(\"helpers.R\")\nhelper\n"; + let script_url = file_url("script.R"); + + let helpers_url_clone = helpers_url.clone(); + let helpers_exports: Vec<_> = helpers_idx + .file_exports() + .into_iter() + .map(|(name, range)| (name.to_string(), helpers_url_clone.clone(), range)) + .collect(); + + let parsed = parse(script_source, RParserOptions::default()); + let script_root = parsed.syntax(); + let script_idx = semantic_index_with_source_resolver(&parsed.tree(), move |_path| { + Some(SourceResolution { + definitions: helpers_exports.clone(), + packages: Vec::new(), + }) + }); + + let dir_layers = directive_layers(script_idx.file_directives()); + let scope = FileScope::search_path(dir_layers, Vec::new()); + + let library = empty_library(); + + let use_offset = script_source.rfind("helper").unwrap() as u32; + let targets = goto_definition( + offset(use_offset), + &script_url, + &script_root, + &script_idx, + &scope, + &library, + ); + assert_eq!(targets, vec![NavigationTarget { + file: helpers_url, + name: "helper".to_string(), + full_range: text_range(0, 6), + focus_range: text_range(0, 6), + }]); +} + +#[test] +fn test_source_directive_resolves_nested_library() { + // helpers.R has `library(dplyr)` and defines `helper`. + // script.R sources helpers.R then uses `mutate` (from dplyr). + // The nested library() directive should be visible via the resolver. 
+ let helpers_source = "library(dplyr)\nhelper <- function() 1\n"; + let (_helpers_root, helpers_idx) = parse_source(helpers_source); + let helpers_url = file_url("helpers.R"); + + let helpers_exports: Vec<_> = helpers_idx + .file_exports() + .into_iter() + .map(|(name, range)| (name.to_string(), helpers_url.clone(), range)) + .collect(); + let helpers_packages: Vec<_> = helpers_idx + .file_directives() + .iter() + .filter_map(|d| match d.kind() { + DirectiveKind::Attach(pkg) => Some(pkg.clone()), + DirectiveKind::Source { .. } => None, + }) + .collect(); + + let script_source = "source(\"helpers.R\")\nmutate\n"; + let script_url = file_url("script.R"); + + let library = test_library(vec![("dplyr", vec!["filter", "mutate", "select"])]); + + let exports_clone = helpers_exports.clone(); + let packages_clone = helpers_packages.clone(); + let parsed = parse(script_source, RParserOptions::default()); + let script_root = parsed.syntax(); + let script_idx = semantic_index_with_source_resolver(&parsed.tree(), move |_path| { + Some(SourceResolution { + definitions: exports_clone.clone(), + packages: packages_clone.clone(), + }) + }); + + let dir_layers = directive_layers(script_idx.file_directives()); + let scope = FileScope::search_path(dir_layers, Vec::new()); + + // `mutate` resolves via dplyr (attached by helpers.R's library() call) + let use_offset = script_source.rfind("mutate").unwrap() as u32; + let targets = goto_definition( + offset(use_offset), + &script_url, + &script_root, + &script_idx, + &scope, + &library, + ); + // Package symbol, no NavigationTarget + assert!(targets.is_empty()); + + // `helper` still resolves to helpers.R + let source_with_helper = "source(\"helpers.R\")\nhelper\n"; + + let exports_clone = helpers_exports.clone(); + let packages_clone = helpers_packages.clone(); + let parsed2 = parse(source_with_helper, RParserOptions::default()); + let script_root2 = parsed2.syntax(); + let script_idx2 = 
semantic_index_with_source_resolver(&parsed2.tree(), move |_path| { + Some(SourceResolution { + definitions: exports_clone.clone(), + packages: packages_clone.clone(), + }) + }); + + let dir_layers = directive_layers(script_idx2.file_directives()); + let scope = FileScope::search_path(dir_layers, Vec::new()); + + let use_offset = source_with_helper.rfind("helper").unwrap() as u32; + let targets = goto_definition( + offset(use_offset), + &script_url, + &script_root2, + &script_idx2, + &scope, + &library, + ); + assert_eq!(targets, vec![NavigationTarget { + file: helpers_url, + name: "helper".to_string(), + full_range: text_range(15, 21), + focus_range: text_range(15, 21), + }]); +} + +#[test] +fn test_directive_not_visible_before_call_site() { + // Directives are position-stamped: only code AFTER a `source()` or + // `library()` call sees its effects. + // + // "mutate\n" offset 0..6 + // "helper\n" offset 7..13 + // "library(dplyr)\n" offset 14..28 + // "source(\"helpers.R\")\n" offset 29..48 + // "mutate\n" offset 49..55 + // "helper\n" offset 56..62 + let helpers_source = "helper <- function() 1\n"; + let (_helpers_root, helpers_idx) = parse_source(helpers_source); + let helpers_url = file_url("helpers.R"); + + let script_source = "mutate\nhelper\nlibrary(dplyr)\nsource(\"helpers.R\")\nmutate\nhelper\n"; + let script_url = file_url("script.R"); + + let library = test_library(vec![("dplyr", vec!["filter", "mutate", "select"])]); + + let helpers_url_clone = helpers_url.clone(); + let helpers_exports: Vec<_> = helpers_idx + .file_exports() + .into_iter() + .map(|(name, range)| (name.to_string(), helpers_url_clone.clone(), range)) + .collect(); + + let parsed = parse(script_source, RParserOptions::default()); + let script_root = parsed.syntax(); + let script_idx = semantic_index_with_source_resolver(&parsed.tree(), move |_path| { + Some(SourceResolution { + definitions: helpers_exports.clone(), + packages: Vec::new(), + }) + }); + + let dir_layers = 
directive_layers(script_idx.file_directives()); + let scope = FileScope::search_path(dir_layers, Vec::new()); + + // `mutate` before library(dplyr) (offset 0) — should NOT resolve + let targets = goto_definition( + offset(0), + &script_url, + &script_root, + &script_idx, + &scope, + &library, + ); + assert!(targets.is_empty()); + + // `helper` before source() (offset 7) — should NOT resolve + let targets = goto_definition( + offset(7), + &script_url, + &script_root, + &script_idx, + &scope, + &library, + ); + assert!(targets.is_empty()); + + // `mutate` after library(dplyr) (offset 49) — package symbol, no NavigationTarget yet (FIXME) + let targets = goto_definition( + offset(49), + &script_url, + &script_root, + &script_idx, + &scope, + &library, + ); + assert!(targets.is_empty()); + + // `helper` after source() (offset 56) — should resolve to helpers.R + let targets = goto_definition( + offset(56), + &script_url, + &script_root, + &script_idx, + &scope, + &library, + ); + assert_eq!(targets, vec![NavigationTarget { + file: helpers_url, + name: "helper".to_string(), + full_range: text_range(0, 6), + focus_range: text_range(0, 6), + }]); +} + +#[test] +fn test_directives_in_function_body_are_scoped() { + // `library()` inside a function body produces a scoped directive: + // visible inside the function but not at file scope. + // `source()` without a resolver is still a no-op. 
+ let script_source = + "f <- function() {\n source(\"helpers.R\")\n library(dplyr)\n mutate\n}\nhelper\nmutate\n"; + let script_url = file_url("script.R"); + let (script_root, script_idx) = parse_source(script_source); + + let library = test_library(vec![("dplyr", vec!["filter", "mutate", "select"])]); + + // library() inside f produces a scoped directive + let directives = script_idx.file_directives(); + assert_eq!(directives.len(), 1); + assert_eq!(directives[0].kind(), &DirectiveKind::Attach("dplyr".into())); + assert_ne!(directives[0].scope(), ScopeId::from(0)); + + let dir_layers = directive_layers(script_idx.file_directives()); + let scope = FileScope::search_path(dir_layers, Vec::new()); + + // `mutate` inside f (after library()) — resolves via scoped dplyr + let use_offset = script_source.find(" mutate").unwrap() as u32 + 2; + let targets = goto_definition( + offset(use_offset), + &script_url, + &script_root, + &script_idx, + &scope, + &library, + ); + // Package symbol, no NavigationTarget + assert!(targets.is_empty()); + + // `helper` at file scope — not resolved (source() had no resolver) + let use_offset = script_source.find("\nhelper").unwrap() as u32 + 1; + let targets = goto_definition( + offset(use_offset), + &script_url, + &script_root, + &script_idx, + &scope, + &library, + ); + assert!(targets.is_empty()); + + // `mutate` at file scope — not resolved (library() directive is + // scoped to f, not visible here) + let use_offset = script_source.rfind("mutate").unwrap() as u32; + let targets = goto_definition( + offset(use_offset), + &script_url, + &script_root, + &script_idx, + &scope, + &library, + ); + assert!(targets.is_empty()); +} + +#[test] +fn test_source_in_function_body_scoping() { + // `source(local = FALSE)` inside a function body scopes directives to the + // function scope, so sourced definitions are NOT visible at file scope. 
+ let helpers_source = "helper <- function() 1\n"; + let (_helpers_root, helpers_idx) = parse_source(helpers_source); + let helpers_url = file_url("helpers.R"); + + let script_source = "f <- function() {\n source(\"helpers.R\")\n helper\n}\nhelper\n"; + let script_url = file_url("script.R"); + + let helpers_url_clone = helpers_url.clone(); + let helpers_exports: Vec<_> = helpers_idx + .file_exports() + .into_iter() + .map(|(name, range)| (name.to_string(), helpers_url_clone.clone(), range)) + .collect(); + + let parsed = parse(script_source, RParserOptions::default()); + let script_root = parsed.syntax(); + let script_idx = semantic_index_with_source_resolver(&parsed.tree(), move |_path| { + Some(SourceResolution { + definitions: helpers_exports.clone(), + packages: Vec::new(), + }) + }); + + let dir_layers = directive_layers(script_idx.file_directives()); + let scope = FileScope::search_path(dir_layers, Vec::new()); + + let library = empty_library(); + + // `helper` inside the function body — should resolve to helpers.R + let inner_offset = script_source.find(" helper\n}").unwrap() as u32 + 2; + let targets = goto_definition( + offset(inner_offset), + &script_url, + &script_root, + &script_idx, + &scope, + &library, + ); + assert_eq!(targets, vec![NavigationTarget { + file: helpers_url, + name: "helper".to_string(), + full_range: text_range(0, 6), + focus_range: text_range(0, 6), + }]); + + // `helper` outside the function — NOT visible + let outer_offset = script_source.rfind("\nhelper\n").unwrap() as u32 + 1; + let targets = goto_definition( + offset(outer_offset), + &script_url, + &script_root, + &script_idx, + &scope, + &library, + ); + assert!(targets.is_empty()); +} diff --git a/crates/oak_index/src/builder.rs b/crates/oak_index/src/builder.rs index c816bf1b2..24ef4806a 100644 --- a/crates/oak_index/src/builder.rs +++ b/crates/oak_index/src/builder.rs @@ -1,3 +1,6 @@ +use std::collections::HashMap; + +use aether_syntax::AnyRArgumentName; use 
aether_syntax::AnyRExpression; use aether_syntax::AnyRParameterName; use aether_syntax::AnyRValue; @@ -20,6 +23,7 @@ use oak_core::syntax_ext::RIdentifierExt; use oak_core::syntax_ext::RStringValueExt; use rustc_hash::FxHashMap; use smallvec::SmallVec; +use url::Url; use crate::index_vec::Idx; use crate::index_vec::IndexVec; @@ -43,16 +47,46 @@ use crate::use_def_map::UseDefMapBuilder; /// Build a [`SemanticIndex`] from a parsed R file. pub fn semantic_index(root: &RRoot) -> SemanticIndex { let range = root.syntax().text_trimmed_range(); - let mut builder = SemanticIndexBuilder::new(range); + let mut builder = SemanticIndexBuilder::new(range, None); + builder.pre_scan_scope(root.syntax()); + builder.collect_expression_list(&root.expressions()); + builder.finish() +} + +/// Build a [`SemanticIndex`] with cross-file `source()` resolution. +/// +/// The resolver callback is called when the builder encounters a +/// `source("path")` call. It should return the sourced file's exported +/// definitions and any `library()` package attachments. See the design +/// comment on `collect_source_directive` for how these are handled. +pub fn semantic_index_with_source_resolver<'a>( + root: &RRoot, + resolver: impl FnMut(&str) -> Option + 'a, +) -> SemanticIndex { + let range = root.syntax().text_trimmed_range(); + let mut builder = SemanticIndexBuilder::new(range, Some(Box::new(resolver))); builder.pre_scan_scope(root.syntax()); builder.collect_expression_list(&root.expressions()); builder.finish() } +/// The result of resolving a `source()` call. Returned by the resolver +/// callback passed to the builder. +pub struct SourceResolution { + /// Definitions to inject as synthetic bindings in the calling scope. + /// Each entry is (name, file_url, range_in_source_file). + pub definitions: Vec<(String, Url, TextRange)>, + /// Package names from `library()` directives in the sourced file + /// (and transitively from files it sources). 
+ pub packages: Vec, +} + +type SourceResolver<'a> = Box Option + 'a>; + // Maintains the preorder allocation invariant on `Scope::descendants`. The // parallel arrays are pushed in lockstep so they stay indexed by the same // `ScopeId`. -struct SemanticIndexBuilder { +struct SemanticIndexBuilder<'a> { scopes: IndexVec, symbol_tables: IndexVec, definitions: IndexVec>, @@ -62,10 +96,11 @@ struct SemanticIndexBuilder { pre_scans: IndexVec, enclosing_snapshots: FxHashMap, directives: Vec, + source_resolver: Option>, } -impl SemanticIndexBuilder { - fn new(range: TextRange) -> Self { +impl<'a> SemanticIndexBuilder<'a> { + fn new(range: TextRange, source_resolver: Option>) -> Self { let mut scopes = IndexVec::new(); let mut symbol_tables = IndexVec::new(); let mut definitions = IndexVec::new(); @@ -102,6 +137,7 @@ impl SemanticIndexBuilder { pre_scans, enclosing_snapshots: FxHashMap::default(), directives: Vec::new(), + source_resolver, } } @@ -344,9 +380,7 @@ impl SemanticIndexBuilder { // also consider nested scopes as long as they're not lazy (e.g. // function definitions or NSE calls that don't evaluate // immediately. - if self.current_scope == ScopeId::from(0) { - self.collect_directive(call); - } + self.collect_directive(call); }, AnyRExpression::RSubset(subset) => { if let Ok(object) = subset.function() { @@ -484,6 +518,12 @@ impl SemanticIndexBuilder { // quoting constructs (`~`, `quote()`, `bquote()`) are recorded as // uses and bindings. Refining this requires special-casing these // forms, which we defer as future work. + // + // Once quoting is handled, `declare()` and `~declare()` will need + // explicit treatment: its arguments are quoted (not evaluated) but + // should still be inspected for directives like `source()`. + // Currently this works by accident because the generic traversal is + // transparent to both `declare()` and `~`. 
_ => { self.collect_descendants(expr.syntax()); }, @@ -663,26 +703,33 @@ impl SemanticIndexBuilder { } } - /// Detect directives like `library(pkg)` and `require(pkg)` at the - /// file-level scope. fn collect_directive(&mut self, call: &aether_syntax::RCall) { let Ok(AnyRExpression::RIdentifier(ident)) = call.function() else { return; }; let fn_name = ident.name_text(); - if fn_name != "library" && fn_name != "require" { - return; + if fn_name == "library" || fn_name == "require" { + self.collect_attach_directive(call); + } else if fn_name == "source" { + self.collect_source_directive(call); } + } + // ## `library()` / `require()` scoping + // + // In R, `library()` always modifies the global search path regardless + // of where it's called. Statically, we scope the directive to + // `self.current_scope`: at file scope it's visible everywhere (sequential + // execution is guaranteed), but inside a function it's only visible + // within that function and its children, since the function might never + // be called. Same reasoning as `source(local = FALSE)` directives. + fn collect_attach_directive(&mut self, call: &aether_syntax::RCall) { let Ok(args) = call.arguments() else { return; }; let mut items = args.items().iter(); - // For now, only recognise exactly one unnamed argument. We'll do - // argument matching later (`character.only` unquoting is another - // complication). 
+ let Some(Ok(first_arg)) = items.next() else { return; }; @@ -693,7 +740,6 @@ impl SemanticIndexBuilder { return; }; - // Extract the package name from identifier or string literal let pkg_name = match &value { AnyRExpression::RIdentifier(ident) => Some(ident.name_text()), AnyRExpression::AnyRValue(AnyRValue::RStringValue(s)) => s.string_text(), @@ -703,12 +749,173 @@ impl SemanticIndexBuilder { return; }; + let call_offset = call.syntax().text_trimmed_range().start(); self.directives.push(Directive { kind: DirectiveKind::Attach(pkg_name), - offset: call.syntax().text_trimmed_range().start(), + offset: call_offset, + scope: self.current_scope, }); } + // ## `source()` resolution + // + // R's `source(file, local = <value>)` evaluates a file in a target + // environment. The `local` parameter controls where definitions land: + // + // - `local = TRUE`: definitions go into the calling environment. + // - `local = FALSE` (default): definitions go into the global + // environment. + // - `local = <env>`: definitions go into `<env>`. + // + // We model the boolean case with two mechanisms: + // + // ### `local = TRUE`, or `source()` at file scope + // + // Top-level bindings in the sourced file are injected as definitions into + // the use-def map as `DefinitionKind::Sourced` via `add_definition`. They + // fully participate in local resolution and shadow prior bindings, just + // like `<-`. At file scope, `local` doesn't matter because the current + // scope IS the global environment, and sequential execution is guaranteed, + // so `source()` overwrites like any other assignment: + // + // ```r + // foo <- 1 + // source("helpers.R") # also defines foo + // foo # resolves to sourced foo (shadowed) + // ``` + // + // ### `local = FALSE` (default) in a nested scope + // + // External top-level bindings are reached through `DirectiveKind::Source` + // entries, which flow through the `FileScope` scope chain alongside + // `library()` / `Attach` directives.
They are only consulted when a symbol + // is unbound after local + enclosing scope resolution, so they never shadow + // local bindings: + // + // ```r + // f <- function() { + // foo <- "local" + // source("helpers.R") # also defines foo, local = FALSE + // foo # resolves to local "local" + // bar # no local def → resolves via directive + // } + // ``` + // + // The directive is scoped to the function (not file scope) because + // top-level code should not assume that the sourcing function will be + // called. We could refine with call analysis in the future though. + // `FileScope::at()` filters by `(offset, scope)`: the directive is visible + // only at cursor positions inside the function (or its children) and after + // the call site. `FileScope::lazy()` only includes file-scope directives, + // so function-scoped ones are conservatively excluded. + // + // ### Resolution chain + // + // Goto-definition resolves a use through three layers: + // + // 1. **Local bindings** (`use_def_map.bindings_at_use`): finds + // `Sourced` definitions from `local = TRUE` / file-scope sources. + // 2. **Enclosing bindings** (`enclosing_bindings`): free variables + // in nested scopes reach ancestor definitions. + // 3. **Scope chain** (`FileScope::at` → `resolve_external_name`): + // `Source` and `Attach` directive layers, searched in reverse + // order (LIFO, matching R's search path where the last + // `library()` or `source()` wins). 
+    /// Record the effect of a `source("path")` call found in the current
+    /// scope.
+    ///
+    /// The path is taken from the first unnamed string-literal argument or
+    /// from a named `file = "..."` argument. `local = TRUE` / `local = FALSE`
+    /// is honoured; any other `local` value keeps the default (`FALSE`).
+    /// Calls without a literal path, or that the resolver cannot resolve,
+    /// are ignored.
+    ///
+    /// At file scope or with `local = TRUE` the sourced definitions are
+    /// added as `DefinitionKind::Sourced` definitions in the current scope.
+    /// Otherwise they are recorded as `DirectiveKind::Source` directives
+    /// scoped to the current scope. Packages attached by the sourced file
+    /// become `Attach` directives in both cases.
+    fn collect_source_directive(&mut self, call: &aether_syntax::RCall) {
+        let Ok(args) = call.arguments() else {
+            return;
+        };
+
+        let mut path: Option<String> = None;
+        let mut is_local = false;
+
+        for item in args.items().iter() {
+            let Ok(arg) = item else { continue };
+
+            if let Some(name_clause) = arg.name_clause() {
+                let Ok(AnyRArgumentName::RIdentifier(name_ident)) = name_clause.name() else {
+                    continue;
+                };
+                let arg_name = name_ident.name_text();
+                if arg_name == "local" {
+                    match arg.value() {
+                        Some(AnyRExpression::RTrueExpression(_)) => is_local = true,
+                        Some(AnyRExpression::RFalseExpression(_)) => is_local = false,
+                        _ => {},
+                    }
+                } else if arg_name == "file" && path.is_none() {
+                    // `source(file = "helpers.R")` is the named spelling of
+                    // the first positional argument.
+                    if let Some(AnyRExpression::AnyRValue(AnyRValue::RStringValue(s))) = arg.value() {
+                        path = s.string_text();
+                    }
+                }
+            } else if path.is_none() {
+                // First positional argument: the file path
+                if let Some(AnyRExpression::AnyRValue(AnyRValue::RStringValue(s))) = arg.value() {
+                    path = s.string_text();
+                }
+            }
+        }
+
+        let Some(path) = path else {
+            return;
+        };
+
+        let call_offset = call.syntax().text_trimmed_range().start();
+        let in_nested_scope = self.current_scope != ScopeId::from(0);
+
+        let Some(resolution) = self.resolve_source(&path) else {
+            return;
+        };
+
+        if is_local || !in_nested_scope {
+            // `local = TRUE` or at file scope: inject into the
+            // use-def map so sourced definitions shadow locals.
+            for (name, file, range) in resolution.definitions {
+                self.add_definition(
+                    &name,
+                    SymbolFlags::IS_BOUND,
+                    DefinitionKind::Sourced { file },
+                    range,
+                );
+            }
+            for pkg in resolution.packages {
+                self.directives.push(Directive {
+                    kind: DirectiveKind::Attach(pkg),
+                    offset: call_offset,
+                    scope: self.current_scope,
+                });
+            }
+        } else {
+            // `local = FALSE` (default) in a nested scope: cross-file
+            // resolution only via directives, scoped to the current scope
+            // instead of the file scope.
+            //
+            // Group the exports per file, keeping the files in the order
+            // the resolver first reported them. All directives pushed here
+            // share the same offset and scope, so their push order alone
+            // decides which layer `FileScope::at` (which searches in
+            // reverse push order) consults first. Iterating a `HashMap`
+            // keyed by file would make that order, and so the winner for a
+            // name exported by two files, vary from run to run.
+            let mut by_file: Vec<(Url, HashMap<String, TextRange>)> = Vec::new();
+            for (name, file, range) in resolution.definitions {
+                let idx = match by_file.iter().position(|(seen, _)| *seen == file) {
+                    Some(idx) => idx,
+                    None => {
+                        by_file.push((file, HashMap::new()));
+                        by_file.len() - 1
+                    },
+                };
+                by_file[idx].1.insert(name, range);
+            }
+            for (file, exports) in by_file {
+                self.directives.push(Directive {
+                    kind: DirectiveKind::Source { file, exports },
+                    offset: call_offset,
+                    scope: self.current_scope,
+                });
+            }
+            for pkg in resolution.packages {
+                self.directives.push(Directive {
+                    kind: DirectiveKind::Attach(pkg),
+                    offset: call_offset,
+                    scope: self.current_scope,
+                });
+            }
+        }
+    }
+
+    /// Call the source resolver for `path`, temporarily taking it out of
+    /// `self` to avoid borrow conflicts.
+    fn resolve_source(&mut self, path: &str) -> Option<SourceResolution> {
+        let mut resolver = self.source_resolver.take()?;
+        let result = resolver(path);
+        self.source_resolver = Some(resolver);
+        result
+    }
+
     fn finish(mut self) -> SemanticIndex {
         self.scopes[ScopeId::from(0)].descendants.end = self.scopes.next_id();
diff --git a/crates/oak_index/src/external.rs b/crates/oak_index/src/external.rs
index 3e6c40479..265fb3478 100644
--- a/crates/oak_index/src/external.rs
+++ b/crates/oak_index/src/external.rs
@@ -1,11 +1,14 @@
 use std::collections::HashMap;
 
 use biome_rowan::TextRange;
+use biome_rowan::TextSize;
 use oak_package::library::Library;
 use oak_package::package_namespace::Namespace;
 use url::Url;
 
+use crate::semantic_index::Directive;
 use crate::semantic_index::DirectiveKind;
+use crate::semantic_index::ScopeId;
 use crate::semantic_index::SemanticIndex;
 
 /// A layer in the scope chain. Layers are ordered most-local-first; resolution
@@ -107,6 +110,9 @@ pub fn resolve_in_package(
 /// Compute the binding-source layers that a single file contributes to the
 /// scope chain: one `FileExports` layer from its top-level definitions, plus
 /// one `PackageExports` layer per `library()`/`require()` directive.
+///
+/// Offsets are discarded since all of a predecessor file's layers are
+/// unconditionally visible.
pub fn file_layers(file: Url, index: &SemanticIndex) -> Vec { let mut layers = Vec::new(); @@ -117,15 +123,34 @@ pub fn file_layers(file: Url, index: &SemanticIndex) -> Vec { } layers.push(BindingSource::FileExports { file, exports }); + let dir_layers = directive_layers(index.file_directives()); + layers.extend(dir_layers.into_iter().map(|(_, _, l)| l)); + + layers +} - for directive in index.file_directives() { +/// Convert directives into scope-chain layers, each paired with the offset +/// of the directive that produced it. +pub fn directive_layers(directives: &[Directive]) -> Vec<(TextSize, ScopeId, BindingSource)> { + let mut layers = Vec::new(); + for directive in directives { + let offset = directive.offset(); match directive.kind() { DirectiveKind::Attach(pkg) => { - layers.push(BindingSource::PackageExports(pkg.clone())); + layers.push(( + offset, + directive.scope(), + BindingSource::PackageExports(pkg.clone()), + )); + }, + DirectiveKind::Source { file, exports } => { + layers.push((offset, directive.scope(), BindingSource::FileExports { + file: file.clone(), + exports: exports.clone(), + })); }, } } - layers } diff --git a/crates/oak_index/src/lib.rs b/crates/oak_index/src/lib.rs index c003582ea..16aa6e46f 100644 --- a/crates/oak_index/src/lib.rs +++ b/crates/oak_index/src/lib.rs @@ -5,6 +5,8 @@ pub mod semantic_index; pub mod use_def_map; pub use builder::semantic_index; +pub use builder::semantic_index_with_source_resolver; +pub use builder::SourceResolution; pub use semantic_index::DefinitionId; pub use semantic_index::ScopeId; pub use semantic_index::UseId; diff --git a/crates/oak_index/src/semantic_index.rs b/crates/oak_index/src/semantic_index.rs index 4832ea4d4..6845c27d6 100644 --- a/crates/oak_index/src/semantic_index.rs +++ b/crates/oak_index/src/semantic_index.rs @@ -1,9 +1,11 @@ +use std::collections::HashMap; use std::ops::Range; use aether_syntax::RSyntaxNode; use biome_rowan::TextRange; use biome_rowan::TextSize; use 
rustc_hash::FxHashMap; +use url::Url; use crate::index_vec::define_index; use crate::index_vec::IndexVec; @@ -119,6 +121,7 @@ impl SemanticIndex { let symbols = &self.symbol_tables[file_scope]; self.definitions[file_scope] .iter() + .filter(|(_id, def)| !matches!(def.kind(), DefinitionKind::Sourced { .. })) .map(|(_id, def)| { let name = symbols.symbol(def.symbol()).name(); (name, def.range()) @@ -126,6 +129,28 @@ impl SemanticIndex { .collect() } + /// All definitions visible at file scope, including sourced ones. + /// + /// Each entry carries the file URL where the definition lives and + /// the range within that file. For own definitions, `file_url` is + /// passed through; for `Sourced` definitions, the URL comes from + /// the `DefinitionKind`. + pub fn file_all_definitions(&self, file_url: &Url) -> Vec<(&str, Url, TextRange)> { + let file_scope = ScopeId::from(0); + let symbols = &self.symbol_tables[file_scope]; + self.definitions[file_scope] + .iter() + .map(|(_id, def)| { + let name = symbols.symbol(def.symbol()).name(); + let url = match def.kind() { + DefinitionKind::Sourced { file } => file.clone(), + _ => file_url.clone(), + }; + (name, url, def.range()) + }) + .collect() + } + /// File-level directives (e.g. `library()` calls) recorded during indexing. pub fn file_directives(&self) -> &[Directive] { &self.directives @@ -152,6 +177,7 @@ impl SemanticIndex { let def_id = self .definitions(scope) .iter() + .filter(|(_id, def)| !matches!(def.kind(), DefinitionKind::Sourced { .. })) .find_map(|(id, d)| d.range().contains(offset).then_some(id)); Some((scope, def_id?)) } @@ -486,6 +512,11 @@ pub enum DefinitionKind { SuperAssignment(RSyntaxNode), Parameter(RSyntaxNode), ForVariable(RSyntaxNode), + /// Injected from a `source()` call. The definition lives in an external + /// file; `range` on the `Definition` gives the name's range in that file. 
+ Sourced { + file: Url, + }, } impl Definition { @@ -528,13 +559,18 @@ impl Use { pub struct Directive { pub(crate) kind: DirectiveKind, pub(crate) offset: TextSize, + pub(crate) scope: ScopeId, } -// TODO: `Source()` directives #[derive(Debug, Clone, PartialEq, Eq)] pub enum DirectiveKind { /// `library(pkg)` or `require(pkg)`: attaches a package to the search path. Attach(String), + /// `source(file)`: brings exports from another file into scope. + Source { + file: Url, + exports: HashMap, + }, } impl Directive { @@ -545,6 +581,10 @@ impl Directive { pub fn offset(&self) -> TextSize { self.offset } + + pub fn scope(&self) -> ScopeId { + self.scope + } } // --- Iterators --- diff --git a/crates/oak_index/tests/builder.rs b/crates/oak_index/tests/builder.rs index 1884bb582..481a3e9d0 100644 --- a/crates/oak_index/tests/builder.rs +++ b/crates/oak_index/tests/builder.rs @@ -1,6 +1,8 @@ use aether_parser::parse; use aether_parser::RParserOptions; use aether_syntax::RSyntaxKind; +use biome_rowan::TextRange; +use biome_rowan::TextSize; use oak_index::semantic_index; use oak_index::semantic_index::DefinitionId; use oak_index::semantic_index::DefinitionKind; @@ -10,6 +12,9 @@ use oak_index::semantic_index::ScopeKind; use oak_index::semantic_index::SemanticIndex; use oak_index::semantic_index::SymbolFlags; use oak_index::semantic_index::UseId; +use oak_index::semantic_index_with_source_resolver; +use oak_index::SourceResolution; +use url::Url; fn index(source: &str) -> SemanticIndex { let parsed = parse(source, RParserOptions::default()); @@ -1360,9 +1365,14 @@ fn test_directive_no_arguments_ignored() { } #[test] -fn test_directive_not_at_file_scope() { +fn test_directive_library_in_function_scope() { + // library() in a function body now records a scoped directive let index = index("f <- function() { library(dplyr) }"); - assert_eq!(directive_kinds(&index), Vec::<&DirectiveKind>::new()); + assert_eq!(directive_kinds(&index), [&DirectiveKind::Attach( + "dplyr".into() + 
)]); + let directives = index.file_directives(); + assert_ne!(directives[0].scope(), ScopeId::from(0)); } #[test] @@ -1378,3 +1388,384 @@ fn test_directive_preserves_offset() { assert_eq!(directives.len(), 1); assert_eq!(directives[0].offset(), biome_rowan::TextSize::from(7)); } + +// --- source() directives --- + +#[test] +fn test_directive_source_no_resolver() { + // Without a resolver, source() produces no directives + let index = index("source(\"helpers.R\")"); + assert_eq!(directive_kinds(&index), Vec::<&DirectiveKind>::new()); +} + +#[test] +fn test_directive_source_single_quoted_no_resolver() { + let index = index("source('utils/helpers.R')"); + assert_eq!(directive_kinds(&index), Vec::<&DirectiveKind>::new()); +} + +#[test] +fn test_directive_source_identifier_ignored() { + let index = index("source(my_file)"); + assert_eq!(directive_kinds(&index), Vec::<&DirectiveKind>::new()); +} + +#[test] +fn test_directive_source_non_static_argument_ignored() { + let index = index("source(paste0(\"path/\", name))"); + assert_eq!(directive_kinds(&index), Vec::<&DirectiveKind>::new()); +} + +#[test] +fn test_directive_source_named_argument_ignored() { + let index = index("source(file = \"helpers.R\")"); + assert_eq!(directive_kinds(&index), Vec::<&DirectiveKind>::new()); +} + +#[test] +fn test_directive_source_local_true_without_resolver() { + // `source("helpers.R", local = TRUE)` is recognized but no resolver, so no directives + let index = index("source(\"helpers.R\", local = TRUE)"); + assert_eq!(directive_kinds(&index), Vec::<&DirectiveKind>::new()); +} + +#[test] +fn test_directive_source_no_arguments_ignored() { + let index = index("source()"); + assert_eq!(directive_kinds(&index), Vec::<&DirectiveKind>::new()); +} + +#[test] +fn test_directive_source_nested_without_resolver() { + // Nested `source()` is recognized but no resolver, so no directives + let index = index("f <- function() { source(\"helpers.R\") }"); + assert_eq!(directive_kinds(&index), 
Vec::<&DirectiveKind>::new()); +} + +#[test] +fn test_directive_source_no_resolver_no_directives() { + let index = index("x <- 1\nsource(\"helpers.R\")"); + let directives = index.file_directives(); + assert_eq!(directives.len(), 0); +} + +#[test] +fn test_directive_source_mixed_with_library() { + let index = index("library(dplyr)\nsource(\"helpers.R\")\nlibrary(tidyr)"); + assert_eq!(directive_kinds(&index), [ + &DirectiveKind::Attach("dplyr".into()), + &DirectiveKind::Attach("tidyr".into()), + ]); +} + +// --- declare() directives --- + +#[test] +fn test_directive_declare_source_no_resolver() { + let index = index("declare(source(\"helpers.R\"))"); + assert_eq!(directive_kinds(&index), Vec::<&DirectiveKind>::new()); +} + +#[test] +fn test_directive_declare_source_single_quotes_no_resolver() { + let index = index("declare(source('utils.R'))"); + assert_eq!(directive_kinds(&index), Vec::<&DirectiveKind>::new()); +} + +#[test] +fn test_directive_tilde_declare_source_no_resolver() { + let index = index("~declare(source(\"helpers.R\"))"); + assert_eq!(directive_kinds(&index), Vec::<&DirectiveKind>::new()); +} + +#[test] +fn test_fixme_directive_declare_library_transparent() { + // `declare()` is transparent: the inner `library(dplyr)` is still + // picked up as a directive. + // FIXME: We should declare `declare()` as a quoting function. 
+ let index = index("declare(library(dplyr))"); + assert_eq!(directive_kinds(&index), [&DirectiveKind::Attach( + "dplyr".into() + )]); +} + +#[test] +fn test_directive_declare_not_at_file_scope() { + let index = index("f <- function() { declare(source(\"helpers.R\")) }"); + assert_eq!(directive_kinds(&index), Vec::<&DirectiveKind>::new()); +} + +#[test] +fn test_directive_tilde_declare_not_at_file_scope() { + let index = index("f <- function() { ~declare(source(\"helpers.R\")) }"); + assert_eq!(directive_kinds(&index), Vec::<&DirectiveKind>::new()); +} + +#[test] +fn test_directive_declare_mixed_with_bare() { + let index = index("library(dplyr)\ndeclare(source(\"helpers.R\"))\nsource(\"utils.R\")"); + assert_eq!(directive_kinds(&index), [&DirectiveKind::Attach( + "dplyr".into() + ),]); +} + +#[test] +fn test_directive_declare_source_no_resolver_no_directives() { + let index = index("x <- 1\ndeclare(source(\"helpers.R\"))"); + let directives = index.file_directives(); + assert_eq!(directives.len(), 0); +} + +#[test] +fn test_directive_tilde_declare_source_no_resolver_no_directives() { + let index = index("x <- 1\n~declare(source(\"helpers.R\"))"); + let directives = index.file_directives(); + assert_eq!(directives.len(), 0); +} + +#[test] +fn test_directive_declare_non_call_arg_ignored() { + let index = index("declare(42)"); + assert_eq!(directive_kinds(&index), Vec::<&DirectiveKind>::new()); +} + +#[test] +fn test_directive_declare_identifier_source_arg_ignored() { + let index = index("declare(source(my_file))"); + assert_eq!(directive_kinds(&index), Vec::<&DirectiveKind>::new()); +} + +// --- source() with resolver --- + +fn index_with_resolver( + source: &str, + resolver: impl FnMut(&str) -> Option, +) -> SemanticIndex { + let parsed = parse(source, RParserOptions::default()); + if parsed.has_error() { + panic!("source has syntax errors: {source}"); + } + semantic_index_with_source_resolver(&parsed.tree(), resolver) +} + +fn helper_resolution() -> 
SourceResolution { + SourceResolution { + definitions: vec![( + "helper".into(), + Url::parse("file:///test/helpers.R").unwrap(), + TextRange::new(TextSize::from(0), TextSize::from(6)), + )], + packages: vec![], + } +} + +#[test] +fn test_source_resolver_injects_definitions() { + // At file scope, source() injects Sourced definitions into the use-def map. + let code = "source(\"helpers.R\")\nhelper\n"; + let index = index_with_resolver(code, |_| Some(helper_resolution())); + let file = ScopeId::from(0); + + // Use 0 is `source`, use 1 is `helper` + let map = index.use_def_map(file); + let bindings = map.bindings_at_use(UseId::from(1)); + assert!(!bindings.definitions().is_empty()); + + let def_id = bindings.definitions()[0]; + let def = &index.definitions(file)[def_id]; + let DefinitionKind::Sourced { file: ref url } = def.kind() else { + panic!("expected Sourced definition, got {:?}", def.kind()); + }; + assert_eq!(url.as_str(), "file:///test/helpers.R"); + + // file_exports() excludes sourced definitions + let exports = index.file_exports(); + assert!(!exports.iter().any(|(name, _)| *name == "helper")); + + // file_all_definitions() includes sourced definitions + let own_url = Url::parse("file:///test/main.R").unwrap(); + let all_defs = index.file_all_definitions(&own_url); + let sourced = all_defs + .iter() + .find(|(name, _, _)| *name == "helper") + .unwrap(); + assert_eq!(sourced.1.as_str(), "file:///test/helpers.R"); +} + +#[test] +fn test_source_resolver_offset_visibility() { + let code = "helper\nsource(\"helpers.R\")\nhelper\n"; + let index = index_with_resolver(code, |_| Some(helper_resolution())); + let file = ScopeId::from(0); + let map = index.use_def_map(file); + + // First `helper` (before source call) is unbound + let first = map.bindings_at_use(UseId::from(0)); + assert!(first.may_be_unbound()); + + // Second `helper` (after source call) resolves to the sourced definition + // Uses: helper(0), source(1), helper(2) + let second = 
map.bindings_at_use(UseId::from(2)); + assert!(!second.definitions().is_empty()); + let def_id = second.definitions()[0]; + let def = &index.definitions(file)[def_id]; + assert!(matches!(def.kind(), DefinitionKind::Sourced { .. })); +} + +#[test] +fn test_source_resolver_in_function_scope() { + let code = "f <- function() {\n source(\"helpers.R\")\n helper\n}\nhelper\n"; + let index = index_with_resolver(code, |_| Some(helper_resolution())); + let fun = ScopeId::from(1); + let file = ScopeId::from(0); + + let fun_map = index.use_def_map(fun); + let inner_bindings = fun_map.bindings_at_use(UseId::from(1)); + assert!(inner_bindings.definitions().is_empty()); + assert!(inner_bindings.may_be_unbound()); + + let file_map = index.use_def_map(file); + let outer_bindings = file_map.bindings_at_use(UseId::from(0)); + assert!(outer_bindings.definitions().is_empty()); + assert!(outer_bindings.may_be_unbound()); + + let source_directive = index + .file_directives() + .iter() + .find(|d| matches!(d.kind(), DirectiveKind::Source { .. })); + assert!(source_directive.is_some()); + assert_eq!(source_directive.unwrap().scope(), ScopeId::from(1)); +} + +#[test] +fn test_source_resolver_packages_become_directives() { + let code = "source(\"helpers.R\")\n"; + let index = index_with_resolver(code, |_| { + Some(SourceResolution { + definitions: vec![], + packages: vec!["dplyr".into()], + }) + }); + + assert_eq!(directive_kinds(&index), [&DirectiveKind::Attach( + "dplyr".into() + )]); +} + +#[test] +fn test_source_resolver_later_shadows_earlier() { + // At file scope, both source() calls inject Sourced definitions + // into the use-def map. The later one shadows the earlier. 
+ let code = "source(\"a.R\")\nsource(\"b.R\")\nfoo\n"; + let parsed = parse(code, RParserOptions::default()); + + let a_url = Url::parse("file:///test/a.R").unwrap(); + let b_url = Url::parse("file:///test/b.R").unwrap(); + let a_url_clone = a_url.clone(); + let b_url_clone = b_url.clone(); + + let index = semantic_index_with_source_resolver(&parsed.tree(), move |path| { + let (url, range) = match path { + "a.R" => ( + a_url_clone.clone(), + TextRange::new(TextSize::from(0), TextSize::from(3)), + ), + "b.R" => ( + b_url_clone.clone(), + TextRange::new(TextSize::from(0), TextSize::from(3)), + ), + _ => return None, + }; + Some(SourceResolution { + definitions: vec![("foo".to_string(), url, range)], + packages: Vec::new(), + }) + }); + + let file = ScopeId::from(0); + let map = index.use_def_map(file); + + // Uses: source(0), source(1), foo(2) + let bindings = map.bindings_at_use(UseId::from(2)); + assert_eq!(bindings.definitions().len(), 1); + + let def_id = bindings.definitions()[0]; + let def = &index.definitions(file)[def_id]; + let DefinitionKind::Sourced { file: ref url } = def.kind() else { + panic!("expected Sourced definition, got {:?}", def.kind()); + }; + assert_eq!(*url, b_url); +} + +#[test] +fn test_source_resolver_local_true_in_function_scope() { + // `local = TRUE` injects Sourced definitions into the function + // scope's use-def map, not into directives. + let code = "f <- function() {\n source(\"helpers.R\", local = TRUE)\n helper\n}\nhelper\n"; + let index = index_with_resolver(code, |_| Some(helper_resolution())); + let fun = ScopeId::from(1); + let file = ScopeId::from(0); + + let fun_map = index.use_def_map(fun); + // Function scope uses: source(0), helper(1) + let inner_bindings = fun_map.bindings_at_use(UseId::from(1)); + assert_eq!(inner_bindings.definitions().len(), 1); + let def = &index.definitions(fun)[inner_bindings.definitions()[0]]; + assert!(matches!(def.kind(), DefinitionKind::Sourced { .. 
})); + + // File scope: `helper` does not resolve + let file_map = index.use_def_map(file); + let outer_bindings = file_map.bindings_at_use(UseId::from(0)); + assert!(outer_bindings.definitions().is_empty()); +} + +#[test] +fn test_source_resolver_local_true_shadows_local_def() { + // `source(local = TRUE)` injects into the use-def map and + // shadows a prior local binding. + let code = "f <- function() {\n foo <- 1\n source(\"helpers.R\", local = TRUE)\n foo\n}\n"; + let index = index_with_resolver(code, |_| { + Some(SourceResolution { + definitions: vec![( + "foo".into(), + Url::parse("file:///test/helpers.R").unwrap(), + TextRange::new(TextSize::from(0), TextSize::from(3)), + )], + packages: vec![], + }) + }); + let fun = ScopeId::from(1); + + let fun_map = index.use_def_map(fun); + // Function scope uses: source(0), foo(1) + let bindings = fun_map.bindings_at_use(UseId::from(1)); + assert_eq!(bindings.definitions().len(), 1); + let def = &index.definitions(fun)[bindings.definitions()[0]]; + assert!(matches!(def.kind(), DefinitionKind::Sourced { .. })); +} + +#[test] +fn test_source_resolver_local_false_does_not_shadow_local_def() { + // `source(local = FALSE)` (the default) in a function scope does not + // shadow a prior local binding: the sourced definition becomes a + // directive scoped to the function, leaving the local one intact. 
+ let code = "f <- function() {\n foo <- 1\n source(\"helpers.R\")\n foo\n}\n"; + let index = index_with_resolver(code, |_| { + Some(SourceResolution { + definitions: vec![( + "foo".into(), + Url::parse("file:///test/helpers.R").unwrap(), + TextRange::new(TextSize::from(0), TextSize::from(3)), + )], + packages: vec![], + }) + }); + let fun = ScopeId::from(1); + + let fun_map = index.use_def_map(fun); + // Function scope uses: source(0), foo(1) + let bindings = fun_map.bindings_at_use(UseId::from(1)); + assert_eq!(bindings.definitions().len(), 1); + let def = &index.definitions(fun)[bindings.definitions()[0]]; + assert!(matches!(def.kind(), DefinitionKind::Assignment(_))); +} diff --git a/crates/oak_index/tests/external.rs b/crates/oak_index/tests/external.rs index f10eb90d5..b1912deac 100644 --- a/crates/oak_index/tests/external.rs +++ b/crates/oak_index/tests/external.rs @@ -324,6 +324,22 @@ fn test_file_layers_empty_file() { }); } +#[test] +fn test_file_layers_source_directive_skipped() { + let index = index_source("library(dplyr)\nsource(\"helpers.R\")\nx <- 1"); + let layers = file_layers(file_url("script.R"), &index); + + // FileExports + PackageExports(dplyr), source() is not emitted as a layer + assert_eq!(layers.len(), 2); + assert_matches!(&layers[0], BindingSource::FileExports { exports, .. } => { + assert_eq!(exports.len(), 1); + assert!(exports.contains_key("x")); + }); + assert_matches!(&layers[1], BindingSource::PackageExports(pkg) => { + assert_eq!(pkg, "dplyr"); + }); +} + // --- Integration: file_layers -> resolve_external_name --- #[test]