diff --git a/.blue/docs/rfcs/0010-realm-semantic-index.md b/.blue/docs/rfcs/0010-realm-semantic-index.md index 0028bda..d17faac 100644 --- a/.blue/docs/rfcs/0010-realm-semantic-index.md +++ b/.blue/docs/rfcs/0010-realm-semantic-index.md @@ -314,14 +314,14 @@ blue impact src/domain.rs - [x] Add schema to blue.db (file_index, symbol_index, FTS5 tables) - [x] Create versioned indexing prompt for structured YAML extraction -- [ ] Implement Ollama integration with qwen2.5:3b default +- [x] Implement Ollama integration with qwen2.5:3b default - [x] Implement `blue index --all` for bootstrap - [x] Implement `blue index --diff` for staged files - [x] Implement `blue index --file` for single-file updates - [x] Implement `blue index --install-hook` for git hook setup - [x] Implement `blue index --refresh` for stale entry updates - [x] Implement `blue index status` for freshness reporting -- [ ] Add large file handling (>1000 lines warning) +- [x] Add large file handling (>1000 lines warning) - [x] Implement `blue search` with FTS5 backend - [x] Implement `blue impact` for dependency queries - [x] Add MCP tools (5 tools) diff --git a/apps/blue-cli/Cargo.toml b/apps/blue-cli/Cargo.toml index 1899611..9f9423c 100644 --- a/apps/blue-cli/Cargo.toml +++ b/apps/blue-cli/Cargo.toml @@ -13,6 +13,7 @@ path = "src/main.rs" [dependencies] blue-core.workspace = true blue-mcp.workspace = true +blue-ollama.workspace = true clap.workspace = true anyhow.workspace = true tokio.workspace = true diff --git a/apps/blue-cli/src/main.rs b/apps/blue-cli/src/main.rs index da74354..24d8d04 100644 --- a/apps/blue-cli/src/main.rs +++ b/apps/blue-cli/src/main.rs @@ -1530,6 +1530,8 @@ async fn detect_ollama_model() -> Option { async fn handle_index_command(command: IndexCommands) -> Result<()> { use blue_core::store::DocumentStore; + use blue_core::{Indexer, IndexerConfig, is_indexable_file, LocalLlmConfig}; + use blue_ollama::OllamaLlm; use std::path::Path; // Get the .blue database path @@ -1545,21 +1547,64 @@ async fn handle_index_command(command: IndexCommands) -> Result<()> { match command { IndexCommands::All { path, model } => { - let target = path.as_deref().unwrap_or("."); + let target_path = path.as_deref().unwrap_or("."); let model_name = model.as_deref().unwrap_or("qwen2.5:3b"); - println!("Indexing all files in '{}' with model '{}'...", target, model_name); - println!("(Full indexing requires Ollama running with the model pulled)"); + // Collect all indexable files + let files = collect_indexable_files(Path::new(target_path))?; + println!("Found {} indexable files in '{}'", files.len(), target_path); - // For now, show what would be indexed - let count = count_indexable_files(Path::new(target))?; - println!("Found {} indexable files.", count); - println!("\nTo complete indexing:"); - println!(" 1. Ensure Ollama is running: ollama serve"); - println!(" 2. Pull the model: ollama pull {}", model_name); - println!(" 3. Run this command again"); + if files.is_empty() { + println!("No files to index."); + return Ok(()); + } - // TODO: Implement actual indexing with Ollama integration + // Try to connect to Ollama + let llm_config = LocalLlmConfig { + model: model_name.to_string(), + use_external: true, // Use existing Ollama instance + ..Default::default() + }; + + let llm = OllamaLlm::new(&llm_config); + if let Err(e) = llm.start() { + println!("Ollama not available: {}", e); + println!("\nTo index files:"); + println!(" 1. Start Ollama: ollama serve"); + println!(" 2. Pull the model: ollama pull {}", model_name); + println!(" 3. Run this command again"); + return Ok(()); + } + + println!("Indexing with model '{}'...\n", model_name); + + let indexer_config = IndexerConfig { + model: model_name.to_string(), + ..Default::default() + }; + let indexer = Indexer::new(llm, indexer_config); + + let mut indexed = 0; + let mut errors = 0; + + for file_path in &files { + let path = Path::new(file_path); + print!(" {} ... ", file_path); + + match indexer.index_and_store(path, &store) { + Ok(result) => { + let partial = if result.is_partial { " (partial)" } else { "" }; + println!("{} symbols{}", result.symbols.len(), partial); + indexed += 1; + } + Err(e) => { + println!("error: {}", e); + errors += 1; + } + } + } + + println!("\nIndexed {} files ({} errors)", indexed, errors); } IndexCommands::Diff { model } => { @@ -1570,41 +1615,109 @@ async fn handle_index_command(command: IndexCommands) -> Result<()> { .args(["diff", "--cached", "--name-only"]) .output()?; - let staged_files: Vec<&str> = std::str::from_utf8(&output.stdout)? + let staged_files: Vec = std::str::from_utf8(&output.stdout)? .lines() .filter(|l| !l.is_empty()) + .filter(|l| is_indexable_file(Path::new(l))) + .map(|s| s.to_string()) .collect(); if staged_files.is_empty() { - println!("No staged files to index."); + println!("No indexable staged files."); return Ok(()); } - println!("Indexing {} staged file(s) with '{}'...", staged_files.len(), model_name); - for file in &staged_files { - println!(" {}", file); + // Try to connect to Ollama + let llm_config = LocalLlmConfig { + model: model_name.to_string(), + use_external: true, + ..Default::default() + }; + + let llm = OllamaLlm::new(&llm_config); + if let Err(_) = llm.start() { + // Silently skip if Ollama not available (pre-commit hook shouldn't block) + return Ok(()); } - // TODO: Implement actual indexing + println!("Indexing {} staged file(s)...", staged_files.len()); + + let indexer_config = IndexerConfig { + model: model_name.to_string(), + ..Default::default() + }; + let indexer = Indexer::new(llm, indexer_config); + + for file_path in &staged_files { + let path = Path::new(file_path); + if path.exists() { + match indexer.index_and_store(path, &store) { + Ok(result) => { + println!(" {} - {} symbols", file_path, result.symbols.len()); + } + Err(e) => { + println!(" {} - error: {}", file_path, e); + } + } + } + } } IndexCommands::File { path, model } => { let model_name = model.as_deref().unwrap_or("qwen2.5:3b"); + let file_path = Path::new(&path); - if !Path::new(&path).exists() { + if !file_path.exists() { println!("File not found: {}", path); return Ok(()); } + // Try to connect to Ollama + let llm_config = LocalLlmConfig { + model: model_name.to_string(), + use_external: true, + ..Default::default() + }; + + let llm = OllamaLlm::new(&llm_config); + if let Err(e) = llm.start() { + println!("Ollama not available: {}", e); + println!("\nStart Ollama first: ollama serve"); + return Ok(()); + } + println!("Indexing '{}' with '{}'...", path, model_name); - // TODO: Implement single file indexing + let indexer_config = IndexerConfig { + model: model_name.to_string(), + ..Default::default() + }; + let indexer = Indexer::new(llm, indexer_config); + + match indexer.index_and_store(file_path, &store) { + Ok(result) => { + println!("\nSummary: {}", result.summary.unwrap_or_default()); + if let Some(rel) = &result.relationships { + println!("\nRelationships:\n{}", rel); + } + println!("\nSymbols ({}):", result.symbols.len()); + for sym in &result.symbols { + let lines = match (sym.start_line, sym.end_line) { + (Some(s), Some(e)) => format!(" (lines {}-{})", s, e), + (Some(s), None) => format!(" (line {})", s), + _ => String::new(), + }; + println!(" {} ({}){}", sym.name, sym.kind, lines); + } + } + Err(e) => { + println!("Error: {}", e); + } + } } IndexCommands::Refresh { model } => { let model_name = model.as_deref().unwrap_or("qwen2.5:3b"); - - // Get current realm (default to "default" for single-repo) let realm = "default"; let (file_count, symbol_count) = store.get_index_stats(realm)?; @@ -1615,10 +1728,67 @@ async fn handle_index_command(command: IndexCommands) -> Result<()> { return Ok(()); } - println!("Checking for stale entries..."); - println!("(Refresh with model '{}')", model_name); + // Get all indexed files and check which are stale + let indexed_files = store.list_file_index(realm, None)?; + let mut stale_files = Vec::new(); - // TODO: Implement refresh logic - compare hashes + for entry in &indexed_files { + let path = Path::new(&entry.file_path); + if path.exists() { + if let Ok(content) = std::fs::read_to_string(path) { + let current_hash = hash_file_content(&content); + if current_hash != entry.file_hash { + stale_files.push(entry.file_path.clone()); + } + } + } + } + + if stale_files.is_empty() { + println!("All indexed files are up to date."); + return Ok(()); + } + + println!("Found {} stale file(s)", stale_files.len()); + + // Try to connect to Ollama + let llm_config = LocalLlmConfig { + model: model_name.to_string(), + use_external: true, + ..Default::default() + }; + + let llm = OllamaLlm::new(&llm_config); + if let Err(e) = llm.start() { + println!("Ollama not available: {}", e); + println!("\nStale files:"); + for f in &stale_files { + println!(" {}", f); + } + return Ok(()); + } + + println!("Re-indexing stale files with '{}'...\n", model_name); + + let indexer_config = IndexerConfig { + model: model_name.to_string(), + ..Default::default() + }; + let indexer = Indexer::new(llm, indexer_config); + + for file_path in &stale_files { + let path = Path::new(file_path); + print!(" {} ... ", file_path); + + match indexer.index_and_store(path, &store) { + Ok(result) => { + println!("{} symbols", result.symbols.len()); + } + Err(e) => { + println!("error: {}", e); + } + } + } } IndexCommands::InstallHook => { @@ -1669,6 +1839,51 @@ blue index diff 2>/dev/null || true Ok(()) } +/// Collect all indexable files in a directory +fn collect_indexable_files(dir: &std::path::Path) -> Result> { + use blue_core::{is_indexable_file, should_skip_dir}; + use std::fs; + + let mut files = Vec::new(); + + fn walk_dir(dir: &std::path::Path, files: &mut Vec) -> Result<()> { + if !dir.is_dir() { + return Ok(()); + } + + for entry in fs::read_dir(dir)? { + let entry = entry?; + let path = entry.path(); + let name = path.file_name().and_then(|n| n.to_str()).unwrap_or(""); + + if path.is_dir() { + if !should_skip_dir(name) { + walk_dir(&path, files)?; + } + } else if is_indexable_file(&path) { + if let Some(s) = path.to_str() { + files.push(s.to_string()); + } + } + } + Ok(()) + } + + walk_dir(dir, &mut files)?; + files.sort(); + Ok(files) +} + +/// Hash file content for staleness detection +fn hash_file_content(content: &str) -> String { + use std::collections::hash_map::DefaultHasher; + use std::hash::{Hash, Hasher}; + + let mut hasher = DefaultHasher::new(); + content.hash(&mut hasher); + format!("{:x}", hasher.finish()) +} + async fn handle_search_command(query: &str, symbols_only: bool, limit: usize) -> Result<()> { use blue_core::store::DocumentStore; @@ -1798,49 +2013,3 @@ async fn handle_impact_command(file: &str) -> Result<()> { Ok(()) } - -fn count_indexable_files(dir: &std::path::Path) -> Result { - use std::fs; - use std::path::Path; - - let mut count = 0; - - // File extensions we care about - let extensions: &[&str] = &[ - "rs", "py", "js", "ts", "tsx", "jsx", "go", "java", "c", "cpp", "h", "hpp", - "rb", "php", "swift", "kt", "scala", "clj", "ex", "exs", "erl", "hs", - "ml", "mli", "sql", "sh", "bash", "zsh", "yaml", "yml", "toml", "json", - ]; - - // Directories to skip - let skip_dirs: &[&str] = &[ - "node_modules", "target", ".git", "__pycache__", "venv", ".venv", - "dist", "build", ".next", ".nuxt", "vendor", ".cargo", - ]; - - fn walk_dir(dir: &Path, extensions: &[&str], skip_dirs: &[&str], count: &mut usize) -> Result<()> { - if !dir.is_dir() { - return Ok(()); - } - - for entry in fs::read_dir(dir)? { - let entry = entry?; - let path = entry.path(); - let name = path.file_name().and_then(|n| n.to_str()).unwrap_or(""); - - if path.is_dir() { - if !skip_dirs.contains(&name) && !name.starts_with('.') { - walk_dir(&path, extensions, skip_dirs, count)?; - } - } else if let Some(ext) = path.extension().and_then(|e| e.to_str()) { - if extensions.contains(&ext) { - *count += 1; - } - } - } - Ok(()) - } - - walk_dir(dir, extensions, skip_dirs, &mut count)?; - Ok(count) -} diff --git a/crates/blue-core/src/indexer.rs b/crates/blue-core/src/indexer.rs new file mode 100644 index 0000000..9895387 --- /dev/null +++ b/crates/blue-core/src/indexer.rs @@ -0,0 +1,454 @@ +//! Semantic file indexer (RFC 0010) +//! +//! Uses Ollama with qwen2.5:3b to analyze source files and extract: +//! - Summary: one-sentence description +//! - Relationships: dependencies and connections to other files +//! - Symbols: functions, structs, classes with line numbers + +use std::collections::hash_map::DefaultHasher; +use std::hash::{Hash, Hasher}; +use std::path::Path; + +use serde::{Deserialize, Serialize}; +use tracing::{debug, info, warn}; + +use crate::store::{DocumentStore, FileIndexEntry, SymbolIndexEntry}; +use crate::{CompletionOptions, LlmError, LlmProvider}; + +/// Default model for indexing +pub const DEFAULT_INDEX_MODEL: &str = "qwen2.5:3b"; + +/// Maximum file size in lines before partial indexing +pub const MAX_FILE_LINES: usize = 1000; + +/// Indexer configuration +#[derive(Debug, Clone)] +pub struct IndexerConfig { + pub model: String, + pub realm: String, + pub repo: String, + pub max_tokens: usize, + pub temperature: f32, +} + +impl Default for IndexerConfig { + fn default() -> Self { + Self { + model: DEFAULT_INDEX_MODEL.to_string(), + realm: "default".to_string(), + repo: "default".to_string(), + max_tokens: 2048, + temperature: 0.1, // Low temperature for consistent structured output + } + } +} + +/// Result of indexing a file +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct IndexResult { + pub file_path: String, + pub file_hash: String, + pub summary: Option, + pub relationships: Option, + pub symbols: Vec, + pub is_partial: bool, + pub error: Option, +} + +/// A parsed symbol from AI output +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct ParsedSymbol { + pub name: String, + pub kind: String, + pub start_line: Option, + pub end_line: Option, + pub description: Option, +} + +/// The indexer that uses LLM to analyze files +pub struct Indexer { + provider: P, + config: IndexerConfig, +} + +impl Indexer

{ + /// Create a new indexer with the given LLM provider + pub fn new(provider: P, config: IndexerConfig) -> Self { + Self { provider, config } + } + + /// Index a single file and return the result + pub fn index_file(&self, file_path: &Path) -> Result { + let path_str = file_path.to_string_lossy().to_string(); + + // Read file contents + let content = std::fs::read_to_string(file_path) + .map_err(|e| IndexerError::FileRead(path_str.clone(), e.to_string()))?; + + // Calculate hash + let file_hash = hash_content(&content); + + // Check file size + let line_count = content.lines().count(); + let is_partial = line_count > MAX_FILE_LINES; + + let content_to_index = if is_partial { + // Take first MAX_FILE_LINES lines + content.lines().take(MAX_FILE_LINES).collect::>().join("\n") + } else { + content.clone() + }; + + // Generate prompt + let prompt = generate_index_prompt(&path_str, &content_to_index, is_partial); + + // Call LLM + let options = CompletionOptions { + max_tokens: self.config.max_tokens, + temperature: self.config.temperature, + stop_sequences: vec!["```".to_string()], // Stop at end of YAML block + }; + + let completion = self.provider.complete(&prompt, &options) + .map_err(|e| IndexerError::LlmError(e))?; + + // Parse YAML response + let parsed = parse_index_response(&completion.text); + + Ok(IndexResult { + file_path: path_str, + file_hash, + summary: parsed.summary, + relationships: parsed.relationships, + symbols: parsed.symbols, + is_partial, + error: parsed.error, + }) + } + + /// Index a file and store in the database + pub fn index_and_store( + &self, + file_path: &Path, + store: &DocumentStore, + ) -> Result { + let result = self.index_file(file_path)?; + + // Create file index entry + let mut entry = FileIndexEntry::new( + &self.config.realm, + &self.config.repo, + &result.file_path, + &result.file_hash, + ); + entry.summary = result.summary.clone(); + entry.relationships = result.relationships.clone(); + + // Store in database + let file_id = store.upsert_file_index(&entry) + .map_err(|e| IndexerError::StoreError(e.to_string()))?; + + // Convert and store symbols + let symbols: Vec = result.symbols.iter().map(|s| { + SymbolIndexEntry { + id: None, + file_id, + name: s.name.clone(), + kind: s.kind.clone(), + start_line: s.start_line, + end_line: s.end_line, + description: s.description.clone(), + } + }).collect(); + + store.set_file_symbols(file_id, &symbols) + .map_err(|e| IndexerError::StoreError(e.to_string()))?; + + info!("Indexed {} with {} symbols", result.file_path, symbols.len()); + + Ok(result) + } + + /// Check if a file needs re-indexing + pub fn needs_indexing(&self, file_path: &Path, store: &DocumentStore) -> Result { + let path_str = file_path.to_string_lossy().to_string(); + + // Read file and calculate hash + let content = std::fs::read_to_string(file_path) + .map_err(|e| IndexerError::FileRead(path_str.clone(), e.to_string()))?; + let current_hash = hash_content(&content); + + // Check against stored hash + store.is_file_stale(&self.config.realm, &self.config.repo, &path_str, ¤t_hash) + .map_err(|e| IndexerError::StoreError(e.to_string())) + } +} + +/// Generate the indexing prompt +fn generate_index_prompt(file_path: &str, content: &str, is_partial: bool) -> String { + let partial_note = if is_partial { + "\n\nNote: This is a large file. Only the first 1000 lines are shown. Include a note about this in the summary." + } else { + "" + }; + + format!( + r#"Analyze this source file and provide structured information about it. + +File: {file_path}{partial_note} + +``` +{content} +``` + +Provide your analysis as YAML with this exact structure: + +```yaml +summary: "One sentence describing what this file does" + +relationships: | + Describe how this file relates to other files. + List imports, dependencies, and what uses this file. + Be specific about file names when visible. + +symbols: + - name: "SymbolName" + kind: "function|struct|class|enum|const|trait|interface|type|method" + start_line: 10 + end_line: 25 + description: "What this symbol does" +``` + +Rules: +- Summary must be ONE sentence +- Relationships should mention specific file names when imports are visible +- Only include significant symbols (skip trivial helpers, private internals) +- Line numbers must be accurate +- Kind must be one of: function, struct, class, enum, const, trait, interface, type, method +- Output valid YAML only"# + ) +} + +/// Parsed response from the LLM +#[derive(Debug, Default)] +struct ParsedResponse { + summary: Option, + relationships: Option, + symbols: Vec, + error: Option, +} + +/// Parse the YAML response from the LLM +fn parse_index_response(response: &str) -> ParsedResponse { + // Try to find YAML block + let yaml_content = if let Some(start) = response.find("```yaml") { + let after_marker = &response[start + 7..]; + if let Some(end) = after_marker.find("```") { + after_marker[..end].trim() + } else { + after_marker.trim() + } + } else if let Some(start) = response.find("summary:") { + // No code fence, but starts with summary + response[start..].trim() + } else { + response.trim() + }; + + // Parse YAML + match serde_yaml::from_str::(yaml_content) { + Ok(value) => { + let summary = value.get("summary") + .and_then(|v| v.as_str()) + .map(|s| s.to_string()); + + let relationships = value.get("relationships") + .and_then(|v| v.as_str()) + .map(|s| s.trim().to_string()); + + let symbols = value.get("symbols") + .and_then(|v| v.as_sequence()) + .map(|seq| { + seq.iter().filter_map(|item| { + let name = item.get("name")?.as_str()?.to_string(); + let kind = item.get("kind")?.as_str()?.to_string(); + + Some(ParsedSymbol { + name, + kind, + start_line: item.get("start_line") + .and_then(|v| v.as_i64()) + .map(|n| n as i32), + end_line: item.get("end_line") + .and_then(|v| v.as_i64()) + .map(|n| n as i32), + description: item.get("description") + .and_then(|v| v.as_str()) + .map(|s| s.to_string()), + }) + }).collect() + }) + .unwrap_or_default(); + + ParsedResponse { + summary, + relationships, + symbols, + error: None, + } + } + Err(e) => { + warn!("Failed to parse YAML response: {}", e); + debug!("Response was: {}", yaml_content); + + ParsedResponse { + summary: None, + relationships: None, + symbols: vec![], + error: Some(format!("YAML parse error: {}", e)), + } + } + } +} + +/// Calculate hash of file content +fn hash_content(content: &str) -> String { + let mut hasher = DefaultHasher::new(); + content.hash(&mut hasher); + format!("{:x}", hasher.finish()) +} + +/// Indexer errors +#[derive(Debug, thiserror::Error)] +pub enum IndexerError { + #[error("Failed to read file '{0}': {1}")] + FileRead(String, String), + + #[error("LLM error: {0}")] + LlmError(#[from] LlmError), + + #[error("Store error: {0}")] + StoreError(String), + + #[error("Index error: {0}")] + Other(String), +} + +/// File extensions we should index +pub fn is_indexable_file(path: &Path) -> bool { + let extensions: &[&str] = &[ + "rs", "py", "js", "ts", "tsx", "jsx", "go", "java", "c", "cpp", "h", "hpp", + "rb", "php", "swift", "kt", "scala", "clj", "ex", "exs", "erl", "hs", + "ml", "mli", "sql", "sh", "bash", "zsh", "yaml", "yml", "toml", "json", + ]; + + path.extension() + .and_then(|e| e.to_str()) + .map(|e| extensions.contains(&e)) + .unwrap_or(false) +} + +/// Directories to skip when indexing +pub fn should_skip_dir(name: &str) -> bool { + let skip_dirs: &[&str] = &[ + "node_modules", "target", ".git", "__pycache__", "venv", ".venv", + "dist", "build", ".next", ".nuxt", "vendor", ".cargo", ".blue", + ]; + + skip_dirs.contains(&name) || name.starts_with('.') +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_hash_content() { + let hash1 = hash_content("hello"); + let hash2 = hash_content("hello"); + let hash3 = hash_content("world"); + + assert_eq!(hash1, hash2); + assert_ne!(hash1, hash3); + } + + #[test] + fn test_is_indexable_file() { + assert!(is_indexable_file(Path::new("foo.rs"))); + assert!(is_indexable_file(Path::new("bar.py"))); + assert!(is_indexable_file(Path::new("baz.ts"))); + assert!(!is_indexable_file(Path::new("readme.md"))); + assert!(!is_indexable_file(Path::new("image.png"))); + } + + #[test] + fn test_should_skip_dir() { + assert!(should_skip_dir("node_modules")); + assert!(should_skip_dir("target")); + assert!(should_skip_dir(".git")); + assert!(should_skip_dir(".hidden")); + assert!(!should_skip_dir("src")); + assert!(!should_skip_dir("lib")); + } + + #[test] + fn test_parse_index_response_valid() { + let response = r#"```yaml +summary: "This file handles user authentication" + +relationships: | + Imports auth module from ./auth.rs + Used by main.rs for login flow + +symbols: + - name: "authenticate" + kind: "function" + start_line: 10 + end_line: 25 + description: "Validates user credentials" +```"#; + + let parsed = parse_index_response(response); + assert_eq!(parsed.summary, Some("This file handles user authentication".to_string())); + assert!(parsed.relationships.is_some()); + assert_eq!(parsed.symbols.len(), 1); + assert_eq!(parsed.symbols[0].name, "authenticate"); + assert_eq!(parsed.symbols[0].kind, "function"); + assert_eq!(parsed.symbols[0].start_line, Some(10)); + } + + #[test] + fn test_parse_index_response_no_fence() { + let response = r#"summary: "Test file" + +relationships: | + No dependencies + +symbols: []"#; + + let parsed = parse_index_response(response); + assert_eq!(parsed.summary, Some("Test file".to_string())); + assert!(parsed.symbols.is_empty()); + } + + #[test] + fn test_parse_index_response_invalid() { + let response = "this is not valid yaml { broken }"; + let parsed = parse_index_response(response); + assert!(parsed.error.is_some()); + } + + #[test] + fn test_generate_index_prompt() { + let prompt = generate_index_prompt("test.rs", "fn main() {}", false); + assert!(prompt.contains("test.rs")); + assert!(prompt.contains("fn main()")); + assert!(!prompt.contains("large file")); + } + + #[test] + fn test_generate_index_prompt_partial() { + let prompt = generate_index_prompt("test.rs", "fn main() {}", true); + assert!(prompt.contains("large file")); + } +} diff --git a/crates/blue-core/src/lib.rs b/crates/blue-core/src/lib.rs index ae2aaa6..7c59821 100644 --- a/crates/blue-core/src/lib.rs +++ b/crates/blue-core/src/lib.rs @@ -15,6 +15,7 @@ const _BLUE_SECRET_NAME: &str = "Sheepey"; // pronounced "Shee-paay" pub mod daemon; pub mod documents; +pub mod indexer; pub mod llm; pub mod realm; pub mod repo; @@ -24,6 +25,7 @@ pub mod voice; pub mod workflow; pub use documents::{Adr, Audit, AuditFinding, AuditSeverity, AuditType, Decision, Rfc, Spike, SpikeOutcome, Status, Task, update_markdown_status}; +pub use indexer::{Indexer, IndexerConfig, IndexerError, IndexResult, ParsedSymbol, is_indexable_file, should_skip_dir, DEFAULT_INDEX_MODEL, MAX_FILE_LINES}; pub use llm::{CompletionOptions, CompletionResult, LlmBackendChoice, LlmConfig, LlmError, LlmManager, LlmProvider, LlmProviderChoice, LocalLlmConfig, ApiLlmConfig, KeywordLlm, MockLlm, ProviderStatus}; pub use repo::{detect_blue, BlueHome, RepoError, WorktreeInfo}; pub use state::{ItemType, ProjectState, StateError, StatusSummary, WorkItem};