diff --git a/.blue/context.manifest.yaml b/.blue/context.manifest.yaml new file mode 100644 index 0000000..241a4c1 --- /dev/null +++ b/.blue/context.manifest.yaml @@ -0,0 +1,42 @@ +# Blue Context Manifest (RFC 0016) +# +# This file configures what context gets injected into Claude's context window. +# Three tiers: identity (always), workflow (activity-triggered), reference (on-demand). + +version: 1 + +# Identity Tier - "Who am I" - Always injected at session start +# Contains foundational beliefs and voice patterns +identity: + sources: + - uri: blue://docs/adrs/ + label: Architecture Decision Records + - uri: blue://context/voice + label: Voice and tone patterns + max_tokens: 500 + +# Workflow Tier - "What should I do" - Triggered by activity +# Contains current work context +workflow: + sources: + - uri: blue://state/current-rfc + label: Active RFC + refresh_triggers: + - on_rfc_change + max_tokens: 2000 + +# Reference Tier - "How does this work" - On-demand via MCP Resources +# Contains full documentation for deep dives +reference: + graph: blue://context/relevance + max_tokens: 4000 + staleness_days: 30 + +# Plugins - External context providers (optional) +# plugins: +# - uri: blue://jira/ +# provides: +# - ticket-context +# - acceptance-criteria +# salience_triggers: +# - commit_msg_pattern: "^[A-Z]+-\\d+" diff --git a/.blue/docs/dialogues/rfc-0017-dynamic-context-activation.dialogue.md b/.blue/docs/dialogues/rfc-0017-dynamic-context-activation.dialogue.md new file mode 100644 index 0000000..92e99a8 --- /dev/null +++ b/.blue/docs/dialogues/rfc-0017-dynamic-context-activation.dialogue.md @@ -0,0 +1,117 @@ +# Alignment Dialogue: RFC 0017 Dynamic Context Activation + +| | | +|---|---| +| **Date** | 2026-01-25 | +| **Experts** | 12 | +| **Rounds** | 2 | +| **Convergence** | 95% | +| **Status** | Complete | + +--- + +## Summary + +Deliberation on Phase 3 features for RFC 0016 Context Injection Architecture: refresh triggers, relevance graph computation, and staleness detection. + +## Panel + +| Expert | Domain | Key Contribution | +|--------|--------|------------------| +| Systems Architect | Architecture | Event-sourced design via audit log; hybrid lazy evaluation | +| Performance Engineer | Efficiency | Content-addressed cache; mtime-first staleness; token budget enforcement | +| UX Designer | Experience | Context breadcrumbs; predictable refresh moments; progressive disclosure | +| Data Scientist | Algorithms | PageRank relevance; co-access matrix; Bayesian staleness | +| Security Engineer | Security | Cryptographic hashes; rate limiting; plugin sandboxing | +| Distributed Systems | Consistency | Version vectors; bounded staleness; materialized relevance view | +| Cognitive Scientist | Cognition | Hysteresis for refresh; reasoning continuity; tier-appropriate thresholds | +| Product Manager | Prioritization | MVP = on_rfc_change only; defer ML; success metrics first | +| Database Engineer | Data Model | `relevance_edges` table; staleness indexes; efficient queries | +| ML Engineer | Learning | Graceful degradation ladder; bandit trigger learning; cold start mitigation | +| DevOps Engineer | Operations | Structured audit logging; refresh metrics; circuit breakers | +| Philosophy/Ethics | Ethics | Transparency imperative; user agency; coherence constraints | + +## Round 1: Perspectives + +### Strong Convergence (7+ experts) + +1. **Event-sourced staleness** - Use content hash comparison from audit log, not calendar time +2. **`on_rfc_change` as MVP trigger** - Ship simplest valuable trigger first +3. **Materialized relevance graph** - Compute on write, cache aggressively +4. **Tiered staleness thresholds** - ADRs stable (session-start only), RFCs volatile (every state change) +5. **Rate limiting** - Circuit breakers to prevent refresh storms +6. **Transparent context changes** - Announce what context updated and why +7. **Version vectors** - Efficient O(1) staleness checks per document + +### Tensions Identified + +| ID | Tension | Positions | +|----|---------|-----------| +| T1 | ML Complexity | Data Scientist wants full ML stack vs Product Manager wants explicit links only | +| T2 | User Control | Philosophy wants advisory defaults vs UX wants automation that "just works" | +| T3 | Session Identity | Security wants crypto random vs Systems needs persistence across restarts | + +## Round 2: Synthesis + +### T1 Resolution: Phased Relevance (ML as optimization, not feature) + +**Phase 0**: Explicit links only (declared relationships) +**Phase 1**: Weighted explicit (recency decay, access frequency) +**Phase 2**: Keyword expansion (TF-IDF suggestions) +**Phase 3**: ML gate review - proceed only if: + - Explicit links have >80% precision but <50% recall + - >1000 co-access events logged + - Keyword suggestions clicked >15% +**Phase 4**: Full ML (if gated) + +### T2 Resolution: Predictability is agency + +| Tier | Control Model | Notification | +|------|---------------|--------------| +| 1-2 | Automatic | Silent (user action is the notification) | +| 3 | Advisory | Inline subtle ("Reading related tests...") | +| 4 | Explicit consent | Prompt as capability ("I can scan...want me to?") | + +**Honor Test**: If user asks "what context do you have?", answer should match intuition. + +### T3 Resolution: Composite session identity + +``` +Session ID: {repo}-{realm}-{random12} +Example: blue-default-a7f3c9e2d1b4 +``` + +- Stable prefix enables log correlation via SQL LIKE queries +- Random suffix ensures global uniqueness and unpredictability +- No schema changes for MVP; optional `parent_session_id` for v2 + +## Scoreboard + +| Expert | W | C | T | R | Total | +|--------|---|---|---|---|-------| +| Systems Architect | 9 | 9 | 8 | 9 | 35 | +| Cognitive Scientist | 9 | 8 | 9 | 9 | 35 | +| Database Engineer | 9 | 8 | 9 | 9 | 35 | +| Philosophy/Ethics | 9 | 8 | 9 | 9 | 35 | +| Distributed Systems | 9 | 9 | 8 | 8 | 34 | +| DevOps Engineer | 8 | 9 | 9 | 8 | 34 | +| Performance Engineer | 8 | 8 | 9 | 8 | 33 | +| UX Designer | 8 | 9 | 8 | 8 | 33 | +| Data Scientist | 9 | 7 | 8 | 9 | 33 | +| Security Engineer | 8 | 8 | 9 | 8 | 33 | +| Product Manager | 8 | 9 | 9 | 7 | 33 | +| ML Engineer | 8 | 7 | 8 | 8 | 31 | + +## Recommendations for RFC 0017 + +1. **MVP Scope**: Implement `on_rfc_change` trigger with content-hash staleness +2. **Architecture**: Event-sourced from `context_injections`; pluggable relevance scorer +3. **Session Identity**: Composite `{repo}-{realm}-{random12}` format +4. **Notification Model**: Tier-based (automatic → advisory → consent) +5. **Relevance Graph**: Start with explicit links; gate ML on usage metrics +6. **Staleness**: Per-document-type thresholds; hash-based, not time-based +7. **Safety**: Rate limiting (max 1 refresh per 30s); circuit breakers + +--- + +*Dialogue orchestrated by 💙 Judge with 12 domain experts across 2 rounds.* diff --git a/.blue/docs/rfcs/0017-dynamic-context-activation.md b/.blue/docs/rfcs/0017-dynamic-context-activation.md new file mode 100644 index 0000000..6b6fa8e --- /dev/null +++ b/.blue/docs/rfcs/0017-dynamic-context-activation.md @@ -0,0 +1,314 @@ +# RFC 0017: Dynamic Context Activation + +| | | +|---|---| +| **Status** | In-Progress | +| **Created** | 2026-01-25 | +| **Source** | Alignment Dialogue (12 experts, 95% convergence) | +| **Depends On** | RFC 0016 (Context Injection Architecture) | + +--- + +## Summary + +Implements Phase 3 of RFC 0016: refresh triggers, relevance graph computation, and staleness detection for dynamic context activation. + +## Motivation + +RFC 0016 established the three-tier context injection architecture with manifest-driven configuration. However, the current implementation is static: +- Triggers are defined but not activated +- `blue://context/relevance` returns empty +- `staleness_days` is declared but not enforced + +Users experience context drift when working on RFCs that change state, documents that get updated, or across long sessions. The system needs to dynamically refresh context based on activity. + +## Principles + +1. **Staleness is content-based, not time-based** - A document unchanged for 30 days isn't stale; a document changed since injection is +2. **Predictability is agency** - Users should be able to predict what context is active without controlling every refresh +3. **ML is optimization, not feature** - Start with explicit relationships; add inference when data justifies it +4. **Event-sourced truth** - The audit log (`context_injections`) is the source of truth for staleness and refresh decisions + +## Design + +### Refresh Triggers + +#### MVP: `on_rfc_change` + +The only trigger implemented in Phase 1. Fires when: +- RFC status transitions (draft → accepted → in-progress → implemented) +- RFC content changes (hash differs from last injection) +- RFC tasks are completed + +```rust +pub enum RefreshTrigger { + OnRfcChange, // MVP - implemented + EveryNTurns(u32), // Deferred - needs usage data + OnToolCall(String), // Deferred - needs pattern analysis +} +``` + +#### Trigger Evaluation + +Piggyback on existing audit writes. When `resources/read` is called: + +```rust +fn should_refresh(uri: &str, session_id: &str, store: &DocumentStore) -> bool { + let last_injection = store.get_last_injection(session_id, uri); + let current_hash = compute_content_hash(uri); + + match last_injection { + None => true, // Never injected + Some(inj) => inj.content_hash != current_hash + } +} +``` + +#### Rate Limiting + +Prevent refresh storms with cooldown: + +```rust +const REFRESH_COOLDOWN_SECS: u64 = 30; + +fn is_refresh_allowed(session_id: &str, store: &DocumentStore) -> bool { + let last_refresh = store.get_last_refresh_time(session_id); + match last_refresh { + None => true, + Some(t) => Utc::now() - t > Duration::seconds(REFRESH_COOLDOWN_SECS) + } +} +``` + +### Relevance Graph + +#### Phased Implementation + +| Phase | Scope | Trigger to Advance | +|-------|-------|-------------------| +| 0 | Explicit links only | MVP | +| 1 | Weighted by recency/access | After 30 days usage | +| 2 | Keyword expansion (TF-IDF) | <50% recall on explicit | +| 3 | Full ML (embeddings, co-access) | >1000 events AND <70% precision | + +#### Phase 0: Explicit Links + +Parse declared relationships from documents: + +```markdown + +References: ADR 0005, ADR 0007 +``` + +Store in `relevance_edges` table: + +```sql +CREATE TABLE relevance_edges ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + source_uri TEXT NOT NULL, + target_uri TEXT NOT NULL, + edge_type TEXT NOT NULL, -- 'explicit', 'keyword', 'learned' + weight REAL DEFAULT 1.0, + created_at TEXT NOT NULL, + UNIQUE(source_uri, target_uri, edge_type) +); + +CREATE INDEX idx_relevance_source ON relevance_edges(source_uri); +``` + +#### Resolving `blue://context/relevance` + +Returns documents related to current workflow context: + +```rust +pub fn resolve_relevance(state: &ProjectState) -> Vec { + let current_rfc = get_current_rfc(state); + + // Get explicit links from current RFC + let edges = state.store.get_relevance_edges(¤t_rfc.uri); + + // Sort by weight, limit to token budget + edges.sort_by(|a, b| b.weight.cmp(&a.weight)); + + let mut result = Vec::new(); + let mut tokens = 0; + + for edge in edges { + let doc = resolve_uri(&edge.target_uri); + if tokens + doc.tokens <= REFERENCE_BUDGET { + result.push(doc); + tokens += doc.tokens; + } + } + + result +} +``` + +### Staleness Detection + +#### Content-Hash Based + +Staleness = content changed since last injection: + +```rust +pub struct StalenessCheck { + pub uri: String, + pub is_stale: bool, + pub reason: StalenessReason, + pub last_injected: Option>, + pub current_hash: String, + pub injected_hash: Option, +} + +pub enum StalenessReason { + NeverInjected, + ContentChanged, + Fresh, +} +``` + +#### Tiered Thresholds + +Different document types have different volatility: + +| Doc Type | Refresh Policy | Rationale | +|----------|---------------|-----------| +| ADR | Session start only | Foundational beliefs rarely change | +| RFC (draft) | Every state change | Actively evolving | +| RFC (implemented) | On explicit request | Historical record | +| Spike | On completion | Time-boxed investigation | +| Dialogue | Never auto-refresh | Immutable record | + +```rust +fn get_staleness_policy(doc_type: DocType, status: &str) -> RefreshPolicy { + match (doc_type, status) { + (DocType::Adr, _) => RefreshPolicy::SessionStart, + (DocType::Rfc, "draft" | "in-progress") => RefreshPolicy::OnChange, + (DocType::Rfc, _) => RefreshPolicy::OnRequest, + (DocType::Spike, "active") => RefreshPolicy::OnChange, + (DocType::Dialogue, _) => RefreshPolicy::Never, + _ => RefreshPolicy::OnRequest, + } +} +``` + +### Session Identity + +Composite format for correlation and uniqueness: + +``` +{repo}-{realm}-{random12} +Example: blue-default-a7f3c9e2d1b4 +``` + +- **Stable prefix** (`repo-realm`): Enables log correlation via SQL LIKE +- **Random suffix**: Cryptographically unique per MCP lifecycle + +```rust +fn generate_session_id(repo: &str, realm: &str) -> String { + use rand::Rng; + const CHARSET: &[u8] = b"0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"; + let suffix: String = (0..12) + .map(|_| CHARSET[rand::thread_rng().gen_range(0..CHARSET.len())] as char) + .collect(); + format!("{}-{}-{}", repo, realm, suffix) +} +``` + +### Notification Model + +Tier-based transparency: + +| Tier | Behavior | User Notification | +|------|----------|-------------------| +| 1-2 (Identity/Workflow) | Automatic | Silent - user action is the notification | +| 3 (Reference) | Advisory | Inline: "Reading related tests..." | +| 4 (Expensive ops) | Consent | Prompt: "I can scan the full codebase..." | + +**Honor Test**: If user asks "what context do you have?", the answer should match their intuition. + +## Implementation + +### Phase 1: MVP (This RFC) + +- [x] Implement `on_rfc_change` trigger evaluation +- [x] Add content-hash staleness detection +- [x] Create `relevance_edges` table for explicit links +- [x] Update session ID generation +- [x] Add rate limiting (30s cooldown) +- [x] Implement `blue_context_status` MCP tool + +### Phase 2: Weighted Relevance + +- [ ] Add recency decay to edge weights +- [ ] Track access frequency per document +- [ ] Implement `blue context refresh` CLI command + +### Phase 3: ML Integration (Gated) + +Gate criteria: +- >1000 co-access events in audit log +- Explicit links precision >80%, recall <50% +- User feedback indicates "missing connections" + +If gated: +- [ ] Co-access matrix factorization +- [ ] Embedding-based similarity +- [ ] Bandit learning for trigger timing + +## Schema Changes + +```sql +-- New table for relevance graph +CREATE TABLE relevance_edges ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + source_uri TEXT NOT NULL, + target_uri TEXT NOT NULL, + edge_type TEXT NOT NULL, + weight REAL DEFAULT 1.0, + created_at TEXT NOT NULL, + UNIQUE(source_uri, target_uri, edge_type) +); + +CREATE INDEX idx_relevance_source ON relevance_edges(source_uri); +CREATE INDEX idx_relevance_target ON relevance_edges(target_uri); + +-- Add to documents table +ALTER TABLE documents ADD COLUMN content_hash TEXT; +ALTER TABLE documents ADD COLUMN last_injected_at TEXT; + +-- Efficient staleness query index +CREATE INDEX idx_documents_staleness ON documents( + doc_type, + updated_at, + last_injected_at +) WHERE deleted_at IS NULL; +``` + +## Consequences + +### Positive +- Context stays fresh during active RFC work +- Explicit architectural traceability through relevance graph +- Graceful degradation: system works without ML +- Auditable refresh decisions via event log + +### Negative +- Additional complexity in refresh evaluation +- Rate limiting may delay urgent context updates +- Explicit links require document authors to declare relationships + +### Neutral +- ML features gated on data, may never ship if simple approach suffices + +## Related + +- [RFC 0016: Context Injection Architecture](./0016-context-injection-architecture.md) +- [Dialogue: Dynamic Context Activation](../dialogues/rfc-0017-dynamic-context-activation.dialogue.md) +- ADR 0004: Evidence +- ADR 0005: Single Source + +--- + +*Drafted from alignment dialogue with 12 domain experts achieving 95% convergence.* diff --git a/Cargo.toml b/Cargo.toml index bcaf35b..ef671c3 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -66,6 +66,9 @@ http-body-util = "0.1" # Crypto sha2 = "0.10" +# Random +rand = "0.8" + # Testing tempfile = "3.15" diff --git a/apps/blue-cli/src/main.rs b/apps/blue-cli/src/main.rs index 67883bc..8906569 100644 --- a/apps/blue-cli/src/main.rs +++ b/apps/blue-cli/src/main.rs @@ -112,6 +112,12 @@ enum Commands { /// File path to analyze file: String, }, + + /// Context injection visibility (RFC 0016) + Context { + #[command(subcommand)] + command: Option, + }, } #[derive(Subcommand)] @@ -354,6 +360,16 @@ enum PrCommands { }, } +#[derive(Subcommand)] +enum ContextCommands { + /// Show full manifest with injection status + Show { + /// Show complete audit trail with timestamps and hashes + #[arg(long)] + verbose: bool, + }, +} + #[derive(Subcommand)] enum IndexCommands { /// Index all files in the realm (bootstrap) @@ -483,6 +499,9 @@ async fn main() -> Result<()> { Some(Commands::Impact { file }) => { handle_impact_command(&file).await?; } + Some(Commands::Context { command }) => { + handle_context_command(command).await?; + } } Ok(()) @@ -2022,3 +2041,172 @@ async fn handle_impact_command(file: &str) -> Result<()> { Ok(()) } + +// ==================== Context Commands (RFC 0016) ==================== + +async fn handle_context_command(command: Option) -> Result<()> { + use blue_core::ContextManifest; + + let cwd = std::env::current_dir()?; + let blue_dir = cwd.join(".blue"); + + if !blue_dir.exists() { + println!("No .blue directory found. Run 'blue init' first."); + return Ok(()); + } + + let manifest = ContextManifest::load_or_default(&cwd)?; + + match command { + None => { + // Quick summary (default) + let resolution = manifest.resolve(&cwd)?; + print_context_summary(&resolution); + } + Some(ContextCommands::Show { verbose }) => { + // Full manifest view + let resolution = manifest.resolve(&cwd)?; + + if verbose { + print_context_verbose(&manifest, &resolution); + } else { + print_context_show(&manifest, &resolution); + } + } + } + + Ok(()) +} + +fn print_context_summary(resolution: &blue_core::ManifestResolution) { + fn format_tokens(tokens: usize) -> String { + if tokens >= 1000 { + format!("{:.1}k", tokens as f64 / 1000.0) + } else { + format!("{}", tokens) + } + } + + println!( + "Identity: {} sources ({} tokens) | Workflow: {} sources ({} tokens)", + resolution.identity.source_count, + format_tokens(resolution.identity.token_count), + resolution.workflow.source_count, + format_tokens(resolution.workflow.token_count), + ); +} + +fn print_context_show(manifest: &blue_core::ContextManifest, resolution: &blue_core::ManifestResolution) { + println!("Context Manifest (v{})", manifest.version); + println!(); + + // Identity tier + println!("Identity Tier (always injected)"); + println!(" Budget: {} tokens", manifest.identity.max_tokens); + println!(" Actual: {} tokens", resolution.identity.token_count); + for source in &resolution.identity.sources { + let label = source.label.as_deref().unwrap_or(""); + let status = if source.file_count > 0 { "✓" } else { "○" }; + println!(" {} {} ({} files, {} tokens)", status, source.uri, source.file_count, source.tokens); + if !label.is_empty() { + println!(" {}", label); + } + } + println!(); + + // Workflow tier + println!("Workflow Tier (activity-triggered)"); + println!(" Budget: {} tokens", manifest.workflow.max_tokens); + println!(" Actual: {} tokens", resolution.workflow.token_count); + for source in &resolution.workflow.sources { + let label = source.label.as_deref().unwrap_or(""); + let status = if source.file_count > 0 { "✓" } else { "○" }; + println!(" {} {} ({} files, {} tokens)", status, source.uri, source.file_count, source.tokens); + if !label.is_empty() { + println!(" {}", label); + } + } + + // Triggers + if !manifest.workflow.refresh_triggers.is_empty() { + println!(" Triggers:"); + for trigger in &manifest.workflow.refresh_triggers { + let name = match trigger { + blue_core::RefreshTrigger::OnRfcChange => "on_rfc_change".to_string(), + blue_core::RefreshTrigger::EveryNTurns(n) => format!("every_{}_turns", n), + blue_core::RefreshTrigger::OnToolCall(tool) => format!("on_tool_call({})", tool), + }; + println!(" - {}", name); + } + } + println!(); + + // Reference tier + println!("Reference Tier (on-demand via MCP)"); + println!(" Budget: {} tokens", manifest.reference.max_tokens); + println!(" Staleness: {} days", manifest.reference.staleness_days); + if let Some(graph) = &manifest.reference.graph { + println!(" Graph: {}", graph); + } + println!(); + + // Plugins + if !manifest.plugins.is_empty() { + println!("Plugins:"); + for plugin in &manifest.plugins { + println!(" - {}", plugin.uri); + if !plugin.provides.is_empty() { + println!(" Provides: {}", plugin.provides.join(", ")); + } + } + } +} + +fn print_context_verbose(manifest: &blue_core::ContextManifest, resolution: &blue_core::ManifestResolution) { + // Print the regular show output first + print_context_show(manifest, resolution); + + // Add verbose details + println!("=== Audit Details ==="); + println!(); + + if let Some(generated) = &manifest.generated_at { + println!("Generated: {}", generated); + } + if let Some(commit) = &manifest.source_commit { + println!("Source commit: {}", commit); + } + + println!(); + println!("Content Hashes:"); + for source in &resolution.identity.sources { + println!(" {} -> {}", source.uri, source.content_hash); + } + for source in &resolution.workflow.sources { + println!(" {} -> {}", source.uri, source.content_hash); + } + + // Try to show recent injection history from the database + let cwd = std::env::current_dir().ok(); + if let Some(cwd) = cwd { + let db_path = cwd.join(".blue").join("blue.db"); + if db_path.exists() { + if let Ok(store) = blue_core::DocumentStore::open(&db_path) { + if let Ok(recent) = store.get_recent_injections(10) { + if !recent.is_empty() { + println!(); + println!("Recent Injections:"); + for inj in recent { + println!(" {} | {} | {} | {} tokens", + inj.timestamp, + inj.tier, + inj.source_uri, + inj.token_count.unwrap_or(0) + ); + } + } + } + } + } + } +} diff --git a/crates/blue-core/src/lib.rs b/crates/blue-core/src/lib.rs index 689bc05..d541c0a 100644 --- a/crates/blue-core/src/lib.rs +++ b/crates/blue-core/src/lib.rs @@ -19,10 +19,12 @@ pub mod documents; pub mod forge; pub mod indexer; pub mod llm; +pub mod manifest; pub mod realm; pub mod repo; pub mod state; pub mod store; +pub mod uri; pub mod voice; pub mod workflow; @@ -33,6 +35,8 @@ pub use indexer::{Indexer, IndexerConfig, IndexerError, IndexResult, ParsedSymbo pub use llm::{CompletionOptions, CompletionResult, LlmBackendChoice, LlmConfig, LlmError, LlmManager, LlmProvider, LlmProviderChoice, LocalLlmConfig, ApiLlmConfig, KeywordLlm, MockLlm, ProviderStatus}; pub use repo::{detect_blue, BlueHome, RepoError, WorktreeInfo}; pub use state::{ItemType, ProjectState, StateError, StatusSummary, WorkItem}; -pub use store::{DocType, Document, DocumentStore, FileIndexEntry, IndexSearchResult, IndexStatus, LinkType, Reminder, ReminderStatus, SearchResult, Session, SessionType, StagingLock, StagingLockQueueEntry, StagingLockResult, StoreError, SymbolIndexEntry, Task as StoreTask, TaskProgress, Worktree, INDEX_PROMPT_VERSION}; +pub use store::{ContextInjection, DocType, Document, DocumentStore, EdgeType, FileIndexEntry, IndexSearchResult, IndexStatus, LinkType, RefreshPolicy, RefreshRateLimit, RelevanceEdge, Reminder, ReminderStatus, SearchResult, Session, SessionType, StagingLock, StagingLockQueueEntry, StagingLockResult, StalenessCheck, StalenessReason, StoreError, SymbolIndexEntry, Task as StoreTask, TaskProgress, Worktree, INDEX_PROMPT_VERSION}; pub use voice::*; pub use workflow::{PrdStatus, RfcStatus, SpikeOutcome as WorkflowSpikeOutcome, SpikeStatus, WorkflowError, validate_rfc_transition}; +pub use manifest::{ContextManifest, IdentityConfig, WorkflowConfig, ReferenceConfig, PluginConfig, SourceConfig, RefreshTrigger, SalienceTrigger, ManifestError, ManifestResolution, TierResolution, ResolvedSource}; +pub use uri::{BlueUri, UriError, read_uri_content, estimate_tokens}; diff --git a/crates/blue-core/src/manifest.rs b/crates/blue-core/src/manifest.rs new file mode 100644 index 0000000..e13c6b5 --- /dev/null +++ b/crates/blue-core/src/manifest.rs @@ -0,0 +1,456 @@ +//! Context manifest for Blue +//! +//! Defines the manifest schema for context injection configuration. +//! See RFC 0016 for the full specification. + +use std::path::Path; + +use chrono::{DateTime, Utc}; +use serde::{Deserialize, Serialize}; +use thiserror::Error; + +use crate::uri::{BlueUri, UriError}; + +/// Errors that can occur during manifest operations +#[derive(Debug, Error)] +pub enum ManifestError { + #[error("IO error: {0}")] + Io(#[from] std::io::Error), + + #[error("YAML parse error: {0}")] + Yaml(#[from] serde_yaml::Error), + + #[error("URI error: {0}")] + Uri(#[from] UriError), + + #[error("Validation error: {0}")] + Validation(String), +} + +/// The main context manifest structure +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct ContextManifest { + /// Schema version + #[serde(default = "default_version")] + pub version: u32, + + /// When this manifest was generated + #[serde(skip_serializing_if = "Option::is_none")] + pub generated_at: Option>, + + /// Git commit hash when generated + #[serde(skip_serializing_if = "Option::is_none")] + pub source_commit: Option, + + /// Identity tier configuration (always injected) + #[serde(default)] + pub identity: IdentityConfig, + + /// Workflow tier configuration (activity-triggered) + #[serde(default)] + pub workflow: WorkflowConfig, + + /// Reference tier configuration (on-demand) + #[serde(default)] + pub reference: ReferenceConfig, + + /// Plugin configurations + #[serde(default, skip_serializing_if = "Vec::is_empty")] + pub plugins: Vec, +} + +fn default_version() -> u32 { + 1 +} + +/// Identity tier configuration (Tier 1) +/// +/// "Who am I" - Always injected at session start. +/// Contains ADRs, voice patterns, core identity. +#[derive(Debug, Clone, Default, Serialize, Deserialize)] +pub struct IdentityConfig { + /// URIs to include in identity context + #[serde(default)] + pub sources: Vec, + + /// Maximum token budget for identity tier + #[serde(default = "default_identity_tokens")] + pub max_tokens: usize, +} + +fn default_identity_tokens() -> usize { + 500 +} + +/// Workflow tier configuration (Tier 2) +/// +/// "What should I do" - Triggered by activity. +/// Contains current RFC, active tasks, workflow state. +#[derive(Debug, Clone, Default, Serialize, Deserialize)] +pub struct WorkflowConfig { + /// URIs to include in workflow context + #[serde(default)] + pub sources: Vec, + + /// Triggers that refresh workflow context + #[serde(default)] + pub refresh_triggers: Vec, + + /// Maximum token budget for workflow tier + #[serde(default = "default_workflow_tokens")] + pub max_tokens: usize, +} + +fn default_workflow_tokens() -> usize { + 2000 +} + +/// Reference tier configuration (Tier 3) +/// +/// "How does this work" - On-demand via MCP Resources. +/// Contains full documents, dialogues, historical context. +#[derive(Debug, Clone, Default, Serialize, Deserialize)] +pub struct ReferenceConfig { + /// Relevance graph URI for computing context + #[serde(skip_serializing_if = "Option::is_none")] + pub graph: Option, + + /// Maximum token budget for reference tier + #[serde(default = "default_reference_tokens")] + pub max_tokens: usize, + + /// Days after which context is considered stale + #[serde(default = "default_staleness_days")] + pub staleness_days: u32, +} + +fn default_reference_tokens() -> usize { + 4000 +} + +fn default_staleness_days() -> u32 { + 30 +} + +/// A source configuration within a tier +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct SourceConfig { + /// The URI to resolve + pub uri: String, + + /// Optional label for this source + #[serde(skip_serializing_if = "Option::is_none")] + pub label: Option, + + /// Whether to allow external references + #[serde(default)] + pub allow_external: bool, +} + +/// Refresh triggers for workflow context +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] +#[serde(rename_all = "snake_case")] +pub enum RefreshTrigger { + /// Refresh when the active RFC changes + OnRfcChange, + + /// Refresh every N conversation turns + #[serde(rename = "every_n_turns")] + EveryNTurns(u32), + + /// Refresh on specific tool calls + OnToolCall(String), +} + +/// Plugin configuration +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct PluginConfig { + /// Plugin URI scheme + pub uri: String, + + /// Context types this plugin provides + #[serde(default, skip_serializing_if = "Vec::is_empty")] + pub provides: Vec, + + /// Conditions that activate this plugin + #[serde(default, skip_serializing_if = "Vec::is_empty")] + pub salience_triggers: Vec, +} + +/// Salience triggers for plugins +#[derive(Debug, Clone, Serialize, Deserialize)] +#[serde(rename_all = "snake_case")] +pub enum SalienceTrigger { + /// Pattern to match in commit messages + CommitMsgPattern(String), + + /// Annotation to look for in files + FileAnnotation(String), + + /// Keyword to look for in conversation + KeywordMatch(String), +} + +impl ContextManifest { + /// Load a manifest from a YAML file + pub fn load(path: &Path) -> Result { + let content = std::fs::read_to_string(path)?; + let manifest: Self = serde_yaml::from_str(&content)?; + manifest.validate()?; + Ok(manifest) + } + + /// Load manifest from project root, using defaults if not present + pub fn load_or_default(project_root: &Path) -> Result { + let manifest_path = project_root.join(".blue").join("context.manifest.yaml"); + if manifest_path.exists() { + Self::load(&manifest_path) + } else { + Ok(Self::default()) + } + } + + /// Save the manifest to a YAML file + pub fn save(&self, path: &Path) -> Result<(), ManifestError> { + let content = serde_yaml::to_string(self)?; + std::fs::write(path, content)?; + Ok(()) + } + + /// Validate the manifest + pub fn validate(&self) -> Result<(), ManifestError> { + // Validate version + if self.version != 1 { + return Err(ManifestError::Validation(format!( + "Unsupported manifest version: {}", + self.version + ))); + } + + // Validate all URIs can be parsed + for source in &self.identity.sources { + BlueUri::parse(&source.uri)?; + } + for source in &self.workflow.sources { + BlueUri::parse(&source.uri)?; + } + for plugin in &self.plugins { + BlueUri::parse(&plugin.uri)?; + } + + Ok(()) + } + + /// Get all source URIs from identity tier + pub fn identity_uris(&self) -> Vec<&str> { + self.identity.sources.iter().map(|s| s.uri.as_str()).collect() + } + + /// Get all source URIs from workflow tier + pub fn workflow_uris(&self) -> Vec<&str> { + self.workflow.sources.iter().map(|s| s.uri.as_str()).collect() + } + + /// Get total token budget + pub fn total_budget(&self) -> usize { + self.identity.max_tokens + self.workflow.max_tokens + self.reference.max_tokens + } + + /// Create a summary string + pub fn summary(&self) -> String { + let identity_count = self.identity.sources.len(); + let workflow_count = self.workflow.sources.len(); + let plugin_count = self.plugins.len(); + + format!( + "Identity: {} sources ({} tokens) | Workflow: {} sources ({} tokens) | Plugins: {}", + identity_count, + self.identity.max_tokens, + workflow_count, + self.workflow.max_tokens, + plugin_count + ) + } +} + +impl Default for ContextManifest { + fn default() -> Self { + Self { + version: 1, + generated_at: None, + source_commit: None, + identity: IdentityConfig { + sources: vec![ + SourceConfig { + uri: "blue://docs/adrs/".to_string(), + label: Some("Architecture Decision Records".to_string()), + allow_external: false, + }, + SourceConfig { + uri: "blue://context/voice".to_string(), + label: Some("Voice patterns".to_string()), + allow_external: false, + }, + ], + max_tokens: 500, + }, + workflow: WorkflowConfig { + sources: vec![ + SourceConfig { + uri: "blue://state/current-rfc".to_string(), + label: Some("Active RFC".to_string()), + allow_external: false, + }, + ], + refresh_triggers: vec![RefreshTrigger::OnRfcChange], + max_tokens: 2000, + }, + reference: ReferenceConfig { + graph: Some("blue://context/relevance".to_string()), + max_tokens: 4000, + staleness_days: 30, + }, + plugins: Vec::new(), + } + } +} + +/// Summary of resolved manifest content +#[derive(Debug, Clone, Default)] +pub struct ManifestResolution { + /// Resolved identity tier + pub identity: TierResolution, + + /// Resolved workflow tier + pub workflow: TierResolution, + + /// Reference tier (not pre-resolved, on-demand) + pub reference_budget: usize, +} + +/// Resolution result for a single tier +#[derive(Debug, Clone, Default)] +pub struct TierResolution { + /// Number of sources resolved + pub source_count: usize, + + /// Estimated token count + pub token_count: usize, + + /// List of resolved source details + pub sources: Vec, +} + +/// A resolved source with metadata +#[derive(Debug, Clone)] +pub struct ResolvedSource { + /// Original URI + pub uri: String, + + /// Label if provided + pub label: Option, + + /// Number of files resolved + pub file_count: usize, + + /// Estimated tokens + pub tokens: usize, + + /// Content hash for change detection + pub content_hash: String, +} + +impl ContextManifest { + /// Resolve the manifest against a project root + pub fn resolve(&self, project_root: &Path) -> Result { + let identity = self.resolve_tier(&self.identity.sources, project_root)?; + let workflow = self.resolve_tier(&self.workflow.sources, project_root)?; + + Ok(ManifestResolution { + identity, + workflow, + reference_budget: self.reference.max_tokens, + }) + } + + fn resolve_tier( + &self, + sources: &[SourceConfig], + project_root: &Path, + ) -> Result { + let mut resolution = TierResolution::default(); + + for source in sources { + let uri = BlueUri::parse(&source.uri)?; + let paths = uri.resolve(project_root)?; + + let mut content = String::new(); + for path in &paths { + if let Ok(text) = std::fs::read_to_string(path) { + content.push_str(&text); + } + } + + let tokens = crate::uri::estimate_tokens(&content); + let hash = compute_content_hash(&content); + + resolution.sources.push(ResolvedSource { + uri: source.uri.clone(), + label: source.label.clone(), + file_count: paths.len(), + tokens, + content_hash: hash, + }); + + resolution.source_count += 1; + resolution.token_count += tokens; + } + + Ok(resolution) + } +} + +/// Compute a simple hash of content for change detection +fn compute_content_hash(content: &str) -> String { + use std::collections::hash_map::DefaultHasher; + use std::hash::{Hash, Hasher}; + + let mut hasher = DefaultHasher::new(); + content.hash(&mut hasher); + format!("{:016x}", hasher.finish()) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_default_manifest() { + let manifest = ContextManifest::default(); + assert_eq!(manifest.version, 1); + assert!(!manifest.identity.sources.is_empty()); + assert_eq!(manifest.identity.max_tokens, 500); + } + + #[test] + fn test_manifest_summary() { + let manifest = ContextManifest::default(); + let summary = manifest.summary(); + assert!(summary.contains("Identity:")); + assert!(summary.contains("Workflow:")); + } + + #[test] + fn test_manifest_validation() { + let manifest = ContextManifest::default(); + assert!(manifest.validate().is_ok()); + } + + #[test] + fn test_yaml_roundtrip() { + let manifest = ContextManifest::default(); + let yaml = serde_yaml::to_string(&manifest).unwrap(); + let parsed: ContextManifest = serde_yaml::from_str(&yaml).unwrap(); + assert_eq!(parsed.version, manifest.version); + } +} diff --git a/crates/blue-core/src/store.rs b/crates/blue-core/src/store.rs index 704fc82..8345ea4 100644 --- a/crates/blue-core/src/store.rs +++ b/crates/blue-core/src/store.rs @@ -10,7 +10,7 @@ use rusqlite::{params, Connection, OptionalExtension, Transaction, TransactionBe use tracing::{debug, info, warn}; /// Current schema version -const SCHEMA_VERSION: i32 = 4; +const SCHEMA_VERSION: i32 = 6; /// Core database schema const SCHEMA: &str = r#" @@ -178,6 +178,40 @@ const SCHEMA: &str = r#" CREATE INDEX IF NOT EXISTS idx_symbol_index_file ON symbol_index(file_id); CREATE INDEX IF NOT EXISTS idx_symbol_index_name ON symbol_index(name); + + -- Context injection audit log (RFC 0016) + CREATE TABLE IF NOT EXISTS context_injections ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + session_id TEXT NOT NULL, + timestamp TEXT NOT NULL, + tier TEXT NOT NULL, + source_uri TEXT NOT NULL, + content_hash TEXT NOT NULL, + token_count INTEGER + ); + + CREATE INDEX IF NOT EXISTS idx_context_injections_session ON context_injections(session_id); + CREATE INDEX IF NOT EXISTS idx_context_injections_timestamp ON context_injections(timestamp); + + -- Relevance graph edges (RFC 0017) + CREATE TABLE IF NOT EXISTS relevance_edges ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + source_uri TEXT NOT NULL, + target_uri TEXT NOT NULL, + edge_type TEXT NOT NULL, + weight REAL DEFAULT 1.0, + created_at TEXT NOT NULL, + UNIQUE(source_uri, target_uri, edge_type) + ); + + CREATE INDEX IF NOT EXISTS idx_relevance_source ON relevance_edges(source_uri); + CREATE INDEX IF NOT EXISTS idx_relevance_target ON relevance_edges(target_uri); + + -- Staleness tracking index for documents (RFC 0017) + CREATE INDEX IF NOT EXISTS idx_documents_staleness ON documents( + doc_type, + updated_at + ) WHERE deleted_at IS NULL; "#; /// FTS5 schema for full-text search @@ -592,6 +626,164 @@ pub struct ExpiredDeploymentInfo { pub stacks: Option, } +// ==================== Context Injection Types (RFC 0016) ==================== + +/// A logged context injection event +#[derive(Debug, Clone)] +pub struct ContextInjection { + pub id: Option, + pub session_id: String, + pub timestamp: String, + pub tier: String, + pub source_uri: String, + pub content_hash: String, + pub token_count: Option, +} + +impl ContextInjection { + pub fn new(session_id: &str, tier: &str, source_uri: &str, content_hash: &str, token_count: Option) -> Self { + Self { + id: None, + session_id: session_id.to_string(), + timestamp: chrono::Utc::now().to_rfc3339(), + tier: tier.to_string(), + source_uri: source_uri.to_string(), + content_hash: content_hash.to_string(), + token_count, + } + } +} + +// ==================== Dynamic Context Activation Types (RFC 0017) ==================== + +/// A relevance edge connecting two documents +#[derive(Debug, Clone)] +pub struct RelevanceEdge { + pub id: Option, + pub source_uri: String, + pub target_uri: String, + pub edge_type: EdgeType, + pub weight: f64, + pub created_at: String, +} + +/// Types of relevance edges +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum EdgeType { + /// Explicitly declared relationship (e.g., "References: ADR 0005") + Explicit, + /// Keyword-based similarity + Keyword, + /// Learned from co-access patterns + Learned, +} + +impl EdgeType { + pub fn as_str(&self) -> &'static str { + match self { + EdgeType::Explicit => "explicit", + EdgeType::Keyword => "keyword", + EdgeType::Learned => "learned", + } + } + + pub fn from_str(s: &str) -> Option { + match s.to_lowercase().as_str() { + "explicit" => Some(EdgeType::Explicit), + "keyword" => Some(EdgeType::Keyword), + "learned" => Some(EdgeType::Learned), + _ => None, + } + } +} + +impl RelevanceEdge { + pub fn new(source_uri: &str, target_uri: &str, edge_type: EdgeType) -> Self { + Self { + id: None, + source_uri: source_uri.to_string(), + target_uri: target_uri.to_string(), + edge_type, + weight: 1.0, + created_at: chrono::Utc::now().to_rfc3339(), + } + } + + pub fn with_weight(mut self, weight: f64) -> Self { + self.weight = weight; + self + } +} + +/// Staleness check result for a document +#[derive(Debug, Clone)] +pub struct StalenessCheck { + pub uri: String, + pub is_stale: bool, + pub reason: StalenessReason, + pub last_injected: Option, + pub current_hash: String, + pub injected_hash: Option, +} + +/// Reason why a document is considered stale +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum StalenessReason { + /// Document was never injected in this session + NeverInjected, + /// Content hash changed since last injection + ContentChanged, + /// Document is fresh (not stale) + Fresh, +} + +/// Refresh policy for different document types +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum RefreshPolicy { + /// Refresh only at session start + SessionStart, + /// Refresh whenever content changes + OnChange, + /// Refresh only on explicit request + OnRequest, + /// Never automatically refresh + Never, +} + +/// Rate limiter state for refresh operations +#[derive(Debug, Clone)] +pub struct RefreshRateLimit { + pub session_id: String, + pub last_refresh: Option, + pub cooldown_secs: u64, +} + +impl RefreshRateLimit { + pub const DEFAULT_COOLDOWN_SECS: u64 = 30; + + pub fn new(session_id: &str) -> Self { + Self { + session_id: session_id.to_string(), + last_refresh: None, + cooldown_secs: Self::DEFAULT_COOLDOWN_SECS, + } + } + + pub fn is_allowed(&self) -> bool { + match &self.last_refresh { + None => true, + Some(last) => { + if let Ok(last_time) = chrono::DateTime::parse_from_rfc3339(last) { + let elapsed = chrono::Utc::now().signed_duration_since(last_time); + elapsed.num_seconds() >= self.cooldown_secs as i64 + } else { + true + } + } + } + } +} + // ==================== Semantic Index Types (RFC 0010) ==================== /// Current prompt version for indexing @@ -872,6 +1064,67 @@ impl DocumentStore { self.conn.execute_batch(FILE_INDEX_FTS5_SCHEMA)?; } + // Migration from v4 to v5: Add context injection audit table (RFC 0016) + if from_version < 5 { + debug!("Adding context injection audit table (RFC 0016)"); + + self.conn.execute( + "CREATE TABLE IF NOT EXISTS context_injections ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + session_id TEXT NOT NULL, + timestamp TEXT NOT NULL, + tier TEXT NOT NULL, + source_uri TEXT NOT NULL, + content_hash TEXT NOT NULL, + token_count INTEGER + )", + [], + )?; + + self.conn.execute( + "CREATE INDEX IF NOT EXISTS idx_context_injections_session ON context_injections(session_id)", + [], + )?; + self.conn.execute( + "CREATE INDEX IF NOT EXISTS idx_context_injections_timestamp ON context_injections(timestamp)", + [], + )?; + } + + // Migration from v5 to v6: Add relevance graph and staleness tracking (RFC 0017) + if from_version < 6 { + debug!("Adding relevance graph and staleness tracking (RFC 0017)"); + + // Create relevance_edges table + self.conn.execute( + "CREATE TABLE IF NOT EXISTS relevance_edges ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + source_uri TEXT NOT NULL, + target_uri TEXT NOT NULL, + edge_type TEXT NOT NULL, + weight REAL DEFAULT 1.0, + created_at TEXT NOT NULL, + UNIQUE(source_uri, target_uri, edge_type) + )", + [], + )?; + + self.conn.execute( + "CREATE INDEX IF NOT EXISTS idx_relevance_source ON relevance_edges(source_uri)", + [], + )?; + self.conn.execute( + "CREATE INDEX IF NOT EXISTS idx_relevance_target ON relevance_edges(target_uri)", + [], + )?; + + // Add staleness tracking index + self.conn.execute( + "CREATE INDEX IF NOT EXISTS idx_documents_staleness ON documents(doc_type, updated_at) WHERE deleted_at IS NULL", + [], + )?; + } + // Update schema version self.conn.execute( "UPDATE schema_version SET version = ?1", @@ -2632,6 +2885,263 @@ impl DocumentStore { None => Ok(true), // Not indexed = stale } } + + // ==================== Context Injection Methods (RFC 0016) ==================== + + /// Log a context injection event + pub fn log_injection( + &self, + session_id: &str, + tier: &str, + source_uri: &str, + content_hash: &str, + token_count: Option, + ) -> Result { + self.with_retry(|| { + let now = chrono::Utc::now().to_rfc3339(); + self.conn.execute( + "INSERT INTO context_injections (session_id, timestamp, tier, source_uri, content_hash, token_count) + VALUES (?1, ?2, ?3, ?4, ?5, ?6)", + params![session_id, now, tier, source_uri, content_hash, token_count], + )?; + Ok(self.conn.last_insert_rowid()) + }) + } + + /// Get injection history for a session + pub fn get_injection_history(&self, session_id: &str) -> Result, StoreError> { + let mut stmt = self.conn.prepare( + "SELECT id, session_id, timestamp, tier, source_uri, content_hash, token_count + FROM context_injections + WHERE session_id = ?1 + ORDER BY timestamp ASC", + )?; + + let rows = stmt.query_map(params![session_id], |row| { + Ok(ContextInjection { + id: Some(row.get(0)?), + session_id: row.get(1)?, + timestamp: row.get(2)?, + tier: row.get(3)?, + source_uri: row.get(4)?, + content_hash: row.get(5)?, + token_count: row.get(6)?, + }) + })?; + + let mut results = Vec::new(); + for row in rows { + results.push(row?); + } + Ok(results) + } + + /// Get recent injections across all sessions (for debugging) + pub fn get_recent_injections(&self, limit: usize) -> Result, StoreError> { + let mut stmt = self.conn.prepare( + "SELECT id, session_id, timestamp, tier, source_uri, content_hash, token_count + FROM context_injections + ORDER BY timestamp DESC + LIMIT ?1", + )?; + + let rows = stmt.query_map(params![limit as i64], |row| { + Ok(ContextInjection { + id: Some(row.get(0)?), + session_id: row.get(1)?, + timestamp: row.get(2)?, + tier: row.get(3)?, + source_uri: row.get(4)?, + content_hash: row.get(5)?, + token_count: row.get(6)?, + }) + })?; + + let mut results = Vec::new(); + for row in rows { + results.push(row?); + } + Ok(results) + } + + /// Get injection stats for a session + pub fn get_injection_stats(&self, session_id: &str) -> Result<(usize, i64), StoreError> { + let result = self.conn.query_row( + "SELECT COUNT(*), COALESCE(SUM(token_count), 0) + FROM context_injections + WHERE session_id = ?1", + params![session_id], + |row| Ok((row.get::<_, i64>(0)? as usize, row.get::<_, i64>(1)?)), + )?; + Ok(result) + } + + /// Get the last injection for a URI in a session + pub fn get_last_injection(&self, session_id: &str, uri: &str) -> Result, StoreError> { + self.conn + .query_row( + "SELECT id, session_id, timestamp, tier, source_uri, content_hash, token_count + FROM context_injections + WHERE session_id = ?1 AND source_uri = ?2 + ORDER BY timestamp DESC + LIMIT 1", + params![session_id, uri], + |row| { + Ok(ContextInjection { + id: Some(row.get(0)?), + session_id: row.get(1)?, + timestamp: row.get(2)?, + tier: row.get(3)?, + source_uri: row.get(4)?, + content_hash: row.get(5)?, + token_count: row.get(6)?, + }) + }, + ) + .optional() + .map_err(StoreError::Database) + } + + /// Get the last refresh time for a session (for rate limiting) + pub fn get_last_refresh_time(&self, session_id: &str) -> Result, StoreError> { + self.conn + .query_row( + "SELECT MAX(timestamp) FROM context_injections WHERE session_id = ?1", + params![session_id], + |row| row.get(0), + ) + .optional() + .map_err(StoreError::Database) + .map(|opt| opt.flatten()) + } + + /// Get recent injections for a session + pub fn get_session_injections(&self, session_id: &str, limit: usize) -> Result, StoreError> { + let mut stmt = self.conn.prepare( + "SELECT id, session_id, timestamp, tier, source_uri, content_hash, token_count + FROM context_injections + WHERE session_id = ?1 + ORDER BY timestamp DESC + LIMIT ?2" + ).map_err(StoreError::Database)?; + + let rows = stmt.query_map(params![session_id, limit as i64], |row| { + Ok(ContextInjection { + id: Some(row.get(0)?), + session_id: row.get(1)?, + timestamp: row.get(2)?, + tier: row.get(3)?, + source_uri: row.get(4)?, + content_hash: row.get(5)?, + token_count: row.get(6)?, + }) + }).map_err(StoreError::Database)?; + + rows.collect::, _>>().map_err(StoreError::Database) + } + + // ==================== Relevance Graph Methods (RFC 0017) ==================== + + /// Add a relevance edge + pub fn add_relevance_edge(&self, edge: &RelevanceEdge) -> Result { + self.with_retry(|| { + let now = chrono::Utc::now().to_rfc3339(); + self.conn.execute( + "INSERT OR REPLACE INTO relevance_edges (source_uri, target_uri, edge_type, weight, created_at) + VALUES (?1, ?2, ?3, ?4, ?5)", + params![ + edge.source_uri, + edge.target_uri, + edge.edge_type.as_str(), + edge.weight, + now, + ], + )?; + Ok(self.conn.last_insert_rowid()) + }) + } + + /// Get relevance edges from a source URI + pub fn get_relevance_edges(&self, source_uri: &str) -> Result, StoreError> { + let mut stmt = self.conn.prepare( + "SELECT id, source_uri, target_uri, edge_type, weight, created_at + FROM relevance_edges + WHERE source_uri = ?1 + ORDER BY weight DESC", + )?; + + let rows = stmt.query_map(params![source_uri], |row| { + Ok(RelevanceEdge { + id: Some(row.get(0)?), + source_uri: row.get(1)?, + target_uri: row.get(2)?, + edge_type: EdgeType::from_str(&row.get::<_, String>(3)?).unwrap_or(EdgeType::Explicit), + weight: row.get(4)?, + created_at: row.get(5)?, + }) + })?; + + let mut results = Vec::new(); + for row in rows { + results.push(row?); + } + Ok(results) + } + + /// Get all edges pointing to a target URI + pub fn get_incoming_edges(&self, target_uri: &str) -> Result, StoreError> { + let mut stmt = self.conn.prepare( + "SELECT id, source_uri, target_uri, edge_type, weight, created_at + FROM relevance_edges + WHERE target_uri = ?1 + ORDER BY weight DESC", + )?; + + let rows = stmt.query_map(params![target_uri], |row| { + Ok(RelevanceEdge { + id: Some(row.get(0)?), + source_uri: row.get(1)?, + target_uri: row.get(2)?, + edge_type: EdgeType::from_str(&row.get::<_, String>(3)?).unwrap_or(EdgeType::Explicit), + weight: row.get(4)?, + created_at: row.get(5)?, + }) + })?; + + let mut results = Vec::new(); + for row in rows { + results.push(row?); + } + Ok(results) + } + + /// Remove a relevance edge + pub fn remove_relevance_edge(&self, source_uri: &str, target_uri: &str, edge_type: EdgeType) -> Result { + let rows = self.conn.execute( + "DELETE FROM relevance_edges WHERE source_uri = ?1 AND target_uri = ?2 AND edge_type = ?3", + params![source_uri, target_uri, edge_type.as_str()], + )?; + Ok(rows > 0) + } + + /// Clear all edges of a specific type + pub fn clear_edges_by_type(&self, edge_type: EdgeType) -> Result { + let rows = self.conn.execute( + "DELETE FROM relevance_edges WHERE edge_type = ?1", + params![edge_type.as_str()], + )?; + Ok(rows) + } + + /// Count relevance edges + pub fn count_relevance_edges(&self) -> Result { + let count: i64 = self.conn.query_row( + "SELECT COUNT(*) FROM relevance_edges", + [], + |row| row.get(0), + )?; + Ok(count as usize) + } } #[cfg(test)] diff --git a/crates/blue-core/src/uri.rs b/crates/blue-core/src/uri.rs new file mode 100644 index 0000000..0ce8d4a --- /dev/null +++ b/crates/blue-core/src/uri.rs @@ -0,0 +1,357 @@ +//! Blue URI resolution +//! +//! Handles `blue://` URIs for context injection. +//! +//! URI patterns: +//! - `blue://docs/{type}/` - All documents of a type +//! - `blue://docs/{type}/{id}` - Specific document by ID/title +//! - `blue://context/{scope}` - Injection bundles (voice, relevance) +//! - `blue://state/{entity}` - Live state (current-rfc, active-tasks) +//! - `blue://{plugin}/` - Plugin-provided context + +use std::path::{Path, PathBuf}; + +use thiserror::Error; + +/// Errors that can occur during URI resolution +#[derive(Debug, Error)] +pub enum UriError { + #[error("Invalid URI format: {0}")] + InvalidFormat(String), + + #[error("Unknown URI scheme: {0}")] + UnknownScheme(String), + + #[error("Unknown document type: {0}")] + UnknownDocType(String), + + #[error("Unknown context scope: {0}")] + UnknownScope(String), + + #[error("Unknown state entity: {0}")] + UnknownEntity(String), + + #[error("IO error: {0}")] + Io(#[from] std::io::Error), + + #[error("Path not found: {0}")] + PathNotFound(String), +} + +/// A parsed Blue URI +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum BlueUri { + /// Reference to documents: `blue://docs/{type}/` or `blue://docs/{type}/{id}` + Docs { + doc_type: String, + id: Option, + }, + + /// Reference to a context bundle: `blue://context/{scope}` + Context { scope: String }, + + /// Reference to live state: `blue://state/{entity}` + State { entity: String }, + + /// Reference to plugin content: `blue://{plugin}/{path}` + Plugin { name: String, path: String }, +} + +impl BlueUri { + /// Parse a URI string into a BlueUri + pub fn parse(uri: &str) -> Result { + // Must start with blue:// + if !uri.starts_with("blue://") { + return Err(UriError::UnknownScheme(uri.to_string())); + } + + let path = &uri[7..]; // Strip "blue://" + let parts: Vec<&str> = path.split('/').filter(|s| !s.is_empty()).collect(); + + if parts.is_empty() { + return Err(UriError::InvalidFormat("Empty URI path".to_string())); + } + + match parts[0] { + "docs" => { + if parts.len() < 2 { + return Err(UriError::InvalidFormat( + "docs URI requires a document type".to_string(), + )); + } + let doc_type = parts[1].to_string(); + let id = if parts.len() > 2 { + Some(parts[2..].join("/")) + } else { + None + }; + Ok(BlueUri::Docs { doc_type, id }) + } + "context" => { + if parts.len() < 2 { + return Err(UriError::InvalidFormat( + "context URI requires a scope".to_string(), + )); + } + Ok(BlueUri::Context { + scope: parts[1..].join("/"), + }) + } + "state" => { + if parts.len() < 2 { + return Err(UriError::InvalidFormat( + "state URI requires an entity".to_string(), + )); + } + Ok(BlueUri::State { + entity: parts[1..].join("/"), + }) + } + // Anything else is a plugin + plugin => Ok(BlueUri::Plugin { + name: plugin.to_string(), + path: if parts.len() > 1 { + parts[1..].join("/") + } else { + String::new() + }, + }), + } + } + + /// Resolve the URI to file paths relative to a project root + /// + /// Returns a list of paths that match the URI pattern. + pub fn resolve(&self, project_root: &Path) -> Result, UriError> { + let docs_dir = project_root.join(".blue").join("docs"); + + match self { + BlueUri::Docs { doc_type, id } => { + let type_dir = match doc_type.as_str() { + "adrs" | "adr" => docs_dir.join("adrs"), + "rfcs" | "rfc" => docs_dir.join("rfcs"), + "spikes" | "spike" => docs_dir.join("spikes"), + "dialogues" | "dialogue" => docs_dir.join("dialogues"), + "runbooks" | "runbook" => docs_dir.join("runbooks"), + "patterns" | "pattern" => docs_dir.join("patterns"), + _ => { + return Err(UriError::UnknownDocType(doc_type.clone())); + } + }; + + if !type_dir.exists() { + return Ok(Vec::new()); + } + + match id { + Some(id) => { + // Specific document - try exact match or pattern match + let exact = type_dir.join(format!("{}.md", id)); + if exact.exists() { + return Ok(vec![exact]); + } + + // Try with number prefix (e.g., "0001-title") + let entries = std::fs::read_dir(&type_dir)?; + for entry in entries.flatten() { + let path = entry.path(); + if let Some(name) = path.file_stem().and_then(|n| n.to_str()) { + // Check if name contains the id (case-insensitive) + if name.to_lowercase().contains(&id.to_lowercase()) { + return Ok(vec![path]); + } + // Check if the number portion matches + if let Some(num_str) = name.split('-').next() { + if num_str == id + || num_str.trim_start_matches('0') == id + { + return Ok(vec![path]); + } + } + } + } + + Ok(Vec::new()) + } + None => { + // All documents in directory + let mut paths = Vec::new(); + let entries = std::fs::read_dir(&type_dir)?; + for entry in entries.flatten() { + let path = entry.path(); + if path.extension().map(|e| e == "md").unwrap_or(false) { + paths.push(path); + } + } + paths.sort(); + Ok(paths) + } + } + } + BlueUri::Context { scope } => { + // Context bundles are generated or special locations + match scope.as_str() { + "voice" => { + // Voice patterns from docs/patterns + let patterns_dir = docs_dir.join("patterns"); + if patterns_dir.exists() { + let entries = std::fs::read_dir(&patterns_dir)?; + let paths: Vec = entries + .flatten() + .map(|e| e.path()) + .filter(|p| p.extension().map(|e| e == "md").unwrap_or(false)) + .collect(); + Ok(paths) + } else { + Ok(Vec::new()) + } + } + "relevance" => { + // Relevance graph - not a file, computed at runtime + Ok(Vec::new()) + } + _ => Err(UriError::UnknownScope(scope.clone())), + } + } + BlueUri::State { entity } => { + // State URIs resolve to database queries, not files + // Return empty - the caller should use the DocumentStore + match entity.as_str() { + "current-rfc" | "active-tasks" | "active-session" => Ok(Vec::new()), + _ => Err(UriError::UnknownEntity(entity.clone())), + } + } + BlueUri::Plugin { .. } => { + // Plugin URIs are handled by plugin resolvers + Ok(Vec::new()) + } + } + } + + /// Check if this URI references dynamic state (requires database lookup) + pub fn is_dynamic(&self) -> bool { + matches!(self, BlueUri::State { .. }) + } + + /// Check if this URI is a plugin reference + pub fn is_plugin(&self) -> bool { + matches!(self, BlueUri::Plugin { .. }) + } + + /// Get the URI as a string + pub fn to_uri_string(&self) -> String { + match self { + BlueUri::Docs { doc_type, id } => match id { + Some(id) => format!("blue://docs/{}/{}", doc_type, id), + None => format!("blue://docs/{}/", doc_type), + }, + BlueUri::Context { scope } => format!("blue://context/{}", scope), + BlueUri::State { entity } => format!("blue://state/{}", entity), + BlueUri::Plugin { name, path } => { + if path.is_empty() { + format!("blue://{}/", name) + } else { + format!("blue://{}/{}", name, path) + } + } + } + } +} + +/// Read content from resolved paths and concatenate with separators +pub fn read_uri_content(paths: &[PathBuf]) -> Result { + let mut content = String::new(); + for (i, path) in paths.iter().enumerate() { + if i > 0 { + content.push_str("\n---\n\n"); + } + content.push_str(&std::fs::read_to_string(path)?); + } + Ok(content) +} + +/// Estimate token count for content (rough approximation: ~4 chars per token) +pub fn estimate_tokens(content: &str) -> usize { + content.len() / 4 +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_parse_docs_uri() { + let uri = BlueUri::parse("blue://docs/adrs/").unwrap(); + assert_eq!( + uri, + BlueUri::Docs { + doc_type: "adrs".to_string(), + id: None + } + ); + + let uri = BlueUri::parse("blue://docs/rfcs/0016").unwrap(); + assert_eq!( + uri, + BlueUri::Docs { + doc_type: "rfcs".to_string(), + id: Some("0016".to_string()) + } + ); + } + + #[test] + fn test_parse_context_uri() { + let uri = BlueUri::parse("blue://context/voice").unwrap(); + assert_eq!( + uri, + BlueUri::Context { + scope: "voice".to_string() + } + ); + } + + #[test] + fn test_parse_state_uri() { + let uri = BlueUri::parse("blue://state/current-rfc").unwrap(); + assert_eq!( + uri, + BlueUri::State { + entity: "current-rfc".to_string() + } + ); + } + + #[test] + fn test_parse_plugin_uri() { + let uri = BlueUri::parse("blue://jira/PROJECT-123").unwrap(); + assert_eq!( + uri, + BlueUri::Plugin { + name: "jira".to_string(), + path: "PROJECT-123".to_string() + } + ); + } + + #[test] + fn test_invalid_scheme() { + let result = BlueUri::parse("http://example.com"); + assert!(result.is_err()); + } + + #[test] + fn test_to_uri_string() { + let uri = BlueUri::Docs { + doc_type: "adrs".to_string(), + id: None, + }; + assert_eq!(uri.to_uri_string(), "blue://docs/adrs/"); + + let uri = BlueUri::Docs { + doc_type: "rfcs".to_string(), + id: Some("0016".to_string()), + }; + assert_eq!(uri.to_uri_string(), "blue://docs/rfcs/0016"); + } +} diff --git a/crates/blue-mcp/Cargo.toml b/crates/blue-mcp/Cargo.toml index 9de7f45..25cef68 100644 --- a/crates/blue-mcp/Cargo.toml +++ b/crates/blue-mcp/Cargo.toml @@ -20,6 +20,7 @@ git2.workspace = true regex.workspace = true sha2.workspace = true rusqlite.workspace = true +rand.workspace = true [dev-dependencies] blue-core = { workspace = true, features = ["test-helpers"] } diff --git a/crates/blue-mcp/src/handlers/resources.rs b/crates/blue-mcp/src/handlers/resources.rs new file mode 100644 index 0000000..53c694b --- /dev/null +++ b/crates/blue-mcp/src/handlers/resources.rs @@ -0,0 +1,541 @@ +//! MCP Resources handlers for Blue +//! +//! Implements resources/list and resources/read for blue:// URIs. +//! See RFC 0016 for the context injection architecture. +//! See RFC 0017 for dynamic context activation. + +use std::sync::OnceLock; + +use blue_core::{BlueUri, ContextManifest, ProjectState, estimate_tokens, read_uri_content}; +use rand::Rng; +use serde_json::{json, Value}; + +use crate::error::ServerError; + +/// Session ID for this MCP server lifecycle +/// Format: {repo}-{realm}-{random12} per RFC 0017 +static SESSION_ID: OnceLock = OnceLock::new(); + +/// Handle resources/list request +/// +/// Returns a list of available blue:// URIs that can be read. +pub fn handle_resources_list(state: &ProjectState) -> Result { + let project_root = &state.home.root; + let manifest = ContextManifest::load_or_default(project_root) + .map_err(|e| ServerError::StateLoadFailed(e.to_string()))?; + + let mut resources = Vec::new(); + + // Add identity tier sources + for source in &manifest.identity.sources { + let uri = &source.uri; + let description = source.label.clone().unwrap_or_else(|| { + format!("Identity context from {}", uri) + }); + + resources.push(json!({ + "uri": uri, + "name": uri_to_name(uri), + "description": description, + "mimeType": "text/markdown" + })); + } + + // Add workflow tier sources + for source in &manifest.workflow.sources { + let uri = &source.uri; + let description = source.label.clone().unwrap_or_else(|| { + format!("Workflow context from {}", uri) + }); + + resources.push(json!({ + "uri": uri, + "name": uri_to_name(uri), + "description": description, + "mimeType": "text/markdown" + })); + } + + // Add standard document types + let doc_types = [ + ("blue://docs/adrs/", "All Architecture Decision Records"), + ("blue://docs/rfcs/", "All RFCs"), + ("blue://docs/spikes/", "All Spikes"), + ("blue://docs/dialogues/", "All Dialogues"), + ("blue://docs/runbooks/", "All Runbooks"), + ("blue://docs/patterns/", "All Patterns"), + ("blue://context/voice", "Voice patterns and tone"), + ]; + + for (uri, description) in doc_types { + // Only add if not already in manifest sources + let already_listed = manifest.identity.sources.iter().any(|s| s.uri == uri) + || manifest.workflow.sources.iter().any(|s| s.uri == uri); + + if !already_listed { + resources.push(json!({ + "uri": uri, + "name": uri_to_name(uri), + "description": description, + "mimeType": "text/markdown" + })); + } + } + + // Add state URIs + resources.push(json!({ + "uri": "blue://state/current-rfc", + "name": "Current RFC", + "description": "The currently active RFC being worked on", + "mimeType": "text/markdown" + })); + + resources.push(json!({ + "uri": "blue://state/active-tasks", + "name": "Active Tasks", + "description": "Tasks from the current RFC that are not yet completed", + "mimeType": "text/markdown" + })); + + Ok(json!({ + "resources": resources + })) +} + +/// Handle resources/read request +/// +/// Reads the content of a blue:// URI and returns it. +/// Implements staleness detection and rate limiting per RFC 0017. +pub fn handle_resources_read(state: &ProjectState, uri: &str) -> Result { + use std::collections::hash_map::DefaultHasher; + use std::hash::{Hash, Hasher}; + + let project_root = &state.home.root; + + // Parse the URI + let blue_uri = BlueUri::parse(uri) + .map_err(|_| ServerError::InvalidParams)?; + + // Handle dynamic state URIs specially + if blue_uri.is_dynamic() { + return handle_state_uri(state, &blue_uri); + } + + // Resolve to file paths + let paths = blue_uri.resolve(project_root) + .map_err(|e| ServerError::StateLoadFailed(e.to_string()))?; + + if paths.is_empty() { + return Ok(json!({ + "contents": [{ + "uri": uri, + "mimeType": "text/markdown", + "text": format!("No content found for URI: {}", uri) + }] + })); + } + + // Read and concatenate content + let content = read_uri_content(&paths) + .map_err(|e| ServerError::StateLoadFailed(e.to_string()))?; + + let tokens = estimate_tokens(&content); + + // Compute content hash for staleness detection + let mut hasher = DefaultHasher::new(); + content.hash(&mut hasher); + let content_hash = format!("{:016x}", hasher.finish()); + + // Check staleness and rate limiting + let refresh_policy = get_refresh_policy(uri, None); + let is_stale = should_refresh(state, uri, &content_hash); + let refresh_allowed = is_refresh_allowed(state); + + // Determine if we should log this injection + let should_log = match refresh_policy { + blue_core::RefreshPolicy::Never => false, + blue_core::RefreshPolicy::SessionStart => { + // Only log if never injected in this session + state.store.get_last_injection(get_session_id(state), uri) + .map(|opt| opt.is_none()) + .unwrap_or(true) + } + _ => is_stale && refresh_allowed, + }; + + // Log the injection if appropriate + if should_log { + let _ = log_injection(state, uri, &content_hash, tokens); + } + + Ok(json!({ + "contents": [{ + "uri": uri, + "mimeType": "text/markdown", + "text": content + }], + "_meta": { + "tokens": tokens, + "is_stale": is_stale, + "refresh_policy": format!("{:?}", refresh_policy), + "session_id": get_session_id(state) + } + })) +} + +/// Handle state URIs which require database queries +fn handle_state_uri(state: &ProjectState, blue_uri: &BlueUri) -> Result { + match blue_uri { + BlueUri::State { entity } => { + match entity.as_str() { + "current-rfc" => { + // Get the current RFC from active session or most recent in-progress + let content = get_current_rfc_content(state)?; + Ok(json!({ + "contents": [{ + "uri": blue_uri.to_uri_string(), + "mimeType": "text/markdown", + "text": content + }] + })) + } + "active-tasks" => { + let content = get_active_tasks_content(state)?; + Ok(json!({ + "contents": [{ + "uri": blue_uri.to_uri_string(), + "mimeType": "text/markdown", + "text": content + }] + })) + } + _ => { + Ok(json!({ + "contents": [{ + "uri": blue_uri.to_uri_string(), + "mimeType": "text/markdown", + "text": format!("Unknown state entity: {}", entity) + }] + })) + } + } + } + _ => Err(ServerError::InvalidParams), + } +} + +/// Get the current RFC content +fn get_current_rfc_content(state: &ProjectState) -> Result { + use blue_core::DocType; + + // Try to find an in-progress RFC + let docs = state.store.list_documents(DocType::Rfc) + .map_err(|e| ServerError::StateLoadFailed(e.to_string()))?; + + let in_progress = docs.iter().find(|d| d.status == "in-progress"); + + match in_progress { + Some(doc) => { + // Read the RFC file content + if let Some(path) = &doc.file_path { + let full_path = state.home.root.join(path); + if full_path.exists() { + return std::fs::read_to_string(&full_path) + .map_err(|e| ServerError::StateLoadFailed(e.to_string())); + } + } + + // Fall back to generating summary + Ok(format!( + "# Current RFC: {}\n\nStatus: {}\n", + doc.title, doc.status + )) + } + None => { + Ok("No RFC is currently in progress.\n\nUse `blue_rfc_create` to create a new RFC or `blue_rfc_update_status` to set one as in-progress.".to_string()) + } + } +} + +/// Get active tasks from the current RFC +fn get_active_tasks_content(state: &ProjectState) -> Result { + use blue_core::DocType; + + // Find in-progress RFC + let docs = state.store.list_documents(DocType::Rfc) + .map_err(|e| ServerError::StateLoadFailed(e.to_string()))?; + + let in_progress = docs.iter().find(|d| d.status == "in-progress"); + + match in_progress { + Some(doc) => { + let doc_id = doc.id.ok_or(ServerError::StateLoadFailed("No document ID".to_string()))?; + + let tasks = state.store.get_tasks(doc_id) + .map_err(|e| ServerError::StateLoadFailed(e.to_string()))?; + + let incomplete: Vec<_> = tasks.iter() + .filter(|t| !t.completed) + .collect(); + + if incomplete.is_empty() { + return Ok(format!( + "# Active Tasks for: {}\n\nAll tasks are complete!\n", + doc.title + )); + } + + let mut content = format!("# Active Tasks for: {}\n\n", doc.title); + for (i, task) in incomplete.iter().enumerate() { + content.push_str(&format!("{}. [ ] {}\n", i + 1, task.description)); + } + + Ok(content) + } + None => { + Ok("No RFC is currently in progress. No active tasks.".to_string()) + } + } +} + +/// Generate a composite session ID per RFC 0017 +/// +/// Format: {repo}-{realm}-{random12} +/// - repo: Project name from BlueHome +/// - realm: Realm name or "default" +/// - random12: 12 alphanumeric characters for uniqueness +fn generate_session_id(state: &ProjectState) -> String { + const CHARSET: &[u8] = b"0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"; + + // Get repo name from project state + let repo = state.home.project_name + .as_ref() + .map(|s| sanitize_id_component(s)) + .unwrap_or_else(|| "unknown".to_string()); + + // For now, use "default" realm. Future: load from realm config + let realm = "default"; + + // Generate 12-character random suffix + let mut rng = rand::thread_rng(); + let suffix: String = (0..12) + .map(|_| CHARSET[rng.gen_range(0..CHARSET.len())] as char) + .collect(); + + format!("{}-{}-{}", repo, realm, suffix) +} + +/// Sanitize a string for use in session ID (lowercase, alphanumeric, max 32 chars) +fn sanitize_id_component(s: &str) -> String { + s.chars() + .filter(|c| c.is_alphanumeric() || *c == '-') + .take(32) + .collect::() + .to_lowercase() +} + +/// Get or initialize the session ID for this MCP lifecycle +pub fn get_session_id(state: &ProjectState) -> &str { + SESSION_ID.get_or_init(|| generate_session_id(state)) +} + +/// Refresh rate limit in seconds (RFC 0017) +const REFRESH_COOLDOWN_SECS: u64 = 30; + +/// Check if a refresh is allowed based on rate limiting +fn is_refresh_allowed(state: &ProjectState) -> bool { + use chrono::{DateTime, Utc}; + + let session_id = get_session_id(state); + + match state.store.get_last_refresh_time(session_id) { + Ok(Some(timestamp)) => { + // Parse the timestamp and check if cooldown has elapsed + if let Ok(last_refresh) = DateTime::parse_from_rfc3339(×tamp) { + let elapsed = Utc::now().signed_duration_since(last_refresh.with_timezone(&Utc)); + elapsed.num_seconds() >= REFRESH_COOLDOWN_SECS as i64 + } else { + true // Invalid timestamp, allow refresh + } + } + Ok(None) => true, // No previous refresh, allow + Err(_) => true, // Error checking, allow refresh + } +} + +/// Check if content has changed since last injection (staleness detection) +fn should_refresh(state: &ProjectState, uri: &str, current_hash: &str) -> bool { + let session_id = get_session_id(state); + + match state.store.get_last_injection(session_id, uri) { + Ok(Some(injection)) => { + // Content is stale if hash differs + injection.content_hash != current_hash + } + Ok(None) => true, // Never injected, needs refresh + Err(_) => true, // Error checking, assume needs refresh + } +} + +/// Get the refresh policy for a document type and status +fn get_refresh_policy(uri: &str, _status: Option<&str>) -> blue_core::RefreshPolicy { + use blue_core::RefreshPolicy; + + // Determine policy based on URI pattern (per RFC 0017) + if uri.contains("/adrs/") { + RefreshPolicy::SessionStart + } else if uri.contains("/dialogues/") { + RefreshPolicy::Never + } else if uri.contains("/rfcs/") { + // For RFCs, we'd ideally check status (draft/in-progress vs implemented) + // For now, default to OnChange for active RFCs + RefreshPolicy::OnChange + } else if uri.contains("/spikes/") { + RefreshPolicy::OnChange + } else { + RefreshPolicy::OnRequest + } +} + +/// Log a context injection to the audit trail +fn log_injection(state: &ProjectState, uri: &str, content_hash: &str, tokens: usize) -> Result<(), ServerError> { + // Determine tier from URI + let tier = determine_tier(uri); + + // Get session ID (generated once per MCP lifecycle) + let session_id = get_session_id(state); + + // Log to database + state.store.log_injection(session_id, tier, uri, content_hash, Some(tokens as i32)) + .map_err(|e| ServerError::StateLoadFailed(e.to_string()))?; + + Ok(()) +} + +/// Determine which tier a URI belongs to based on common patterns +fn determine_tier(uri: &str) -> &'static str { + if uri.contains("/adrs/") || uri.contains("/context/voice") { + "identity" + } else if uri.contains("/state/") || uri.contains("/rfcs/") { + "workflow" + } else { + "reference" + } +} + +/// Handle blue_context_status tool call (RFC 0017) +/// +/// Returns context injection status including session ID, active injections, +/// staleness information, and relevance graph summary. +pub fn handle_context_status(state: &ProjectState) -> Result { + let session_id = get_session_id(state); + + // Get recent injections for this session + let injections = state.store + .get_session_injections(session_id, 10) + .map_err(|e| ServerError::StateLoadFailed(e.to_string()))?; + + // Get relevance edge count + let edge_count = state.store + .count_relevance_edges() + .unwrap_or(0); + + // Format injection summary + let injection_summary: Vec = injections.iter().map(|inj| { + json!({ + "uri": inj.source_uri, + "tier": inj.tier, + "tokens": inj.token_count, + "timestamp": inj.timestamp + }) + }).collect(); + + Ok(json!({ + "status": "success", + "session": { + "id": session_id, + "injection_count": injections.len(), + "injections": injection_summary + }, + "relevance_graph": { + "edge_count": edge_count + }, + "rate_limit": { + "cooldown_secs": REFRESH_COOLDOWN_SECS, + "refresh_allowed": is_refresh_allowed(state) + }, + "message": blue_core::voice::info( + &format!("Session {} with {} injections", session_id, injections.len()), + Some(&format!("{} edges in relevance graph", edge_count)) + ) + })) +} + +/// Convert a URI to a human-readable name +fn uri_to_name(uri: &str) -> String { + // Strip blue:// prefix and convert to readable form + let path = uri.strip_prefix("blue://").unwrap_or(uri); + let parts: Vec<&str> = path.split('/').filter(|s| !s.is_empty()).collect(); + + match parts.as_slice() { + ["docs", doc_type] => format!("All {}", capitalize(doc_type)), + ["docs", doc_type, id] => format!("{} {}", capitalize(doc_type).trim_end_matches('s'), id), + ["context", scope] => format!("{} Context", capitalize(scope)), + ["state", entity] => capitalize(&entity.replace('-', " ")), + _ => path.to_string(), + } +} + +fn capitalize(s: &str) -> String { + let mut chars = s.chars(); + match chars.next() { + None => String::new(), + Some(c) => c.to_uppercase().chain(chars).collect(), + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_uri_to_name() { + assert_eq!(uri_to_name("blue://docs/adrs/"), "All Adrs"); + assert_eq!(uri_to_name("blue://docs/rfcs/0016"), "Rfc 0016"); + assert_eq!(uri_to_name("blue://context/voice"), "Voice Context"); + assert_eq!(uri_to_name("blue://state/current-rfc"), "Current rfc"); + } + + #[test] + fn test_determine_tier() { + assert_eq!(determine_tier("blue://docs/adrs/"), "identity"); + assert_eq!(determine_tier("blue://context/voice"), "identity"); + assert_eq!(determine_tier("blue://state/current-rfc"), "workflow"); + assert_eq!(determine_tier("blue://docs/rfcs/0016"), "workflow"); + assert_eq!(determine_tier("blue://docs/dialogues/"), "reference"); + } + + #[test] + fn test_sanitize_id_component() { + assert_eq!(sanitize_id_component("Blue"), "blue"); + assert_eq!(sanitize_id_component("my-project"), "my-project"); + assert_eq!(sanitize_id_component("My Project!"), "myproject"); + assert_eq!(sanitize_id_component("a".repeat(50).as_str()), "a".repeat(32)); + } + + #[test] + fn test_session_id_format() { + let state = ProjectState::for_test(); + let session_id = generate_session_id(&state); + + // Should be in format: repo-realm-random12 + let parts: Vec<&str> = session_id.split('-').collect(); + assert_eq!(parts.len(), 3, "Session ID should have 3 parts: {}", session_id); + assert_eq!(parts[0], "test", "First part should be repo name"); + assert_eq!(parts[1], "default", "Second part should be realm name"); + assert_eq!(parts[2].len(), 12, "Random suffix should be 12 chars"); + + // Random part should be alphanumeric + assert!(parts[2].chars().all(|c| c.is_alphanumeric())); + } +} diff --git a/crates/blue-mcp/src/server.rs b/crates/blue-mcp/src/server.rs index 3b363f2..afc5e6a 100644 --- a/crates/blue-mcp/src/server.rs +++ b/crates/blue-mcp/src/server.rs @@ -92,6 +92,8 @@ impl BlueServer { "initialize" => self.handle_initialize(&req.params), "tools/list" => self.handle_tools_list(), "tools/call" => self.handle_tool_call(&req.params), + "resources/list" => self.handle_resources_list(), + "resources/read" => self.handle_resources_read(&req.params), _ => Err(ServerError::MethodNotFound(req.method.clone())), }; @@ -120,7 +122,10 @@ impl BlueServer { Ok(json!({ "protocolVersion": "2024-11-05", "capabilities": { - "tools": {} + "tools": {}, + "resources": { + "listChanged": true + } }, "serverInfo": { "name": "blue", @@ -2047,11 +2052,45 @@ impl BlueServer { } } } + }, + // RFC 0017: Context Activation tools + { + "name": "blue_context_status", + "description": "Get context injection status: session ID, active injections, staleness, and relevance graph summary.", + "inputSchema": { + "type": "object", + "properties": { + "cwd": { + "type": "string", + "description": "Current working directory" + } + } + } } ] })) } + // ==================== Resources Handlers (RFC 0016) ==================== + + /// Handle resources/list request + fn handle_resources_list(&mut self) -> Result { + let state = self.ensure_state()?; + crate::handlers::resources::handle_resources_list(state) + } + + /// Handle resources/read request + fn handle_resources_read(&mut self, params: &Option) -> Result { + let params = params.as_ref().ok_or(ServerError::InvalidParams)?; + let uri = params + .get("uri") + .and_then(|v| v.as_str()) + .ok_or(ServerError::InvalidParams)?; + + let state = self.ensure_state()?; + crate::handlers::resources::handle_resources_read(state, uri) + } + /// Handle tools/call request fn handle_tool_call(&mut self, params: &Option) -> Result { let params = params.as_ref().ok_or(ServerError::InvalidParams)?; @@ -2179,6 +2218,8 @@ impl BlueServer { "blue_index_impact" => self.handle_index_impact(&call.arguments), "blue_index_file" => self.handle_index_file(&call.arguments), "blue_index_realm" => self.handle_index_realm(&call.arguments), + // RFC 0017: Context Activation tools + "blue_context_status" => self.handle_context_status(&call.arguments), _ => Err(ServerError::ToolNotFound(call.name)), }?; @@ -3300,6 +3341,12 @@ impl BlueServer { let state = self.ensure_state()?; crate::handlers::index::handle_index_realm(state, args) } + + // RFC 0017: Context Activation handlers + fn handle_context_status(&mut self, _args: &Option) -> Result { + let state = self.ensure_state()?; + crate::handlers::resources::handle_context_status(state) + } } impl Default for BlueServer {