diff --git a/Cargo.toml b/Cargo.toml index 67b917f..728b81a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -41,6 +41,9 @@ chrono = { version = "0.4", features = ["serde"] } # Git git2 = "0.19" +# Regex +regex = "1.10" + # Internal blue-core = { path = "crates/blue-core" } blue-mcp = { path = "crates/blue-mcp" } diff --git a/crates/blue-mcp/Cargo.toml b/crates/blue-mcp/Cargo.toml index 472dde3..762aa57 100644 --- a/crates/blue-mcp/Cargo.toml +++ b/crates/blue-mcp/Cargo.toml @@ -15,6 +15,7 @@ tokio.workspace = true tracing.workspace = true chrono.workspace = true git2.workspace = true +regex.workspace = true [dev-dependencies] blue-core = { workspace = true, features = ["test-helpers"] } diff --git a/crates/blue-mcp/src/handlers/dialogue.rs b/crates/blue-mcp/src/handlers/dialogue.rs new file mode 100644 index 0000000..0f97ccc --- /dev/null +++ b/crates/blue-mcp/src/handlers/dialogue.rs @@ -0,0 +1,259 @@ +//! Dialogue extraction tool handlers +//! +//! Extracts dialogue content from spawned agent JSONL outputs for scoring. 
+ +use serde::Serialize; +use serde_json::Value; +use std::fs::{self, File}; +use std::io::{BufRead, BufReader}; +use std::path::{Path, PathBuf}; +use std::process::Command; + +use crate::error::ServerError; + +/// Extraction status +#[derive(Debug, Serialize)] +#[serde(rename_all = "snake_case")] +pub enum ExtractionStatus { + Complete, + Truncated, + PartialError, +} + +/// Extraction result +#[derive(Debug, Serialize)] +pub struct ExtractionResult { + pub text: String, + pub status: ExtractionStatus, + pub source_file: String, + pub message_count: usize, + #[serde(skip_serializing_if = "Option::is_none")] + pub errors: Option>, +} + +/// Handle blue_extract_dialogue +pub fn handle_extract_dialogue(args: &Value) -> Result { + let task_id = args.get("task_id").and_then(|v| v.as_str()); + let file_path_arg = args.get("file_path").and_then(|v| v.as_str()); + + // Resolve file path + let file_path = match (task_id, file_path_arg) { + (Some(id), _) => resolve_task_output(id)?, + (None, Some(path)) => PathBuf::from(path), + (None, None) => { + return Err(ServerError::InvalidParams); + } + }; + + // Verify file exists + if !file_path.exists() { + return Err(ServerError::CommandFailed(format!( + "JSONL file not found: {}", + file_path.display() + ))); + } + + // Try jq first, fall back to pure Rust + let result = if jq_available() { + extract_with_jq(&file_path)? + } else { + extract_with_rust(&file_path)? 
+ }; + + let hint = match result.status { + ExtractionStatus::Complete => format!( + "Extracted {} assistant message(s) from {}", + result.message_count, + file_path.file_name().unwrap_or_default().to_string_lossy() + ), + ExtractionStatus::Truncated => format!( + "Extracted {} assistant message(s), output truncated", + result.message_count + ), + ExtractionStatus::PartialError => format!( + "Extracted {} message(s) with {} error(s)", + result.message_count, + result.errors.as_ref().map(|e| e.len()).unwrap_or(0) + ), + }; + + Ok(serde_json::json!({ + "status": "success", + "message": blue_core::voice::info( + &format!("Extracted {} messages", result.message_count), + Some(&hint) + ), + "text": result.text, + "extraction_status": format!("{:?}", result.status).to_lowercase(), + "source_file": result.source_file, + "message_count": result.message_count, + "errors": result.errors + })) +} + +/// Resolve file path from task_id +fn resolve_task_output(task_id: &str) -> Result { + // Look for task output symlink in /tmp/claude/.../tasks/ + let tmp_claude = PathBuf::from("/tmp/claude"); + if !tmp_claude.exists() { + return Err(ServerError::CommandFailed( + "No /tmp/claude directory found. Is Claude Code running?".to_string(), + )); + } + + // Search for task output file + for entry in fs::read_dir(&tmp_claude) + .map_err(|e| ServerError::CommandFailed(format!("Failed to read /tmp/claude: {}", e)))? 
+ { + let entry = entry.map_err(|e| { + ServerError::CommandFailed(format!("Failed to read directory entry: {}", e)) + })?; + let tasks_dir = entry.path().join("tasks"); + if tasks_dir.exists() { + let output_file = tasks_dir.join(format!("{}.output", task_id)); + if output_file.exists() { + // Follow symlink to get actual file + let resolved = fs::read_link(&output_file).unwrap_or(output_file.clone()); + return Ok(resolved); + } + } + } + + Err(ServerError::CommandFailed(format!( + "Task output not found for task_id: {}", + task_id + ))) +} + +/// Check if jq is available +fn jq_available() -> bool { + Command::new("jq") + .arg("--version") + .output() + .map(|o| o.status.success()) + .unwrap_or(false) +} + +/// Extract dialogue using jq (faster for large files) +fn extract_with_jq(file_path: &Path) -> Result { + let output = Command::new("jq") + .arg("-r") + .arg(r#"select(.type == "assistant") | .message.content[]? | select(.type == "text") | .text"#) + .arg(file_path) + .output() + .map_err(|e| ServerError::CommandFailed(format!("Failed to run jq: {}", e)))?; + + if !output.status.success() { + let stderr = String::from_utf8_lossy(&output.stderr); + return Err(ServerError::CommandFailed(format!("jq failed: {}", stderr))); + } + + let text = String::from_utf8_lossy(&output.stdout).to_string(); + + // Count messages by counting non-empty segments + let message_count = text.split("\n\n").filter(|s| !s.trim().is_empty()).count(); + + // Check for truncation (arbitrary limit: 500KB) + let status = if text.len() > 500_000 { + ExtractionStatus::Truncated + } else { + ExtractionStatus::Complete + }; + + Ok(ExtractionResult { + text, + status, + source_file: file_path.to_string_lossy().to_string(), + message_count, + errors: None, + }) +} + +/// Extract dialogue using pure Rust (fallback) +fn extract_with_rust(file_path: &Path) -> Result { + let file = File::open(file_path) + .map_err(|e| ServerError::CommandFailed(format!("Failed to open file: {}", e)))?; + + let 
reader = BufReader::new(file); + let mut texts = Vec::new(); + let mut errors = Vec::new(); + let mut message_count = 0; + + for (line_num, line_result) in reader.lines().enumerate() { + let line = match line_result { + Ok(l) => l, + Err(e) => { + errors.push(format!("Line {}: read error: {}", line_num + 1, e)); + continue; + } + }; + + if line.trim().is_empty() { + continue; + } + + // Parse JSON line + let json_value: Value = match serde_json::from_str(&line) { + Ok(v) => v, + Err(e) => { + errors.push(format!("Line {}: JSON parse error: {}", line_num + 1, e)); + continue; + } + }; + + // Check if this is an assistant message + if json_value.get("type").and_then(|v| v.as_str()) != Some("assistant") { + continue; + } + + // Extract text content from message.content array + if let Some(content_array) = json_value + .get("message") + .and_then(|m| m.get("content")) + .and_then(|c| c.as_array()) + { + for content_item in content_array { + if content_item.get("type").and_then(|v| v.as_str()) == Some("text") { + if let Some(text) = content_item.get("text").and_then(|t| t.as_str()) { + texts.push(text.to_string()); + message_count += 1; + } + } + } + } + } + + let text = texts.join("\n\n"); + + // Determine status + let status = if !errors.is_empty() { + ExtractionStatus::PartialError + } else if text.len() > 500_000 { + ExtractionStatus::Truncated + } else { + ExtractionStatus::Complete + }; + + Ok(ExtractionResult { + text, + status, + source_file: file_path.to_string_lossy().to_string(), + message_count, + errors: if errors.is_empty() { + None + } else { + Some(errors) + }, + }) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_jq_check() { + // Just verify this doesn't panic + let _ = jq_available(); + } +} diff --git a/crates/blue-mcp/src/handlers/dialogue_lint.rs b/crates/blue-mcp/src/handlers/dialogue_lint.rs new file mode 100644 index 0000000..682c2a2 --- /dev/null +++ b/crates/blue-mcp/src/handlers/dialogue_lint.rs @@ -0,0 +1,659 @@ +//! 
/// Parsed dialogue structure for validation.
///
/// Filled in by `parse_dialogue` and read by the individual `check_*`
/// functions. `Default` yields the "nothing found" state.
#[derive(Debug, Default)]
struct ParsedDialogue {
    // Header fields
    has_draft_link: bool,
    has_participants: bool,
    has_status: bool,
    /// Trimmed text following `**Status**:`, when present.
    status_value: Option<String>,

    // Scoreboard
    has_scoreboard: bool,
    /// Agent names in the order their scoreboard rows appear.
    scoreboard_agents: Vec<String>,
    /// Per-agent total recomputed from the four numeric score cells of the row.
    scoreboard_totals: HashMap<String, u32>,
    /// Total the document itself claims.
    claimed_total: Option<u32>,

    // Inventories
    has_perspectives_inventory: bool,
    has_tensions_tracker: bool,

    // Rounds
    /// Round numbers found in `## Round N` headings.
    rounds: Vec<u32>,

    // Markers
    perspective_ids: Vec<String>,
    tension_ids: Vec<String>,
    resolved_ids: Vec<String>,

    // For emoji consistency
    /// Agent name -> emoji found in its `###` header.
    agent_emojis: HashMap<String, String>,
}
file_path.display() + ))); + } + + // Read file content + let content = fs::read_to_string(&file_path) + .map_err(|e| ServerError::CommandFailed(format!("Failed to read file: {}", e)))?; + + // Parse dialogue structure + let parsed = parse_dialogue(&content); + + // Run all checks + let mut checks = Vec::new(); + + // Critical checks + checks.push(check_rounds_present(&parsed)); + checks.push(check_markers_parseable(&content)); + + // Major checks + checks.push(check_convergence_gate(&parsed)); + checks.push(check_scoreboard_present(&parsed)); + checks.push(check_inventories_present(&parsed)); + checks.push(check_id_uniqueness(&parsed)); + checks.push(check_round_sequencing(&parsed)); + + // Minor checks + checks.push(check_header_completeness(&parsed)); + checks.push(check_scoreboard_math(&parsed)); + checks.push(check_round_numbering(&parsed)); + checks.push(check_emoji_consistency(&parsed)); + + // Calculate weighted score + let mut total_weight = 0u32; + let mut earned_weight = 0u32; + let mut checks_passed = 0usize; + let mut checks_failed = 0usize; + let mut critical_failures = Vec::new(); + + for check in &checks { + let weight = check.severity.weight(); + total_weight += weight; + if check.pass { + earned_weight += weight; + checks_passed += 1; + } else { + checks_failed += 1; + if check.severity == Severity::Critical { + critical_failures.push(check.message.clone()); + } + } + } + + let score = if total_weight > 0 { + (earned_weight as f64) / (total_weight as f64) + } else { + 1.0 + }; + + // Build hint + let hint = if score >= 0.9 { + format!( + "Dialogue passes with score {:.1}% ({}/{} checks)", + score * 100.0, + checks_passed, + checks_passed + checks_failed + ) + } else if score >= 0.7 { + format!( + "Dialogue needs attention: {:.1}% ({} issues)", + score * 100.0, + checks_failed + ) + } else if score >= 0.3 { + format!( + "Dialogue has significant issues: {:.1}% ({} failures)", + score * 100.0, + checks_failed + ) + } else { + format!( + "Dialogue 
failing: {:.1}% - {} critical issues", + score * 100.0, + critical_failures.len() + ) + }; + + Ok(json!({ + "status": "success", + "message": blue_core::voice::info( + &format!("Dialogue score: {:.1}%", score * 100.0), + Some(&hint) + ), + "score": score, + "checks_passed": checks_passed, + "checks_failed": checks_failed, + "details": checks.iter().map(|c| json!({ + "name": c.name, + "severity": c.severity, + "pass": c.pass, + "message": c.message, + "fix_hint": c.fix_hint + })).collect::>(), + "critical_failures": critical_failures + })) +} + +/// Parse dialogue content into structured form +fn parse_dialogue(content: &str) -> ParsedDialogue { + let mut parsed = ParsedDialogue::default(); + + // Header patterns (case-insensitive, whitespace-tolerant) + let draft_re = Regex::new(r"(?i)\*\*Draft\*\*:").unwrap(); + let participants_re = Regex::new(r"(?i)\*\*Participants\*\*:").unwrap(); + let status_re = Regex::new(r"(?i)\*\*Status\*\*:\s*(.+)").unwrap(); + + parsed.has_draft_link = draft_re.is_match(content); + parsed.has_participants = participants_re.is_match(content); + + if let Some(caps) = status_re.captures(content) { + parsed.has_status = true; + parsed.status_value = Some(caps[1].trim().to_string()); + } + + // Scoreboard detection + let scoreboard_re = Regex::new(r"(?i)##\s*Alignment\s+Scoreboard").unwrap(); + parsed.has_scoreboard = scoreboard_re.is_match(content); + + // Parse scoreboard table for agents and totals + let table_row_re = + Regex::new(r"\|\s*([🧁💙]?\s*\w+)\s*\|\s*(\d+)\s*\|\s*(\d+)\s*\|\s*(\d+)\s*\|\s*(\d+)\s*\|\s*\*\*(\d+)\*\*\s*\|").unwrap(); + for caps in table_row_re.captures_iter(content) { + let agent = caps[1].trim().to_string(); + let w: u32 = caps[2].parse().unwrap_or(0); + let c: u32 = caps[3].parse().unwrap_or(0); + let t: u32 = caps[4].parse().unwrap_or(0); + let r: u32 = caps[5].parse().unwrap_or(0); + let total: u32 = caps[6].parse().unwrap_or(0); + + parsed.scoreboard_agents.push(agent.clone()); + 
parsed.scoreboard_totals.insert(agent, w + c + t + r); + parsed.claimed_total = Some(total); + } + + // Total ALIGNMENT line + let total_alignment_re = Regex::new(r"(?i)\*\*Total\s+ALIGNMENT\*\*:\s*(\d+)").unwrap(); + if let Some(caps) = total_alignment_re.captures(content) { + parsed.claimed_total = caps[1].parse().ok(); + } + + // Inventories + let perspectives_re = Regex::new(r"(?i)##\s*Perspectives\s+Inventory").unwrap(); + let tensions_re = Regex::new(r"(?i)##\s*Tensions\s+Tracker").unwrap(); + parsed.has_perspectives_inventory = perspectives_re.is_match(content); + parsed.has_tensions_tracker = tensions_re.is_match(content); + + // Rounds (case-insensitive) + let round_re = Regex::new(r"(?i)##\s*Round\s+(\d+)").unwrap(); + for caps in round_re.captures_iter(content) { + if let Ok(n) = caps[1].parse::() { + parsed.rounds.push(n); + } + } + parsed.rounds.sort(); + + // Agent headers within rounds + let agent_re = Regex::new(r"###\s*(\w+)\s*([🧁💙]?)").unwrap(); + for caps in agent_re.captures_iter(content) { + let agent = caps[1].to_string(); + let emoji = caps + .get(2) + .map(|m: regex::Match| m.as_str().to_string()) + .unwrap_or_default(); + if !emoji.is_empty() { + parsed.agent_emojis.insert(agent.clone(), emoji); + } + } + + // Perspective markers (case-insensitive, whitespace-tolerant) + let perspective_marker_re = Regex::new(r"(?i)\[\s*PERSPECTIVE\s+P(\d{2})\s*:").unwrap(); + for caps in perspective_marker_re.captures_iter(content) { + parsed.perspective_ids.push(format!("P{}", &caps[1])); + } + + // Tension markers + let tension_marker_re = Regex::new(r"(?i)\[\s*TENSION\s+T(\d+)\s*:").unwrap(); + for caps in tension_marker_re.captures_iter(content) { + parsed.tension_ids.push(format!("T{}", &caps[1])); + } + + // Resolved markers + let resolved_marker_re = Regex::new(r"(?i)\[\s*RESOLVED\s+T(\d+)").unwrap(); + for caps in resolved_marker_re.captures_iter(content) { + parsed.resolved_ids.push(format!("T{}", &caps[1])); + } + + parsed +} + +// ===== CRITICAL 
CHECKS ===== + +fn check_rounds_present(parsed: &ParsedDialogue) -> CheckResult { + let pass = !parsed.rounds.is_empty(); + CheckResult { + name: "rounds-present", + severity: Severity::Critical, + pass, + message: if pass { + format!("Found {} round(s)", parsed.rounds.len()) + } else { + "No rounds found in dialogue".to_string() + }, + fix_hint: if pass { + None + } else { + Some("Add at least one '## Round N' section with agent responses".to_string()) + }, + } +} + +fn check_markers_parseable(content: &str) -> CheckResult { + // Check for malformed markers that might indicate parsing issues + let malformed_perspective = Regex::new(r"\[PERSPECTIV[^E]").unwrap(); + let malformed_tension = Regex::new(r"\[TENSIO[^N]").unwrap(); + + let has_malformed = + malformed_perspective.is_match(content) || malformed_tension.is_match(content); + + CheckResult { + name: "markers-parseable", + severity: Severity::Critical, + pass: !has_malformed, + message: if has_malformed { + "Found potentially malformed markers".to_string() + } else { + "All markers appear well-formed".to_string() + }, + fix_hint: if has_malformed { + Some("Check spelling: [PERSPECTIVE Pnn: ...] 
and [TENSION Tn: ...]".to_string()) + } else { + None + }, + } +} + +// ===== MAJOR CHECKS ===== + +fn check_convergence_gate(parsed: &ParsedDialogue) -> CheckResult { + // Only applies if status indicates convergence + let is_converged = parsed + .status_value + .as_ref() + .map(|s| s.to_lowercase().contains("converge")) + .unwrap_or(false); + + if !is_converged { + return CheckResult { + name: "convergence-gate", + severity: Severity::Major, + pass: true, + message: "Not converged yet, gate not applicable".to_string(), + fix_hint: None, + }; + } + + // Check all tensions have matching resolved + let tension_set: HashSet<_> = parsed.tension_ids.iter().collect(); + let resolved_set: HashSet<_> = parsed.resolved_ids.iter().collect(); + + let unresolved: Vec<_> = tension_set + .difference(&resolved_set) + .map(|s| s.as_str()) + .collect(); + + let pass = unresolved.is_empty(); + + CheckResult { + name: "convergence-gate", + severity: Severity::Major, + pass, + message: if pass { + "All tensions resolved before convergence".to_string() + } else { + format!("Unresolved tensions: {}", unresolved.join(", ")) + }, + fix_hint: if pass { + None + } else { + Some(format!( + "Add [RESOLVED {}] markers for each unresolved tension", + unresolved.join(", ") + )) + }, + } +} + +fn check_scoreboard_present(parsed: &ParsedDialogue) -> CheckResult { + CheckResult { + name: "scoreboard-present", + severity: Severity::Major, + pass: parsed.has_scoreboard, + message: if parsed.has_scoreboard { + "Scoreboard section found".to_string() + } else { + "Missing '## Alignment Scoreboard' section".to_string() + }, + fix_hint: if parsed.has_scoreboard { + None + } else { + Some("Add '## Alignment Scoreboard' section with W/C/T/R columns".to_string()) + }, + } +} + +fn check_inventories_present(parsed: &ParsedDialogue) -> CheckResult { + let has_both = parsed.has_perspectives_inventory && parsed.has_tensions_tracker; + let missing = match ( + parsed.has_perspectives_inventory, + 
parsed.has_tensions_tracker, + ) { + (false, false) => "Perspectives Inventory, Tensions Tracker", + (false, true) => "Perspectives Inventory", + (true, false) => "Tensions Tracker", + (true, true) => "", + }; + + CheckResult { + name: "inventories-present", + severity: Severity::Major, + pass: has_both, + message: if has_both { + "Both inventory sections present".to_string() + } else { + format!("Missing: {}", missing) + }, + fix_hint: if has_both { + None + } else { + Some(format!("Add '## {}' section(s)", missing)) + }, + } +} + +fn check_id_uniqueness(parsed: &ParsedDialogue) -> CheckResult { + let mut perspective_seen: HashSet = HashSet::new(); + let mut tension_seen: HashSet = HashSet::new(); + let mut duplicates = Vec::new(); + + for id in &parsed.perspective_ids { + if !perspective_seen.insert(id.clone()) { + duplicates.push(id.clone()); + } + } + for id in &parsed.tension_ids { + if !tension_seen.insert(id.clone()) { + duplicates.push(id.clone()); + } + } + + let pass = duplicates.is_empty(); + + CheckResult { + name: "id-uniqueness", + severity: Severity::Major, + pass, + message: if pass { + "All perspective/tension IDs are unique".to_string() + } else { + format!("Duplicate IDs: {}", duplicates.join(", ")) + }, + fix_hint: if pass { + None + } else { + Some("Renumber duplicate IDs to be unique".to_string()) + }, + } +} + +fn check_round_sequencing(parsed: &ParsedDialogue) -> CheckResult { + if parsed.rounds.is_empty() { + return CheckResult { + name: "round-sequencing", + severity: Severity::Major, + pass: false, + message: "No rounds to check".to_string(), + fix_hint: Some("Add '## Round 1' section".to_string()), + }; + } + + // Check rounds are sequential starting from 1 + let expected: Vec = (1..=parsed.rounds.len() as u32).collect(); + let pass = parsed.rounds == expected; + + CheckResult { + name: "round-sequencing", + severity: Severity::Major, + pass, + message: if pass { + format!("Rounds 1-{} sequential", parsed.rounds.len()) + } else { + 
format!( + "Round sequence gap: found {:?}, expected {:?}", + parsed.rounds, expected + ) + }, + fix_hint: if pass { + None + } else { + Some("Renumber rounds sequentially starting from 1".to_string()) + }, + } +} + +// ===== MINOR CHECKS ===== + +fn check_header_completeness(parsed: &ParsedDialogue) -> CheckResult { + let missing: Vec<&str> = [ + (!parsed.has_draft_link, "Draft"), + (!parsed.has_participants, "Participants"), + (!parsed.has_status, "Status"), + ] + .iter() + .filter_map(|(missing, name)| if *missing { Some(*name) } else { None }) + .collect(); + + let pass = missing.is_empty(); + + CheckResult { + name: "header-completeness", + severity: Severity::Minor, + pass, + message: if pass { + "All header fields present".to_string() + } else { + format!("Missing header fields: {}", missing.join(", ")) + }, + fix_hint: if pass { + None + } else { + Some(format!( + "Add **{}**: fields to header", + missing.join("**, **") + )) + }, + } +} + +fn check_scoreboard_math(parsed: &ParsedDialogue) -> CheckResult { + if !parsed.has_scoreboard || parsed.scoreboard_totals.is_empty() { + return CheckResult { + name: "scoreboard-math", + severity: Severity::Minor, + pass: true, + message: "No scoreboard to verify".to_string(), + fix_hint: None, + }; + } + + // Sum up all agent totals + let computed_total: u32 = parsed.scoreboard_totals.values().sum(); + let claimed = parsed.claimed_total.unwrap_or(0); + + // Allow some tolerance for parsing issues + let pass = (computed_total as i32 - claimed as i32).abs() <= 2; + + CheckResult { + name: "scoreboard-math", + severity: Severity::Minor, + pass, + message: if pass { + format!("Total ALIGNMENT: {}", claimed) + } else { + format!( + "Math mismatch: claimed {}, computed {}", + claimed, computed_total + ) + }, + fix_hint: if pass { + None + } else { + Some(format!( + "Update **Total ALIGNMENT**: {} to match sum of agent scores", + computed_total + )) + }, + } +} + +fn check_round_numbering(parsed: &ParsedDialogue) -> 
CheckResult { + if parsed.rounds.is_empty() { + return CheckResult { + name: "round-numbering", + severity: Severity::Minor, + pass: true, + message: "No rounds to check".to_string(), + fix_hint: None, + }; + } + + let starts_at_one = parsed.rounds.first() == Some(&1); + + CheckResult { + name: "round-numbering", + severity: Severity::Minor, + pass: starts_at_one, + message: if starts_at_one { + "Rounds start at 1".to_string() + } else { + format!("Rounds don't start at 1: {:?}", parsed.rounds) + }, + fix_hint: if starts_at_one { + None + } else { + Some("Start round numbering at 1".to_string()) + }, + } +} + +fn check_emoji_consistency(parsed: &ParsedDialogue) -> CheckResult { + let has_emojis = !parsed.agent_emojis.is_empty(); + + CheckResult { + name: "emoji-consistency", + severity: Severity::Minor, + pass: has_emojis, + message: if has_emojis { + format!("Found {} agents with emoji", parsed.agent_emojis.len()) + } else { + "No agent emojis found".to_string() + }, + fix_hint: if has_emojis { + None + } else { + Some("Add emoji to agent headers: ### Muffin 🧁".to_string()) + }, + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_parse_dialogue_rounds() { + let content = r#" +## Round 1 +### Muffin 🧁 +Some content +## Round 2 +### Cupcake 🧁 +More content +"#; + let parsed = parse_dialogue(content); + assert_eq!(parsed.rounds, vec![1, 2]); + } + + #[test] + fn test_check_rounds_present_pass() { + let mut parsed = ParsedDialogue::default(); + parsed.rounds = vec![1, 2]; + let result = check_rounds_present(&parsed); + assert!(result.pass); + } + + #[test] + fn test_check_rounds_present_fail() { + let parsed = ParsedDialogue::default(); + let result = check_rounds_present(&parsed); + assert!(!result.pass); + assert!(result.fix_hint.is_some()); + } +} diff --git a/crates/blue-mcp/src/handlers/mod.rs b/crates/blue-mcp/src/handlers/mod.rs index d4eb13e..1775a9f 100644 --- a/crates/blue-mcp/src/handlers/mod.rs +++ 
b/crates/blue-mcp/src/handlers/mod.rs @@ -5,9 +5,12 @@ pub mod adr; pub mod audit; pub mod decision; +pub mod dialogue; +pub mod dialogue_lint; pub mod env; pub mod guide; pub mod lint; +pub mod playwright; pub mod pr; pub mod prd; pub mod release; diff --git a/crates/blue-mcp/src/handlers/playwright.rs b/crates/blue-mcp/src/handlers/playwright.rs new file mode 100644 index 0000000..c4aeccc --- /dev/null +++ b/crates/blue-mcp/src/handlers/playwright.rs @@ -0,0 +1,452 @@ +//! Playwright verification handler +//! +//! Provides browser-based test verification using Playwright MCP. +//! Generates verification plans that Claude can execute via Playwright tools. + +use regex::Regex; +use serde::Serialize; +use serde_json::{json, Value}; + +use crate::error::ServerError; + +/// A single verification step for Playwright execution +#[derive(Debug, Clone, Serialize)] +pub struct VerificationStep { + pub step: usize, + pub action: VerificationAction, + pub description: String, + pub mcp_tool: String, + pub assertion: String, +} + +/// Types of verification actions +#[derive(Debug, Clone, Serialize)] +#[serde(rename_all = "snake_case")] +pub enum VerificationAction { + Navigate, + Snapshot, + Screenshot, + Click, + Fill, + Resize, +} + +/// URL safety level +#[derive(Debug, Clone, PartialEq, Eq, Serialize)] +#[serde(rename_all = "snake_case")] +pub enum UrlSafetyLevel { + Localhost, + Development, + Staging, + Production, + Unknown, +} + +/// Handle blue_playwright_verify +pub fn handle_verify(args: &Value) -> Result { + let task = args + .get("task") + .and_then(|v| v.as_str()) + .ok_or(ServerError::InvalidParams)?; + + let base_url = args + .get("base_url") + .and_then(|v| v.as_str()) + .ok_or(ServerError::InvalidParams)?; + + let path = args.get("path").and_then(|v| v.as_str()); + let allow_staging = args + .get("allow_staging") + .and_then(|v| v.as_bool()) + .unwrap_or(false); + + let expected_outcomes: Vec = args + .get("expected_outcomes") + .and_then(|v| v.as_array()) + 
.map(|arr| { + arr.iter() + .filter_map(|v| v.as_str().map(String::from)) + .collect() + }) + .unwrap_or_default(); + + // Validate URL safety + let safety_level = classify_url_safety(base_url); + validate_url_safety(&safety_level, allow_staging)?; + + // Build full URL + let target_url = if let Some(p) = path { + format!("{}{}", base_url.trim_end_matches('/'), p) + } else { + base_url.to_string() + }; + + // Generate verification steps from task description + let steps = generate_verification_steps(task, &target_url); + + // Generate the Playwright MCP tool sequence + let playwright_sequence = generate_playwright_sequence(&steps, &target_url); + + let hint = format!( + "Generated {} verification steps for '{}'. Execute the playwright_sequence in order.", + steps.len(), + task + ); + + Ok(json!({ + "status": "success", + "message": blue_core::voice::info( + &format!("{} verification steps generated", steps.len()), + Some(&hint) + ), + "verification_plan": { + "task": task, + "target_url": target_url, + "safety_level": safety_level, + "steps": steps, + "expected_outcomes": expected_outcomes + }, + "playwright_sequence": playwright_sequence, + "safety": { + "url_safety_level": safety_level, + "requires_approval": safety_level != UrlSafetyLevel::Localhost, + "blocked": safety_level == UrlSafetyLevel::Production + }, + "evidence_guidance": { + "screenshot_tool": "mcp__playwright__browser_take_screenshot", + "snapshot_tool": "mcp__playwright__browser_snapshot", + "recommended": "Take screenshots before and after key actions" + }, + "suggested_tools": [ + "mcp__playwright__browser_navigate", + "mcp__playwright__browser_snapshot", + "mcp__playwright__browser_take_screenshot" + ] + })) +} + +/// Classify URL safety level +fn classify_url_safety(url: &str) -> UrlSafetyLevel { + let lower = url.to_lowercase(); + + // Check for localhost first + if lower.contains("localhost") || lower.contains("127.0.0.1") || lower.contains("[::1]") { + return UrlSafetyLevel::Localhost; + } + 
+ // Production patterns - blocked + let production_patterns = [ + "prod.", ".prod", "production.", ".production", "live.", ".live", "www.", + ]; + if production_patterns.iter().any(|p| lower.contains(p)) { + return UrlSafetyLevel::Production; + } + + // Development patterns + let dev_patterns = [ + "dev.", ".dev", "development.", ".development", "local.", ".local", ":3000", ":3001", + ":5173", ":5174", ":8080", ":8000", ":4200", + ]; + if dev_patterns.iter().any(|p| lower.contains(p)) { + return UrlSafetyLevel::Development; + } + + // Staging patterns + let staging_patterns = [ + "staging.", ".staging", "stage.", ".stage", "test.", ".test", "qa.", ".qa", "uat.", ".uat", + "preview.", ".preview", + ]; + if staging_patterns.iter().any(|p| lower.contains(p)) { + return UrlSafetyLevel::Staging; + } + + // If it looks like an IP address with a port, likely development + if let Ok(re) = Regex::new(r"\d+\.\d+\.\d+\.\d+:\d+") { + if re.is_match(&lower) { + return UrlSafetyLevel::Development; + } + } + + UrlSafetyLevel::Unknown +} + +/// Validate URL safety and return error if blocked +fn validate_url_safety(safety_level: &UrlSafetyLevel, allow_staging: bool) -> Result<(), ServerError> { + match safety_level { + UrlSafetyLevel::Localhost | UrlSafetyLevel::Development => Ok(()), + UrlSafetyLevel::Staging => { + if allow_staging { + Ok(()) + } else { + Err(ServerError::CommandFailed( + "Staging URLs require explicit approval. Pass allow_staging=true to proceed." + .to_string(), + )) + } + } + UrlSafetyLevel::Production => Err(ServerError::CommandFailed( + "Cannot run Playwright verification against production URLs. Use localhost or staging." + .to_string(), + )), + UrlSafetyLevel::Unknown => Err(ServerError::CommandFailed( + "Unknown URL safety level. Use localhost for testing or explicitly allow staging." 
+ .to_string(), + )), + } +} + +/// Generate verification steps based on task description +fn generate_verification_steps(task: &str, target_url: &str) -> Vec { + let lower = task.to_lowercase(); + let mut steps = Vec::new(); + let mut step_num = 0; + + // Always start with navigation + step_num += 1; + steps.push(VerificationStep { + step: step_num, + action: VerificationAction::Navigate, + description: format!("Navigate to {}", target_url), + mcp_tool: "mcp__playwright__browser_navigate".to_string(), + assertion: "Page loads successfully".to_string(), + }); + + // Always take initial snapshot + step_num += 1; + steps.push(VerificationStep { + step: step_num, + action: VerificationAction::Snapshot, + description: "Capture initial page state".to_string(), + mcp_tool: "mcp__playwright__browser_snapshot".to_string(), + assertion: "Page structure is visible".to_string(), + }); + + // Page load verification + if lower.contains("page load") || lower.contains("loads correctly") || lower.contains("displays") { + step_num += 1; + steps.push(VerificationStep { + step: step_num, + action: VerificationAction::Screenshot, + description: "Capture screenshot as page load evidence".to_string(), + mcp_tool: "mcp__playwright__browser_take_screenshot".to_string(), + assertion: "Page rendered correctly".to_string(), + }); + } + + // Form interactions + if lower.contains("form") || lower.contains("input") || lower.contains("fill") { + step_num += 1; + steps.push(VerificationStep { + step: step_num, + action: VerificationAction::Snapshot, + description: "Identify form fields in page structure".to_string(), + mcp_tool: "mcp__playwright__browser_snapshot".to_string(), + assertion: "Form fields are accessible".to_string(), + }); + step_num += 1; + steps.push(VerificationStep { + step: step_num, + action: VerificationAction::Fill, + description: "Fill form fields with test data".to_string(), + mcp_tool: "mcp__playwright__browser_fill".to_string(), + assertion: "Form accepts 
input".to_string(), + }); + } + + // Click interactions + if lower.contains("click") || lower.contains("button") { + step_num += 1; + steps.push(VerificationStep { + step: step_num, + action: VerificationAction::Click, + description: "Click the target element".to_string(), + mcp_tool: "mcp__playwright__browser_click".to_string(), + assertion: "Element responds to click".to_string(), + }); + step_num += 1; + steps.push(VerificationStep { + step: step_num, + action: VerificationAction::Snapshot, + description: "Capture state after click".to_string(), + mcp_tool: "mcp__playwright__browser_snapshot".to_string(), + assertion: "Expected state change occurred".to_string(), + }); + } + + // Modal / dialog testing + if lower.contains("modal") || lower.contains("dialog") || lower.contains("popup") { + step_num += 1; + steps.push(VerificationStep { + step: step_num, + action: VerificationAction::Click, + description: "Open modal/dialog".to_string(), + mcp_tool: "mcp__playwright__browser_click".to_string(), + assertion: "Modal opens".to_string(), + }); + step_num += 1; + steps.push(VerificationStep { + step: step_num, + action: VerificationAction::Screenshot, + description: "Screenshot modal for evidence".to_string(), + mcp_tool: "mcp__playwright__browser_take_screenshot".to_string(), + assertion: "Modal state captured".to_string(), + }); + } + + // Responsive / mobile testing + if lower.contains("responsive") || lower.contains("mobile") || lower.contains("viewport") { + step_num += 1; + steps.push(VerificationStep { + step: step_num, + action: VerificationAction::Resize, + description: "Resize to mobile viewport (375x667)".to_string(), + mcp_tool: "mcp__playwright__browser_resize".to_string(), + assertion: "Viewport resized to mobile".to_string(), + }); + step_num += 1; + steps.push(VerificationStep { + step: step_num, + action: VerificationAction::Screenshot, + description: "Screenshot mobile layout".to_string(), + mcp_tool: 
"mcp__playwright__browser_take_screenshot".to_string(), + assertion: "Mobile layout captured".to_string(), + }); + } + + // Login testing + if lower.contains("login") || lower.contains("sign in") || lower.contains("authentication") { + step_num += 1; + steps.push(VerificationStep { + step: step_num, + action: VerificationAction::Fill, + description: "Fill login credentials".to_string(), + mcp_tool: "mcp__playwright__browser_fill".to_string(), + assertion: "Credentials entered".to_string(), + }); + step_num += 1; + steps.push(VerificationStep { + step: step_num, + action: VerificationAction::Click, + description: "Submit login form".to_string(), + mcp_tool: "mcp__playwright__browser_click".to_string(), + assertion: "Login submitted".to_string(), + }); + step_num += 1; + steps.push(VerificationStep { + step: step_num, + action: VerificationAction::Snapshot, + description: "Capture post-login state".to_string(), + mcp_tool: "mcp__playwright__browser_snapshot".to_string(), + assertion: "Login result visible".to_string(), + }); + } + + // Always end with a final screenshot for evidence + step_num += 1; + steps.push(VerificationStep { + step: step_num, + action: VerificationAction::Screenshot, + description: "Final screenshot for verification evidence".to_string(), + mcp_tool: "mcp__playwright__browser_take_screenshot".to_string(), + assertion: "Final state captured".to_string(), + }); + + steps +} + +/// Generate the Playwright MCP tool sequence for Claude to execute +fn generate_playwright_sequence(steps: &[VerificationStep], target_url: &str) -> Vec { + steps + .iter() + .map(|step| { + let params = match step.action { + VerificationAction::Navigate => json!({ + "url": target_url + }), + VerificationAction::Resize => json!({ + "width": 375, + "height": 667 + }), + VerificationAction::Fill => json!({ + "selector": "[element selector - identify from snapshot]", + "value": "[test value]" + }), + VerificationAction::Click => json!({ + "selector": "[element selector - 
identify from snapshot]" + }), + _ => json!({}), + }; + + json!({ + "step": step.step, + "tool": step.mcp_tool, + "description": step.description, + "params": params, + "assertion": step.assertion + }) + }) + .collect() +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_classify_url_safety_localhost() { + assert_eq!( + classify_url_safety("http://localhost:3000"), + UrlSafetyLevel::Localhost + ); + assert_eq!( + classify_url_safety("http://127.0.0.1:8080"), + UrlSafetyLevel::Localhost + ); + } + + #[test] + fn test_classify_url_safety_development() { + assert_eq!( + classify_url_safety("http://dev.example.com"), + UrlSafetyLevel::Development + ); + assert_eq!( + classify_url_safety("http://192.168.1.100:3000"), + UrlSafetyLevel::Development + ); + } + + #[test] + fn test_classify_url_safety_staging() { + assert_eq!( + classify_url_safety("https://staging.example.com"), + UrlSafetyLevel::Staging + ); + } + + #[test] + fn test_classify_url_safety_production() { + assert_eq!( + classify_url_safety("https://www.example.com"), + UrlSafetyLevel::Production + ); + } + + #[test] + fn test_validate_url_safety() { + assert!(validate_url_safety(&UrlSafetyLevel::Localhost, false).is_ok()); + assert!(validate_url_safety(&UrlSafetyLevel::Staging, false).is_err()); + assert!(validate_url_safety(&UrlSafetyLevel::Staging, true).is_ok()); + assert!(validate_url_safety(&UrlSafetyLevel::Production, true).is_err()); + } + + #[test] + fn test_generate_verification_steps() { + let steps = generate_verification_steps( + "Verify the login page loads correctly", + "http://localhost:3000/login", + ); + assert!(steps.len() >= 3); + assert!(matches!(steps[0].action, VerificationAction::Navigate)); + } +} diff --git a/crates/blue-mcp/src/server.rs b/crates/blue-mcp/src/server.rs index 7d1846a..5cfa9ec 100644 --- a/crates/blue-mcp/src/server.rs +++ b/crates/blue-mcp/src/server.rs @@ -1138,6 +1138,70 @@ impl BlueServer { } } } + }, + // Phase 8: Dialogue tools + { + "name": 
"blue_dialogue_lint", + "description": "Validate dialogue documents against the blue-dialogue-pattern. Returns weighted consistency score.", + "inputSchema": { + "type": "object", + "properties": { + "file_path": { + "type": "string", + "description": "Path to the .dialogue.md file" + } + }, + "required": ["file_path"] + } + }, + { + "name": "blue_extract_dialogue", + "description": "Extract dialogue content from spawned agent JSONL outputs.", + "inputSchema": { + "type": "object", + "properties": { + "task_id": { + "type": "string", + "description": "Task ID (e.g., 'a6dc70c') - resolves via symlink in /tmp/claude/.../tasks/" + }, + "file_path": { + "type": "string", + "description": "Absolute path to JSONL file" + } + } + } + }, + // Phase 8: Playwright verification + { + "name": "blue_playwright_verify", + "description": "Generate a verification plan for browser-based testing using Playwright MCP.", + "inputSchema": { + "type": "object", + "properties": { + "task": { + "type": "string", + "description": "Description of the verification task" + }, + "base_url": { + "type": "string", + "description": "Base URL for the application (e.g., 'http://localhost:3000')" + }, + "path": { + "type": "string", + "description": "Specific path to navigate to (e.g., '/login')" + }, + "expected_outcomes": { + "type": "array", + "items": { "type": "string" }, + "description": "Expected outcomes to verify" + }, + "allow_staging": { + "type": "boolean", + "description": "Allow staging URLs (default: false, only localhost allowed)" + } + }, + "required": ["task", "base_url"] + } } ] })) @@ -1215,6 +1279,11 @@ impl BlueServer { "blue_staging_create" => self.handle_staging_create(&call.arguments), "blue_staging_destroy" => self.handle_staging_destroy(&call.arguments), "blue_staging_cost" => self.handle_staging_cost(&call.arguments), + // Phase 8: Dialogue handlers + "blue_dialogue_lint" => self.handle_dialogue_lint(&call.arguments), + "blue_extract_dialogue" => 
self.handle_extract_dialogue(&call.arguments), + // Phase 8: Playwright handler + "blue_playwright_verify" => self.handle_playwright_verify(&call.arguments), _ => Err(ServerError::ToolNotFound(call.name)), }?; @@ -1859,6 +1928,23 @@ impl BlueServer { let state = self.ensure_state()?; crate::handlers::staging::handle_cost(args, &state.home.root) } + + // Phase 8: Dialogue and Playwright handlers + + fn handle_dialogue_lint(&mut self, args: &Option) -> Result { + let args = args.as_ref().ok_or(ServerError::InvalidParams)?; + crate::handlers::dialogue_lint::handle_dialogue_lint(args) + } + + fn handle_extract_dialogue(&mut self, args: &Option) -> Result { + let args = args.as_ref().ok_or(ServerError::InvalidParams)?; + crate::handlers::dialogue::handle_extract_dialogue(args) + } + + fn handle_playwright_verify(&mut self, args: &Option) -> Result { + let args = args.as_ref().ok_or(ServerError::InvalidParams)?; + crate::handlers::playwright::handle_verify(args) + } } impl Default for BlueServer {