feat: Phase 8 - dialogue and Playwright tools

Add 3 tools:
- blue_dialogue_lint: Validate dialogue markdown against pattern
- blue_extract_dialogue: Extract dialogue from agent JSONL outputs
- blue_playwright_verify: Generate Playwright verification plans

Features:
- Weighted scoring for dialogue linting (Critical/Major/Minor)
- jq fallback to pure Rust for JSONL extraction
- URL safety classification (localhost/dev/staging/production)

Total: 50 tools ported from coherence-mcp

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
Eric Garcia 2026-01-24 04:19:18 -05:00
parent db6b7ed5c7
commit f186a470c8
7 changed files with 1463 additions and 0 deletions

View file

@ -41,6 +41,9 @@ chrono = { version = "0.4", features = ["serde"] }
# Git
git2 = "0.19"
# Regex
regex = "1.10"
# Internal
blue-core = { path = "crates/blue-core" }
blue-mcp = { path = "crates/blue-mcp" }

View file

@ -15,6 +15,7 @@ tokio.workspace = true
tracing.workspace = true
chrono.workspace = true
git2.workspace = true
regex.workspace = true
[dev-dependencies]
blue-core = { workspace = true, features = ["test-helpers"] }

View file

@ -0,0 +1,259 @@
//! Dialogue extraction tool handlers
//!
//! Extracts dialogue content from spawned agent JSONL outputs for scoring.
use serde::Serialize;
use serde_json::Value;
use std::fs::{self, File};
use std::io::{BufRead, BufReader};
use std::path::{Path, PathBuf};
use std::process::Command;
use crate::error::ServerError;
/// Outcome category of a dialogue extraction run.
///
/// Serialized by serde in snake_case (`complete`, `truncated`, `partial_error`).
#[derive(Debug, Serialize)]
#[serde(rename_all = "snake_case")]
pub enum ExtractionStatus {
// No errors were recorded and the extracted text is at most 500_000 bytes.
Complete,
// The extracted text is larger than 500_000 bytes. The extractors in this
// file only flag the size; they do not shorten the text.
Truncated,
// At least one line could not be read or parsed; see `ExtractionResult::errors`.
PartialError,
}
/// Result of extracting assistant text from a JSONL transcript.
#[derive(Debug, Serialize)]
pub struct ExtractionResult {
// Concatenated text of all extracted segments.
pub text: String,
// Overall outcome of the extraction.
pub status: ExtractionStatus,
// Path of the JSONL file that was read (lossy UTF-8 rendering).
pub source_file: String,
// Number of text segments that were extracted.
pub message_count: usize,
// Per-line problems; omitted from serialized output when `None`.
#[serde(skip_serializing_if = "Option::is_none")]
pub errors: Option<Vec<String>>,
}
/// Handle blue_extract_dialogue
///
/// Reads a JSONL transcript and returns the text of its assistant messages.
///
/// # Arguments (JSON object)
/// * `task_id` - optional; resolved to an output file via `resolve_task_output`.
///   Takes precedence over `file_path` when both are given.
/// * `file_path` - optional; explicit path to the JSONL file.
///
/// # Errors
/// * `ServerError::InvalidParams` when neither argument is supplied.
/// * `ServerError::CommandFailed` when the file is missing or cannot be extracted.
pub fn handle_extract_dialogue(args: &Value) -> Result<Value, ServerError> {
    let task_id = args.get("task_id").and_then(|v| v.as_str());
    let file_path_arg = args.get("file_path").and_then(|v| v.as_str());

    // Resolve file path (task_id wins when both are present)
    let file_path = match (task_id, file_path_arg) {
        (Some(id), _) => resolve_task_output(id)?,
        (None, Some(path)) => PathBuf::from(path),
        (None, None) => return Err(ServerError::InvalidParams),
    };

    // Verify file exists
    if !file_path.exists() {
        return Err(ServerError::CommandFailed(format!(
            "JSONL file not found: {}",
            file_path.display()
        )));
    }

    // Try jq first. jq gives up on the whole file when it hits an error, while
    // the Rust extractor records per-line errors and keeps going, so a jq
    // failure falls through to the Rust path instead of failing the request.
    let result = if jq_available() {
        match extract_with_jq(&file_path) {
            Ok(extracted) => extracted,
            Err(_) => extract_with_rust(&file_path)?,
        }
    } else {
        extract_with_rust(&file_path)?
    };

    let hint = match result.status {
        ExtractionStatus::Complete => format!(
            "Extracted {} assistant message(s) from {}",
            result.message_count,
            file_path.file_name().unwrap_or_default().to_string_lossy()
        ),
        ExtractionStatus::Truncated => format!(
            "Extracted {} assistant message(s), output truncated",
            result.message_count
        ),
        ExtractionStatus::PartialError => format!(
            "Extracted {} message(s) with {} error(s)",
            result.message_count,
            result.errors.as_ref().map(|e| e.len()).unwrap_or(0)
        ),
    };

    Ok(serde_json::json!({
        "status": "success",
        "message": blue_core::voice::info(
            &format!("Extracted {} messages", result.message_count),
            Some(&hint)
        ),
        "text": result.text,
        // Serialize through serde so the value follows the enum's snake_case
        // naming ("partial_error"); lowercasing the Debug name would produce
        // "partialerror".
        "extraction_status": result.status,
        "source_file": result.source_file,
        "message_count": result.message_count,
        "errors": result.errors
    }))
}
/// Resolve file path from task_id
///
/// Scans each entry of `/tmp/claude` for `tasks/<task_id>.output` and returns
/// the first match, with symlinks resolved to an absolute path.
///
/// # Errors
/// Returns `ServerError::CommandFailed` when the id contains a path separator,
/// `/tmp/claude` is missing or unreadable, or no matching output file exists.
fn resolve_task_output(task_id: &str) -> Result<PathBuf, ServerError> {
    // The id is spliced into a file name; a separator would let it point
    // outside the tasks directory.
    if task_id.contains(|c: char| c == '/' || c == '\\') {
        return Err(ServerError::CommandFailed(format!(
            "Invalid task_id: {}",
            task_id
        )));
    }

    // Look for task output symlink in /tmp/claude/.../tasks/
    let tmp_claude = PathBuf::from("/tmp/claude");
    if !tmp_claude.exists() {
        return Err(ServerError::CommandFailed(
            "No /tmp/claude directory found. Is Claude Code running?".to_string(),
        ));
    }

    let entries = fs::read_dir(&tmp_claude)
        .map_err(|e| ServerError::CommandFailed(format!("Failed to read /tmp/claude: {}", e)))?;

    // Search for task output file
    for entry in entries {
        let entry = entry.map_err(|e| {
            ServerError::CommandFailed(format!("Failed to read directory entry: {}", e))
        })?;
        let output_file = entry
            .path()
            .join("tasks")
            .join(format!("{}.output", task_id));
        if output_file.exists() {
            // `read_link` hands back a relative link target verbatim, which
            // would then be interpreted against the current directory.
            // `canonicalize` follows the link and yields an absolute path.
            let resolved = fs::canonicalize(&output_file).unwrap_or(output_file);
            return Ok(resolved);
        }
    }

    Err(ServerError::CommandFailed(format!(
        "Task output not found for task_id: {}",
        task_id
    )))
}
/// Check if jq is available
///
/// Runs `jq --version` and reports whether it could be started and exited
/// successfully. Any spawn failure counts as "not available".
fn jq_available() -> bool {
    match Command::new("jq").arg("--version").output() {
        Ok(probe) => probe.status.success(),
        Err(_) => false,
    }
}
/// Extract dialogue using jq (faster for large files)
fn extract_with_jq(file_path: &Path) -> Result<ExtractionResult, ServerError> {
let output = Command::new("jq")
.arg("-r")
.arg(r#"select(.type == "assistant") | .message.content[]? | select(.type == "text") | .text"#)
.arg(file_path)
.output()
.map_err(|e| ServerError::CommandFailed(format!("Failed to run jq: {}", e)))?;
if !output.status.success() {
let stderr = String::from_utf8_lossy(&output.stderr);
return Err(ServerError::CommandFailed(format!("jq failed: {}", stderr)));
}
let text = String::from_utf8_lossy(&output.stdout).to_string();
// Count messages by counting non-empty segments
let message_count = text.split("\n\n").filter(|s| !s.trim().is_empty()).count();
// Check for truncation (arbitrary limit: 500KB)
let status = if text.len() > 500_000 {
ExtractionStatus::Truncated
} else {
ExtractionStatus::Complete
};
Ok(ExtractionResult {
text,
status,
source_file: file_path.to_string_lossy().to_string(),
message_count,
errors: None,
})
}
/// Extract dialogue using pure Rust (fallback)
fn extract_with_rust(file_path: &Path) -> Result<ExtractionResult, ServerError> {
let file = File::open(file_path)
.map_err(|e| ServerError::CommandFailed(format!("Failed to open file: {}", e)))?;
let reader = BufReader::new(file);
let mut texts = Vec::new();
let mut errors = Vec::new();
let mut message_count = 0;
for (line_num, line_result) in reader.lines().enumerate() {
let line = match line_result {
Ok(l) => l,
Err(e) => {
errors.push(format!("Line {}: read error: {}", line_num + 1, e));
continue;
}
};
if line.trim().is_empty() {
continue;
}
// Parse JSON line
let json_value: Value = match serde_json::from_str(&line) {
Ok(v) => v,
Err(e) => {
errors.push(format!("Line {}: JSON parse error: {}", line_num + 1, e));
continue;
}
};
// Check if this is an assistant message
if json_value.get("type").and_then(|v| v.as_str()) != Some("assistant") {
continue;
}
// Extract text content from message.content array
if let Some(content_array) = json_value
.get("message")
.and_then(|m| m.get("content"))
.and_then(|c| c.as_array())
{
for content_item in content_array {
if content_item.get("type").and_then(|v| v.as_str()) == Some("text") {
if let Some(text) = content_item.get("text").and_then(|t| t.as_str()) {
texts.push(text.to_string());
message_count += 1;
}
}
}
}
}
let text = texts.join("\n\n");
// Determine status
let status = if !errors.is_empty() {
ExtractionStatus::PartialError
} else if text.len() > 500_000 {
ExtractionStatus::Truncated
} else {
ExtractionStatus::Complete
};
Ok(ExtractionResult {
text,
status,
source_file: file_path.to_string_lossy().to_string(),
message_count,
errors: if errors.is_empty() {
None
} else {
Some(errors)
},
})
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Smoke test: probing for jq must complete without panicking, whether or
    /// not jq is installed on the machine running the tests.
    #[test]
    fn test_jq_check() {
        let _detected: bool = jq_available();
    }
}

View file

@ -0,0 +1,659 @@
//! Dialogue lint tool handler
//!
//! Validates dialogue documents against the blue-dialogue-pattern.
//! Returns weighted consistency score with actionable remediation feedback.
use regex::Regex;
use serde::Serialize;
use serde_json::{json, Value};
use std::collections::{HashMap, HashSet};
use std::fs;
use std::path::PathBuf;
use crate::error::ServerError;
/// Check severity levels with weights
///
/// Serialized by serde as lowercase strings (`critical`, `major`, `minor`).
/// The numeric weights noted on each variant are the values returned by
/// `Severity::weight`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize)]
#[serde(rename_all = "lowercase")]
pub enum Severity {
Critical, // weight = 3
Major, // weight = 2
Minor, // weight = 1
}
impl Severity {
fn weight(&self) -> u32 {
match self {
Severity::Critical => 3,
Severity::Major => 2,
Severity::Minor => 1,
}
}
}
/// Result of a single check
#[derive(Debug, Serialize)]
pub struct CheckResult {
// Stable identifier of the check, e.g. "rounds-present".
pub name: &'static str,
// Severity of the check; determines its weight in the score.
pub severity: Severity,
// Whether the check passed.
pub pass: bool,
// Human-readable outcome, for both passing and failing checks.
pub message: String,
// Suggested remediation; skipped when `None` if this struct is serialized directly.
#[serde(skip_serializing_if = "Option::is_none")]
pub fix_hint: Option<String>,
}
/// Parsed dialogue structure for validation
///
/// Filled in by `parse_dialogue` from regex matches over the raw markdown and
/// then read by the individual `check_*` functions.
#[derive(Debug, Default)]
struct ParsedDialogue {
// Header fields
// `**Draft**:` present.
has_draft_link: bool,
// `**Participants**:` present.
has_participants: bool,
// `**Status**:` present.
has_status: bool,
// Trimmed text following `**Status**:`, when present.
status_value: Option<String>,
// Scoreboard
// `## Alignment Scoreboard` heading present.
has_scoreboard: bool,
// Agent names in the order their scoreboard rows were matched.
scoreboard_agents: Vec<String>,
// Agent name -> sum of the four numeric columns of its scoreboard row.
scoreboard_totals: HashMap<String, u32>,
// Claimed total: taken from the `**Total ALIGNMENT**:` line when present,
// otherwise derived from the scoreboard rows.
claimed_total: Option<u32>,
// Inventories
// `## Perspectives Inventory` heading present.
has_perspectives_inventory: bool,
// `## Tensions Tracker` heading present.
has_tensions_tracker: bool,
// Rounds
// Round numbers from `## Round N` headings, sorted ascending (duplicates kept).
rounds: Vec<u32>,
// Markers
// IDs such as "P01" from `[PERSPECTIVE Pnn:` markers, in document order.
perspective_ids: Vec<String>,
// IDs such as "T1" from `[TENSION Tn:` markers, in document order.
tension_ids: Vec<String>,
// IDs such as "T1" from `[RESOLVED Tn` markers, in document order.
resolved_ids: Vec<String>,
// For emoji consistency
// Agent name -> emoji found on a `###` heading for that agent.
agent_emojis: HashMap<String, String>,
}
/// Handle blue_dialogue_lint
pub fn handle_dialogue_lint(args: &Value) -> Result<Value, ServerError> {
let file_path_str = args
.get("file_path")
.and_then(|v| v.as_str())
.ok_or(ServerError::InvalidParams)?;
let file_path = PathBuf::from(file_path_str);
// Verify file exists
if !file_path.exists() {
return Err(ServerError::CommandFailed(format!(
"Dialogue file not found: {}",
file_path.display()
)));
}
// Read file content
let content = fs::read_to_string(&file_path)
.map_err(|e| ServerError::CommandFailed(format!("Failed to read file: {}", e)))?;
// Parse dialogue structure
let parsed = parse_dialogue(&content);
// Run all checks
let mut checks = Vec::new();
// Critical checks
checks.push(check_rounds_present(&parsed));
checks.push(check_markers_parseable(&content));
// Major checks
checks.push(check_convergence_gate(&parsed));
checks.push(check_scoreboard_present(&parsed));
checks.push(check_inventories_present(&parsed));
checks.push(check_id_uniqueness(&parsed));
checks.push(check_round_sequencing(&parsed));
// Minor checks
checks.push(check_header_completeness(&parsed));
checks.push(check_scoreboard_math(&parsed));
checks.push(check_round_numbering(&parsed));
checks.push(check_emoji_consistency(&parsed));
// Calculate weighted score
let mut total_weight = 0u32;
let mut earned_weight = 0u32;
let mut checks_passed = 0usize;
let mut checks_failed = 0usize;
let mut critical_failures = Vec::new();
for check in &checks {
let weight = check.severity.weight();
total_weight += weight;
if check.pass {
earned_weight += weight;
checks_passed += 1;
} else {
checks_failed += 1;
if check.severity == Severity::Critical {
critical_failures.push(check.message.clone());
}
}
}
let score = if total_weight > 0 {
(earned_weight as f64) / (total_weight as f64)
} else {
1.0
};
// Build hint
let hint = if score >= 0.9 {
format!(
"Dialogue passes with score {:.1}% ({}/{} checks)",
score * 100.0,
checks_passed,
checks_passed + checks_failed
)
} else if score >= 0.7 {
format!(
"Dialogue needs attention: {:.1}% ({} issues)",
score * 100.0,
checks_failed
)
} else if score >= 0.3 {
format!(
"Dialogue has significant issues: {:.1}% ({} failures)",
score * 100.0,
checks_failed
)
} else {
format!(
"Dialogue failing: {:.1}% - {} critical issues",
score * 100.0,
critical_failures.len()
)
};
Ok(json!({
"status": "success",
"message": blue_core::voice::info(
&format!("Dialogue score: {:.1}%", score * 100.0),
Some(&hint)
),
"score": score,
"checks_passed": checks_passed,
"checks_failed": checks_failed,
"details": checks.iter().map(|c| json!({
"name": c.name,
"severity": c.severity,
"pass": c.pass,
"message": c.message,
"fix_hint": c.fix_hint
})).collect::<Vec<_>>(),
"critical_failures": critical_failures
}))
}
/// Parse dialogue content into structured form
fn parse_dialogue(content: &str) -> ParsedDialogue {
let mut parsed = ParsedDialogue::default();
// Header patterns (case-insensitive, whitespace-tolerant)
let draft_re = Regex::new(r"(?i)\*\*Draft\*\*:").unwrap();
let participants_re = Regex::new(r"(?i)\*\*Participants\*\*:").unwrap();
let status_re = Regex::new(r"(?i)\*\*Status\*\*:\s*(.+)").unwrap();
parsed.has_draft_link = draft_re.is_match(content);
parsed.has_participants = participants_re.is_match(content);
if let Some(caps) = status_re.captures(content) {
parsed.has_status = true;
parsed.status_value = Some(caps[1].trim().to_string());
}
// Scoreboard detection
let scoreboard_re = Regex::new(r"(?i)##\s*Alignment\s+Scoreboard").unwrap();
parsed.has_scoreboard = scoreboard_re.is_match(content);
// Parse scoreboard table for agents and totals
let table_row_re =
Regex::new(r"\|\s*([🧁💙]?\s*\w+)\s*\|\s*(\d+)\s*\|\s*(\d+)\s*\|\s*(\d+)\s*\|\s*(\d+)\s*\|\s*\*\*(\d+)\*\*\s*\|").unwrap();
for caps in table_row_re.captures_iter(content) {
let agent = caps[1].trim().to_string();
let w: u32 = caps[2].parse().unwrap_or(0);
let c: u32 = caps[3].parse().unwrap_or(0);
let t: u32 = caps[4].parse().unwrap_or(0);
let r: u32 = caps[5].parse().unwrap_or(0);
let total: u32 = caps[6].parse().unwrap_or(0);
parsed.scoreboard_agents.push(agent.clone());
parsed.scoreboard_totals.insert(agent, w + c + t + r);
parsed.claimed_total = Some(total);
}
// Total ALIGNMENT line
let total_alignment_re = Regex::new(r"(?i)\*\*Total\s+ALIGNMENT\*\*:\s*(\d+)").unwrap();
if let Some(caps) = total_alignment_re.captures(content) {
parsed.claimed_total = caps[1].parse().ok();
}
// Inventories
let perspectives_re = Regex::new(r"(?i)##\s*Perspectives\s+Inventory").unwrap();
let tensions_re = Regex::new(r"(?i)##\s*Tensions\s+Tracker").unwrap();
parsed.has_perspectives_inventory = perspectives_re.is_match(content);
parsed.has_tensions_tracker = tensions_re.is_match(content);
// Rounds (case-insensitive)
let round_re = Regex::new(r"(?i)##\s*Round\s+(\d+)").unwrap();
for caps in round_re.captures_iter(content) {
if let Ok(n) = caps[1].parse::<u32>() {
parsed.rounds.push(n);
}
}
parsed.rounds.sort();
// Agent headers within rounds
let agent_re = Regex::new(r"###\s*(\w+)\s*([🧁💙]?)").unwrap();
for caps in agent_re.captures_iter(content) {
let agent = caps[1].to_string();
let emoji = caps
.get(2)
.map(|m: regex::Match| m.as_str().to_string())
.unwrap_or_default();
if !emoji.is_empty() {
parsed.agent_emojis.insert(agent.clone(), emoji);
}
}
// Perspective markers (case-insensitive, whitespace-tolerant)
let perspective_marker_re = Regex::new(r"(?i)\[\s*PERSPECTIVE\s+P(\d{2})\s*:").unwrap();
for caps in perspective_marker_re.captures_iter(content) {
parsed.perspective_ids.push(format!("P{}", &caps[1]));
}
// Tension markers
let tension_marker_re = Regex::new(r"(?i)\[\s*TENSION\s+T(\d+)\s*:").unwrap();
for caps in tension_marker_re.captures_iter(content) {
parsed.tension_ids.push(format!("T{}", &caps[1]));
}
// Resolved markers
let resolved_marker_re = Regex::new(r"(?i)\[\s*RESOLVED\s+T(\d+)").unwrap();
for caps in resolved_marker_re.captures_iter(content) {
parsed.resolved_ids.push(format!("T{}", &caps[1]));
}
parsed
}
// ===== CRITICAL CHECKS =====

/// Critical: the dialogue must contain at least one `## Round N` section.
fn check_rounds_present(parsed: &ParsedDialogue) -> CheckResult {
    let (pass, message, fix_hint) = match parsed.rounds.len() {
        0 => (
            false,
            "No rounds found in dialogue".to_string(),
            Some("Add at least one '## Round N' section with agent responses".to_string()),
        ),
        found => (true, format!("Found {} round(s)", found), None),
    };

    CheckResult {
        name: "rounds-present",
        severity: Severity::Critical,
        pass,
        message,
        fix_hint,
    }
}
/// Critical: flags markers that begin like `[PERSPECTIVE` / `[TENSION` but
/// have a wrong final letter, which would make them invisible to the parser.
fn check_markers_parseable(content: &str) -> CheckResult {
    // Upper-case prefix followed by anything other than the expected last letter
    let typo_patterns = [r"\[PERSPECTIV[^E]", r"\[TENSIO[^N]"];
    let has_malformed = typo_patterns
        .iter()
        .any(|pattern| Regex::new(pattern).unwrap().is_match(content));

    let (message, fix_hint) = if has_malformed {
        (
            "Found potentially malformed markers".to_string(),
            Some("Check spelling: [PERSPECTIVE Pnn: ...] and [TENSION Tn: ...]".to_string()),
        )
    } else {
        ("All markers appear well-formed".to_string(), None)
    };

    CheckResult {
        name: "markers-parseable",
        severity: Severity::Critical,
        pass: !has_malformed,
        message,
        fix_hint,
    }
}
// ===== MAJOR CHECKS =====

/// Major: a dialogue whose status mentions convergence must have a
/// `[RESOLVED Tn]` marker for every `[TENSION Tn: ...]` marker.
///
/// When the status does not contain "converge" (case-insensitive) the gate is
/// not applicable and the check passes. Unresolved IDs are reported once each,
/// shortest first and then alphabetically, so the message is stable between
/// runs.
fn check_convergence_gate(parsed: &ParsedDialogue) -> CheckResult {
    // Only applies if status indicates convergence
    let is_converged = parsed
        .status_value
        .as_ref()
        .map(|s| s.to_lowercase().contains("converge"))
        .unwrap_or(false);
    if !is_converged {
        return CheckResult {
            name: "convergence-gate",
            severity: Severity::Major,
            pass: true,
            message: "Not converged yet, gate not applicable".to_string(),
            fix_hint: None,
        };
    }

    // Check all tensions have matching resolved
    let resolved: HashSet<&str> = parsed.resolved_ids.iter().map(String::as_str).collect();
    let mut unresolved: Vec<&str> = parsed
        .tension_ids
        .iter()
        .map(String::as_str)
        .filter(|id| !resolved.contains(id))
        .collect();
    // Length-then-lexical ordering puts T2 before T10; dedup needs the sort.
    unresolved.sort_unstable_by(|a, b| a.len().cmp(&b.len()).then_with(|| a.cmp(b)));
    unresolved.dedup();

    let pass = unresolved.is_empty();
    CheckResult {
        name: "convergence-gate",
        severity: Severity::Major,
        pass,
        message: if pass {
            "All tensions resolved before convergence".to_string()
        } else {
            format!("Unresolved tensions: {}", unresolved.join(", "))
        },
        fix_hint: if pass {
            None
        } else {
            Some(format!(
                "Add [RESOLVED {}] markers for each unresolved tension",
                unresolved.join(", ")
            ))
        },
    }
}
/// Major: the dialogue must contain an `## Alignment Scoreboard` section.
fn check_scoreboard_present(parsed: &ParsedDialogue) -> CheckResult {
    let found = parsed.has_scoreboard;
    let (message, fix_hint) = match found {
        true => ("Scoreboard section found".to_string(), None),
        false => (
            "Missing '## Alignment Scoreboard' section".to_string(),
            Some("Add '## Alignment Scoreboard' section with W/C/T/R columns".to_string()),
        ),
    };

    CheckResult {
        name: "scoreboard-present",
        severity: Severity::Major,
        pass: found,
        message,
        fix_hint,
    }
}
/// Major: both the Perspectives Inventory and the Tensions Tracker sections
/// must be present; the message names whichever is absent.
fn check_inventories_present(parsed: &ParsedDialogue) -> CheckResult {
    let mut absent: Vec<&str> = Vec::with_capacity(2);
    if !parsed.has_perspectives_inventory {
        absent.push("Perspectives Inventory");
    }
    if !parsed.has_tensions_tracker {
        absent.push("Tensions Tracker");
    }

    let has_both = absent.is_empty();
    let missing = absent.join(", ");
    let (message, fix_hint) = if has_both {
        ("Both inventory sections present".to_string(), None)
    } else {
        (
            format!("Missing: {}", missing),
            Some(format!("Add '## {}' section(s)", missing)),
        )
    };

    CheckResult {
        name: "inventories-present",
        severity: Severity::Major,
        pass: has_both,
        message,
        fix_hint,
    }
}
/// Major: perspective IDs and tension IDs must each be unique. Every repeat
/// occurrence is listed, perspectives first, in document order.
fn check_id_uniqueness(parsed: &ParsedDialogue) -> CheckResult {
    /// IDs that were already seen earlier in `ids`, one entry per repeat.
    fn repeated(ids: &[String]) -> Vec<String> {
        let mut seen: HashSet<&str> = HashSet::new();
        ids.iter()
            .filter(|id| !seen.insert(id.as_str()))
            .cloned()
            .collect()
    }

    let mut duplicates = repeated(&parsed.perspective_ids);
    duplicates.extend(repeated(&parsed.tension_ids));

    let pass = duplicates.is_empty();
    let (message, fix_hint) = if pass {
        ("All perspective/tension IDs are unique".to_string(), None)
    } else {
        (
            format!("Duplicate IDs: {}", duplicates.join(", ")),
            Some("Renumber duplicate IDs to be unique".to_string()),
        )
    };

    CheckResult {
        name: "id-uniqueness",
        severity: Severity::Major,
        pass,
        message,
        fix_hint,
    }
}
/// Major: the sorted round numbers must be exactly 1, 2, ..., N. Fails when
/// there are no rounds at all.
fn check_round_sequencing(parsed: &ParsedDialogue) -> CheckResult {
    let round_count = parsed.rounds.len();
    if round_count == 0 {
        return CheckResult {
            name: "round-sequencing",
            severity: Severity::Major,
            pass: false,
            message: "No rounds to check".to_string(),
            fix_hint: Some("Add '## Round 1' section".to_string()),
        };
    }

    // The ideal sequence for this many rounds
    let expected: Vec<u32> = (1..=round_count as u32).collect();
    let pass = expected == parsed.rounds;

    let (message, fix_hint) = if pass {
        (format!("Rounds 1-{} sequential", round_count), None)
    } else {
        (
            format!(
                "Round sequence gap: found {:?}, expected {:?}",
                parsed.rounds, expected
            ),
            Some("Renumber rounds sequentially starting from 1".to_string()),
        )
    };

    CheckResult {
        name: "round-sequencing",
        severity: Severity::Major,
        pass,
        message,
        fix_hint,
    }
}
// ===== MINOR CHECKS =====

/// Minor: the header should carry Draft, Participants and Status fields; the
/// message lists whichever are absent, in that order.
fn check_header_completeness(parsed: &ParsedDialogue) -> CheckResult {
    let mut missing: Vec<&str> = Vec::with_capacity(3);
    if !parsed.has_draft_link {
        missing.push("Draft");
    }
    if !parsed.has_participants {
        missing.push("Participants");
    }
    if !parsed.has_status {
        missing.push("Status");
    }

    let pass = missing.is_empty();
    let (message, fix_hint) = if pass {
        ("All header fields present".to_string(), None)
    } else {
        (
            format!("Missing header fields: {}", missing.join(", ")),
            Some(format!(
                "Add **{}**: fields to header",
                missing.join("**, **")
            )),
        )
    };

    CheckResult {
        name: "header-completeness",
        severity: Severity::Minor,
        pass,
        message,
        fix_hint,
    }
}
/// Minor: the claimed total must be within 2 of the sum of the per-agent
/// totals computed from the scoreboard rows.
///
/// Passes trivially when there is no scoreboard section or no row was parsed.
/// A missing claimed total is treated as 0. The arithmetic is done in `u64`
/// so neither the sum nor the difference can overflow or wrap.
fn check_scoreboard_math(parsed: &ParsedDialogue) -> CheckResult {
    if !parsed.has_scoreboard || parsed.scoreboard_totals.is_empty() {
        return CheckResult {
            name: "scoreboard-math",
            severity: Severity::Minor,
            pass: true,
            message: "No scoreboard to verify".to_string(),
            fix_hint: None,
        };
    }

    // Sum up all agent totals
    let computed_total: u64 = parsed
        .scoreboard_totals
        .values()
        .map(|&value| u64::from(value))
        .sum();
    let claimed = u64::from(parsed.claimed_total.unwrap_or(0));

    // Allow some tolerance for parsing issues
    let difference = computed_total.max(claimed) - computed_total.min(claimed);
    let pass = difference <= 2;

    CheckResult {
        name: "scoreboard-math",
        severity: Severity::Minor,
        pass,
        message: if pass {
            format!("Total ALIGNMENT: {}", claimed)
        } else {
            format!(
                "Math mismatch: claimed {}, computed {}",
                claimed, computed_total
            )
        },
        fix_hint: if pass {
            None
        } else {
            Some(format!(
                "Update **Total ALIGNMENT**: {} to match sum of agent scores",
                computed_total
            ))
        },
    }
}
/// Minor: the lowest round number should be 1. Passes trivially when there are
/// no rounds.
fn check_round_numbering(parsed: &ParsedDialogue) -> CheckResult {
    let first_round = match parsed.rounds.first() {
        Some(number) => *number,
        None => {
            return CheckResult {
                name: "round-numbering",
                severity: Severity::Minor,
                pass: true,
                message: "No rounds to check".to_string(),
                fix_hint: None,
            }
        }
    };

    let starts_at_one = first_round == 1;
    let (message, fix_hint) = if starts_at_one {
        ("Rounds start at 1".to_string(), None)
    } else {
        (
            format!("Rounds don't start at 1: {:?}", parsed.rounds),
            Some("Start round numbering at 1".to_string()),
        )
    };

    CheckResult {
        name: "round-numbering",
        severity: Severity::Minor,
        pass: starts_at_one,
        message,
        fix_hint,
    }
}
/// Minor: at least one agent heading should carry an emoji.
fn check_emoji_consistency(parsed: &ParsedDialogue) -> CheckResult {
    let emoji_agents = parsed.agent_emojis.len();
    let has_emojis = emoji_agents > 0;

    let (message, fix_hint) = if has_emojis {
        (format!("Found {} agents with emoji", emoji_agents), None)
    } else {
        (
            "No agent emojis found".to_string(),
            Some("Add emoji to agent headers: ### Muffin 🧁".to_string()),
        )
    };

    CheckResult {
        name: "emoji-consistency",
        severity: Severity::Minor,
        pass: has_emojis,
        message,
        fix_hint,
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Round headings are collected into an ascending list.
    #[test]
    fn test_parse_dialogue_rounds() {
        let content = r#"
## Round 1
### Muffin 🧁
Some content
## Round 2
### Cupcake 🧁
More content
"#;
        let rounds = parse_dialogue(content).rounds;
        assert_eq!(rounds, vec![1, 2]);
    }

    /// Any non-empty round list satisfies the rounds-present check.
    #[test]
    fn test_check_rounds_present_pass() {
        let parsed = ParsedDialogue {
            rounds: vec![1, 2],
            ..Default::default()
        };
        assert!(check_rounds_present(&parsed).pass);
    }

    /// An empty dialogue fails the check and comes with a remediation hint.
    #[test]
    fn test_check_rounds_present_fail() {
        let outcome = check_rounds_present(&ParsedDialogue::default());
        assert!(!outcome.pass);
        assert!(outcome.fix_hint.is_some());
    }
}

View file

@ -5,9 +5,12 @@
pub mod adr;
pub mod audit;
pub mod decision;
pub mod dialogue;
pub mod dialogue_lint;
pub mod env;
pub mod guide;
pub mod lint;
pub mod playwright;
pub mod pr;
pub mod prd;
pub mod release;

View file

@ -0,0 +1,452 @@
//! Playwright verification handler
//!
//! Provides browser-based test verification using Playwright MCP.
//! Generates verification plans that Claude can execute via Playwright tools.
use regex::Regex;
use serde::Serialize;
use serde_json::{json, Value};
use crate::error::ServerError;
/// A single verification step for Playwright execution
#[derive(Debug, Clone, Serialize)]
pub struct VerificationStep {
// 1-based position of the step within the generated plan.
pub step: usize,
// Kind of browser action this step performs.
pub action: VerificationAction,
// Human-readable description of what the step does.
pub description: String,
// Name of the MCP tool to invoke, e.g. "mcp__playwright__browser_navigate".
pub mcp_tool: String,
// What should hold once the step has been carried out.
pub assertion: String,
}
/// Types of verification actions
///
/// Serialized by serde in snake_case (`navigate`, `snapshot`, ...).
#[derive(Debug, Clone, Serialize)]
#[serde(rename_all = "snake_case")]
pub enum VerificationAction {
// Load the target URL.
Navigate,
// Capture the page structure.
Snapshot,
// Capture an image of the page as evidence.
Screenshot,
// Click an element.
Click,
// Enter data into form fields.
Fill,
// Change the viewport size.
Resize,
}
/// URL safety level
///
/// Produced by `classify_url_safety` and enforced by `validate_url_safety`.
/// Serialized by serde in snake_case.
#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
#[serde(rename_all = "snake_case")]
pub enum UrlSafetyLevel {
// Loopback target; always allowed.
Localhost,
// Matches a development pattern; always allowed.
Development,
// Matches a staging pattern; allowed only with `allow_staging`.
Staging,
// Matches a production pattern; always rejected.
Production,
// Matched no known pattern; rejected.
Unknown,
}
/// Handle blue_playwright_verify
///
/// Builds a verification plan for the given task: classifies and validates the
/// base URL, derives the target URL, and returns the generated steps together
/// with the Playwright MCP tool sequence to execute.
///
/// # Arguments (JSON object)
/// * `task` - required; free-text description of what to verify.
/// * `base_url` - required; the URL whose safety level is checked.
/// * `path` - optional; appended to `base_url`. A `/` is inserted when the
///   path does not start with one, so the path can never merge into (or
///   redefine) the host part of the URL.
/// * `allow_staging` - optional bool, default `false`.
/// * `expected_outcomes` - optional array; non-string entries are ignored.
///
/// # Errors
/// * `ServerError::InvalidParams` when `task` or `base_url` is missing.
/// * `ServerError::CommandFailed` when the URL's safety level is rejected.
pub fn handle_verify(args: &Value) -> Result<Value, ServerError> {
    let task = args
        .get("task")
        .and_then(|v| v.as_str())
        .ok_or(ServerError::InvalidParams)?;
    let base_url = args
        .get("base_url")
        .and_then(|v| v.as_str())
        .ok_or(ServerError::InvalidParams)?;
    let path = args.get("path").and_then(|v| v.as_str());
    let allow_staging = args
        .get("allow_staging")
        .and_then(|v| v.as_bool())
        .unwrap_or(false);
    let expected_outcomes: Vec<String> = args
        .get("expected_outcomes")
        .and_then(|v| v.as_array())
        .map(|arr| {
            arr.iter()
                .filter_map(|v| v.as_str().map(String::from))
                .collect()
        })
        .unwrap_or_default();

    // Validate URL safety
    let safety_level = classify_url_safety(base_url);
    validate_url_safety(&safety_level, allow_staging)?;

    // Build full URL. Only `base_url` was classified, so the path must stay a
    // path: without the separator, "http://localhost:3000" + "@example.com"
    // would name a different host than the one that was validated.
    let trimmed_base = base_url.trim_end_matches('/');
    let target_url = match path {
        Some(p) if p.is_empty() || p.starts_with('/') => format!("{}{}", trimmed_base, p),
        Some(p) => format!("{}/{}", trimmed_base, p),
        None => base_url.to_string(),
    };

    // Generate verification steps from task description
    let steps = generate_verification_steps(task, &target_url);

    // Generate the Playwright MCP tool sequence
    let playwright_sequence = generate_playwright_sequence(&steps, &target_url);

    let hint = format!(
        "Generated {} verification steps for '{}'. Execute the playwright_sequence in order.",
        steps.len(),
        task
    );

    Ok(json!({
        "status": "success",
        "message": blue_core::voice::info(
            &format!("{} verification steps generated", steps.len()),
            Some(&hint)
        ),
        "verification_plan": {
            "task": task,
            "target_url": target_url,
            "safety_level": safety_level,
            "steps": steps,
            "expected_outcomes": expected_outcomes
        },
        "playwright_sequence": playwright_sequence,
        "safety": {
            "url_safety_level": safety_level,
            "requires_approval": safety_level != UrlSafetyLevel::Localhost,
            "blocked": safety_level == UrlSafetyLevel::Production
        },
        "evidence_guidance": {
            "screenshot_tool": "mcp__playwright__browser_take_screenshot",
            "snapshot_tool": "mcp__playwright__browser_snapshot",
            "recommended": "Take screenshots before and after key actions"
        },
        "suggested_tools": [
            "mcp__playwright__browser_navigate",
            "mcp__playwright__browser_snapshot",
            "mcp__playwright__browser_take_screenshot"
        ]
    }))
}
/// Classify URL safety level
///
/// Only the authority part of the URL (`host[:port]`, with any `user@` prefix
/// removed) is inspected. Path, query, fragment and userinfo are ignored so
/// that text such as `localhost` or `dev.` appearing there cannot make a
/// production or unknown host look safe.
///
/// Order of evaluation: loopback host, production patterns, development
/// patterns, staging patterns, then "IPv4 address with port" as development.
/// Anything else is `Unknown`. The pattern lists are substring heuristics on
/// the authority and are not a substitute for an allow-list.
fn classify_url_safety(url: &str) -> UrlSafetyLevel {
    let lower = url.to_lowercase();

    // Drop a leading "scheme://" if there is one. The scheme must consist of
    // scheme characters only, so a "://" that appears later (for example
    // inside a query string of a scheme-less URL) is not mistaken for it.
    let after_scheme = match lower.split_once("://") {
        Some((scheme, rest))
            if !scheme.is_empty()
                && scheme
                    .chars()
                    .all(|c| c.is_ascii_alphanumeric() || matches!(c, '+' | '-' | '.')) =>
        {
            rest
        }
        _ => lower.as_str(),
    };

    // The authority ends at the first path, query or fragment delimiter. A
    // backslash is included because browsers treat it like '/'.
    let authority_end = after_scheme
        .find(|c: char| matches!(c, '/' | '\\' | '?' | '#'))
        .unwrap_or(after_scheme.len());
    let authority = &after_scheme[..authority_end];

    // Strip "user:password@"
    let host_port = authority
        .rsplit_once('@')
        .map_or(authority, |(_, rest)| rest);

    // Host without port; bracketed IPv6 literals keep their brackets
    let host = if host_port.starts_with('[') {
        host_port
            .find(']')
            .map_or(host_port, |end| &host_port[..=end])
    } else {
        host_port.split(':').next().unwrap_or(host_port)
    };

    // Check for localhost first (exact host, not a substring)
    if host == "localhost"
        || host.ends_with(".localhost")
        || host == "127.0.0.1"
        || host == "[::1]"
    {
        return UrlSafetyLevel::Localhost;
    }

    // Production patterns - blocked
    let production_patterns = [
        "prod.", ".prod", "production.", ".production", "live.", ".live", "www.",
    ];
    if production_patterns.iter().any(|p| host_port.contains(p)) {
        return UrlSafetyLevel::Production;
    }

    // Development patterns
    let dev_patterns = [
        "dev.", ".dev", "development.", ".development", "local.", ".local", ":3000", ":3001",
        ":5173", ":5174", ":8080", ":8000", ":4200",
    ];
    if dev_patterns.iter().any(|p| host_port.contains(p)) {
        return UrlSafetyLevel::Development;
    }

    // Staging patterns
    let staging_patterns = [
        "staging.", ".staging", "stage.", ".stage", "test.", ".test", "qa.", ".qa", "uat.", ".uat",
        "preview.", ".preview",
    ];
    if staging_patterns.iter().any(|p| host_port.contains(p)) {
        return UrlSafetyLevel::Staging;
    }

    // If it looks like an IP address with a port, likely development
    if let Ok(re) = Regex::new(r"\d+\.\d+\.\d+\.\d+:\d+") {
        if re.is_match(host_port) {
            return UrlSafetyLevel::Development;
        }
    }

    UrlSafetyLevel::Unknown
}
/// Validate URL safety and return error if blocked
///
/// Localhost and Development always pass. Staging passes only when
/// `allow_staging` is set. Production and Unknown are always rejected with a
/// `ServerError::CommandFailed` explaining why.
fn validate_url_safety(safety_level: &UrlSafetyLevel, allow_staging: bool) -> Result<(), ServerError> {
    let refusal = match safety_level {
        UrlSafetyLevel::Localhost | UrlSafetyLevel::Development => return Ok(()),
        UrlSafetyLevel::Staging if allow_staging => return Ok(()),
        UrlSafetyLevel::Staging => {
            "Staging URLs require explicit approval. Pass allow_staging=true to proceed."
        }
        UrlSafetyLevel::Production => {
            "Cannot run Playwright verification against production URLs. Use localhost or staging."
        }
        UrlSafetyLevel::Unknown => {
            "Unknown URL safety level. Use localhost for testing or explicitly allow staging."
        }
    };

    Err(ServerError::CommandFailed(refusal.to_string()))
}
/// Generate verification steps based on task description
fn generate_verification_steps(task: &str, target_url: &str) -> Vec<VerificationStep> {
let lower = task.to_lowercase();
let mut steps = Vec::new();
let mut step_num = 0;
// Always start with navigation
step_num += 1;
steps.push(VerificationStep {
step: step_num,
action: VerificationAction::Navigate,
description: format!("Navigate to {}", target_url),
mcp_tool: "mcp__playwright__browser_navigate".to_string(),
assertion: "Page loads successfully".to_string(),
});
// Always take initial snapshot
step_num += 1;
steps.push(VerificationStep {
step: step_num,
action: VerificationAction::Snapshot,
description: "Capture initial page state".to_string(),
mcp_tool: "mcp__playwright__browser_snapshot".to_string(),
assertion: "Page structure is visible".to_string(),
});
// Page load verification
if lower.contains("page load") || lower.contains("loads correctly") || lower.contains("displays") {
step_num += 1;
steps.push(VerificationStep {
step: step_num,
action: VerificationAction::Screenshot,
description: "Capture screenshot as page load evidence".to_string(),
mcp_tool: "mcp__playwright__browser_take_screenshot".to_string(),
assertion: "Page rendered correctly".to_string(),
});
}
// Form interactions
if lower.contains("form") || lower.contains("input") || lower.contains("fill") {
step_num += 1;
steps.push(VerificationStep {
step: step_num,
action: VerificationAction::Snapshot,
description: "Identify form fields in page structure".to_string(),
mcp_tool: "mcp__playwright__browser_snapshot".to_string(),
assertion: "Form fields are accessible".to_string(),
});
step_num += 1;
steps.push(VerificationStep {
step: step_num,
action: VerificationAction::Fill,
description: "Fill form fields with test data".to_string(),
mcp_tool: "mcp__playwright__browser_fill".to_string(),
assertion: "Form accepts input".to_string(),
});
}
// Click interactions
if lower.contains("click") || lower.contains("button") {
step_num += 1;
steps.push(VerificationStep {
step: step_num,
action: VerificationAction::Click,
description: "Click the target element".to_string(),
mcp_tool: "mcp__playwright__browser_click".to_string(),
assertion: "Element responds to click".to_string(),
});
step_num += 1;
steps.push(VerificationStep {
step: step_num,
action: VerificationAction::Snapshot,
description: "Capture state after click".to_string(),
mcp_tool: "mcp__playwright__browser_snapshot".to_string(),
assertion: "Expected state change occurred".to_string(),
});
}
// Modal / dialog testing
if lower.contains("modal") || lower.contains("dialog") || lower.contains("popup") {
step_num += 1;
steps.push(VerificationStep {
step: step_num,
action: VerificationAction::Click,
description: "Open modal/dialog".to_string(),
mcp_tool: "mcp__playwright__browser_click".to_string(),
assertion: "Modal opens".to_string(),
});
step_num += 1;
steps.push(VerificationStep {
step: step_num,
action: VerificationAction::Screenshot,
description: "Screenshot modal for evidence".to_string(),
mcp_tool: "mcp__playwright__browser_take_screenshot".to_string(),
assertion: "Modal state captured".to_string(),
});
}
// Responsive / mobile testing
if lower.contains("responsive") || lower.contains("mobile") || lower.contains("viewport") {
step_num += 1;
steps.push(VerificationStep {
step: step_num,
action: VerificationAction::Resize,
description: "Resize to mobile viewport (375x667)".to_string(),
mcp_tool: "mcp__playwright__browser_resize".to_string(),
assertion: "Viewport resized to mobile".to_string(),
});
step_num += 1;
steps.push(VerificationStep {
step: step_num,
action: VerificationAction::Screenshot,
description: "Screenshot mobile layout".to_string(),
mcp_tool: "mcp__playwright__browser_take_screenshot".to_string(),
assertion: "Mobile layout captured".to_string(),
});
}
// Login testing
if lower.contains("login") || lower.contains("sign in") || lower.contains("authentication") {
step_num += 1;
steps.push(VerificationStep {
step: step_num,
action: VerificationAction::Fill,
description: "Fill login credentials".to_string(),
mcp_tool: "mcp__playwright__browser_fill".to_string(),
assertion: "Credentials entered".to_string(),
});
step_num += 1;
steps.push(VerificationStep {
step: step_num,
action: VerificationAction::Click,
description: "Submit login form".to_string(),
mcp_tool: "mcp__playwright__browser_click".to_string(),
assertion: "Login submitted".to_string(),
});
step_num += 1;
steps.push(VerificationStep {
step: step_num,
action: VerificationAction::Snapshot,
description: "Capture post-login state".to_string(),
mcp_tool: "mcp__playwright__browser_snapshot".to_string(),
assertion: "Login result visible".to_string(),
});
}
// Always end with a final screenshot for evidence
step_num += 1;
steps.push(VerificationStep {
step: step_num,
action: VerificationAction::Screenshot,
description: "Final screenshot for verification evidence".to_string(),
mcp_tool: "mcp__playwright__browser_take_screenshot".to_string(),
assertion: "Final state captured".to_string(),
});
steps
}
/// Translate verification steps into the Playwright MCP call sequence for Claude to run.
///
/// Every entry of `steps` yields one JSON object, in input order, with the keys
/// `step`, `tool`, `description`, `params` and `assertion`. The `params` object
/// depends on the step's action:
///
/// - `Navigate` carries `target_url` under `url`.
/// - `Resize` carries the fixed 375x667 viewport.
/// - `Fill` and `Click` carry placeholder selector text (and, for `Fill`, a
///   placeholder value) that is meant to be resolved from a page snapshot.
/// - Every other action gets an empty object.
fn generate_playwright_sequence(steps: &[VerificationStep], target_url: &str) -> Vec<Value> {
    let mut sequence = Vec::with_capacity(steps.len());

    for entry in steps {
        // Only a handful of actions need arguments; the rest run parameterless.
        let params = match entry.action {
            VerificationAction::Click => json!({
                "selector": "[element selector - identify from snapshot]"
            }),
            VerificationAction::Fill => json!({
                "selector": "[element selector - identify from snapshot]",
                "value": "[test value]"
            }),
            VerificationAction::Resize => json!({
                "width": 375,
                "height": 667
            }),
            VerificationAction::Navigate => json!({
                "url": target_url
            }),
            _ => json!({}),
        };

        sequence.push(json!({
            "step": entry.step,
            "tool": entry.mcp_tool,
            "description": entry.description,
            "params": params,
            "assertion": entry.assertion
        }));
    }

    sequence
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Loopback hosts must classify as `Localhost`.
    #[test]
    fn test_classify_url_safety_localhost() {
        let urls = ["http://localhost:3000", "http://127.0.0.1:8080"];
        for url in urls.iter() {
            assert_eq!(classify_url_safety(url), UrlSafetyLevel::Localhost);
        }
    }

    /// A `dev.` hostname and a 192.168.x.x address must classify as `Development`.
    #[test]
    fn test_classify_url_safety_development() {
        let urls = ["http://dev.example.com", "http://192.168.1.100:3000"];
        for url in urls.iter() {
            assert_eq!(classify_url_safety(url), UrlSafetyLevel::Development);
        }
    }

    /// A `staging.` hostname must classify as `Staging`.
    #[test]
    fn test_classify_url_safety_staging() {
        let level = classify_url_safety("https://staging.example.com");
        assert_eq!(level, UrlSafetyLevel::Staging);
    }

    /// A plain public hostname must classify as `Production`.
    #[test]
    fn test_classify_url_safety_production() {
        let level = classify_url_safety("https://www.example.com");
        assert_eq!(level, UrlSafetyLevel::Production);
    }

    /// Checks which (level, allow_staging) combinations pass validation.
    #[test]
    fn test_validate_url_safety() {
        // (safety level, allow_staging flag, expected to be accepted)
        let cases = [
            (UrlSafetyLevel::Localhost, false, true),
            (UrlSafetyLevel::Staging, false, false),
            (UrlSafetyLevel::Staging, true, true),
            (UrlSafetyLevel::Production, true, false),
        ];
        for (level, allow_staging, accepted) in cases.iter() {
            let outcome = validate_url_safety(level, *allow_staging);
            assert_eq!(outcome.is_ok(), *accepted);
        }
    }

    /// A login task must yield at least three steps, starting with navigation.
    #[test]
    fn test_generate_verification_steps() {
        let task = "Verify the login page loads correctly";
        let url = "http://localhost:3000/login";
        let plan = generate_verification_steps(task, url);

        assert!(plan.len() >= 3);
        let first = &plan[0];
        assert!(matches!(first.action, VerificationAction::Navigate));
    }
}

View file

@ -1138,6 +1138,70 @@ impl BlueServer {
}
}
}
},
// Phase 8: Dialogue tools
{
"name": "blue_dialogue_lint",
"description": "Validate dialogue documents against the blue-dialogue-pattern. Returns weighted consistency score.",
"inputSchema": {
"type": "object",
"properties": {
"file_path": {
"type": "string",
"description": "Path to the .dialogue.md file"
}
},
"required": ["file_path"]
}
},
{
"name": "blue_extract_dialogue",
"description": "Extract dialogue content from spawned agent JSONL outputs.",
"inputSchema": {
"type": "object",
"properties": {
"task_id": {
"type": "string",
"description": "Task ID (e.g., 'a6dc70c') - resolves via symlink in /tmp/claude/.../tasks/"
},
"file_path": {
"type": "string",
"description": "Absolute path to JSONL file"
}
}
}
},
// Phase 8: Playwright verification
{
"name": "blue_playwright_verify",
"description": "Generate a verification plan for browser-based testing using Playwright MCP.",
"inputSchema": {
"type": "object",
"properties": {
"task": {
"type": "string",
"description": "Description of the verification task"
},
"base_url": {
"type": "string",
"description": "Base URL for the application (e.g., 'http://localhost:3000')"
},
"path": {
"type": "string",
"description": "Specific path to navigate to (e.g., '/login')"
},
"expected_outcomes": {
"type": "array",
"items": { "type": "string" },
"description": "Expected outcomes to verify"
},
"allow_staging": {
"type": "boolean",
"description": "Allow staging URLs (default: false, only localhost allowed)"
}
},
"required": ["task", "base_url"]
}
}
]
}))
@ -1215,6 +1279,11 @@ impl BlueServer {
"blue_staging_create" => self.handle_staging_create(&call.arguments),
"blue_staging_destroy" => self.handle_staging_destroy(&call.arguments),
"blue_staging_cost" => self.handle_staging_cost(&call.arguments),
// Phase 8: Dialogue handlers
"blue_dialogue_lint" => self.handle_dialogue_lint(&call.arguments),
"blue_extract_dialogue" => self.handle_extract_dialogue(&call.arguments),
// Phase 8: Playwright handler
"blue_playwright_verify" => self.handle_playwright_verify(&call.arguments),
_ => Err(ServerError::ToolNotFound(call.name)),
}?;
@ -1859,6 +1928,23 @@ impl BlueServer {
let state = self.ensure_state()?;
crate::handlers::staging::handle_cost(args, &state.home.root)
}
// Phase 8: Dialogue and Playwright handlers
/// Dispatch `blue_dialogue_lint` to the dialogue lint handler.
///
/// Returns `ServerError::InvalidParams` when the call carried no arguments;
/// otherwise forwards the arguments to
/// `crate::handlers::dialogue_lint::handle_dialogue_lint` and returns its result.
fn handle_dialogue_lint(&mut self, args: &Option<Value>) -> Result<Value, ServerError> {
    match args {
        Some(params) => crate::handlers::dialogue_lint::handle_dialogue_lint(params),
        None => Err(ServerError::InvalidParams),
    }
}
/// Dispatch `blue_extract_dialogue` to the dialogue extraction handler.
///
/// Returns `ServerError::InvalidParams` when the call carried no arguments;
/// otherwise forwards the arguments to
/// `crate::handlers::dialogue::handle_extract_dialogue` and returns its result.
fn handle_extract_dialogue(&mut self, args: &Option<Value>) -> Result<Value, ServerError> {
    match args {
        Some(params) => crate::handlers::dialogue::handle_extract_dialogue(params),
        None => Err(ServerError::InvalidParams),
    }
}
/// Dispatch `blue_playwright_verify` to the Playwright verification handler.
///
/// Returns `ServerError::InvalidParams` when the call carried no arguments;
/// otherwise forwards the arguments to
/// `crate::handlers::playwright::handle_verify` and returns its result.
fn handle_playwright_verify(&mut self, args: &Option<Value>) -> Result<Value, ServerError> {
    match args {
        Some(params) => crate::handlers::playwright::handle_verify(params),
        None => Err(ServerError::InvalidParams),
    }
}
}
impl Default for BlueServer {