hearth/scripts/deploy-phase4-observability.sh
Eric Garcia e78000831e Initial commit: Port infrastructure from coherence-mcp
Hearth is the infrastructure home for the letemcook ecosystem.

Ported from coherence-mcp/infra:
- Terraform modules (VPC, EKS, IAM, NLB, S3, storage)
- Kubernetes manifests (Forgejo, ingress, cert-manager, karpenter)
- Deployment scripts (phased rollout)

Status: Not deployed. EKS cluster needs to be provisioned.

Next steps:
1. Bootstrap terraform backend
2. Deploy phase 1 (foundation)
3. Deploy phase 2 (core services including Forgejo)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-24 06:06:13 -05:00

190 lines
5.3 KiB
Bash
Executable file

#!/usr/bin/env bash
#
# Phase 4: Observability Stack Deployment (RFC 0042)
#
# This script deploys the observability stack including:
# - Prometheus for metrics collection
# - Grafana for visualization
# - Loki for log aggregation
# - Alertmanager for alert management
#
# Usage:
# ./deploy-phase4-observability.sh [--dry-run] [-h|--help]
#
# Prerequisites:
# - Phase 3 (DNS and Email) should be deployed
# - kubectl configured for EKS cluster
#
# Strict mode: abort on a failing command (-e), on expansion of an unset
# variable (-u), and when any stage of a pipeline fails (pipefail).
set -euo pipefail

# Directory layout, resolved from this script's own location so the script
# behaves the same regardless of the caller's working directory.
#   SCRIPT_DIR - absolute path of the directory containing this script
#   INFRA_DIR  - parent directory of SCRIPT_DIR
#   K8S_DIR    - "kubernetes" directory under INFRA_DIR
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
INFRA_DIR="$(dirname "$SCRIPT_DIR")"
K8S_DIR="$INFRA_DIR/kubernetes"
# Marked read-only after assignment so a failing command substitution is not
# masked by the exit status of `readonly` itself.
readonly SCRIPT_DIR INFRA_DIR K8S_DIR

# Colors for output. These are stored as literal backslash sequences and are
# turned into real escape characters by `echo -e` at print time.
readonly RED='\033[0;31m'
readonly GREEN='\033[0;32m'
readonly YELLOW='\033[1;33m'
readonly BLUE='\033[0;34m'
readonly NC='\033[0m'

# Flags (intentionally mutable: --dry-run switches DRY_RUN to true)
DRY_RUN=false
#######################################
# Parse command-line arguments.
# Globals:
#   DRY_RUN (written) - set to true when --dry-run is given
#   RED, NC (read)    - color codes for the error message
# Arguments:
#   The script's command-line arguments.
# Outputs:
#   -h/--help prints the usage line to stdout and exits 0.
#   An unrecognized argument prints an error to stderr and exits 1.
#######################################
parse_args() {
  while [[ $# -gt 0 ]]; do
    case "$1" in
      --dry-run)
        DRY_RUN=true
        shift
        ;;
      -h | --help)
        echo "Usage: $0 [--dry-run]"
        exit 0
        ;;
      *)
        # %b expands only the color codes; %s keeps the user-supplied text
        # literal so backslashes in it are not interpreted as escapes.
        # Diagnostics belong on stderr, not stdout.
        printf '%bUnknown option: %s%b\n' "${RED}" "$1" "${NC}" >&2
        exit 1
        ;;
    esac
  done
}
parse_args "$@"
# Print an informational message to stdout, prefixed with a [INFO] tag
# wrapped in the BLUE/NC color codes.
# Arguments: $1 - message text (backslash escapes are expanded by echo -e)
log_info() {
  local text="$1"
  echo -e "${BLUE}[INFO]${NC} ${text}"
}
# Print a success message to stdout, prefixed with a [SUCCESS] tag wrapped
# in the GREEN/NC color codes.
# Arguments: $1 - message text (backslash escapes are expanded by echo -e)
log_success() {
  local text="$1"
  echo -e "${GREEN}[SUCCESS]${NC} ${text}"
}
# Print a warning message to stdout, prefixed with a [WARN] tag wrapped in
# the YELLOW/NC color codes.
# Arguments: $1 - message text (backslash escapes are expanded by echo -e)
log_warn() {
  local text="$1"
  echo -e "${YELLOW}[WARN]${NC} ${text}"
}
# Print an error message, prefixed with a [ERROR] tag wrapped in the RED/NC
# color codes.
# Arguments: $1 - message text (backslash escapes are expanded by echo -e)
# Outputs:   writes to stderr, so errors stay visible when stdout is
#            redirected or captured.
log_error() {
  echo -e "${RED}[ERROR]${NC} $1" >&2
}
#######################################
# Run a command, or only announce it when dry-run mode is active.
# Globals:   DRY_RUN (read), YELLOW, NC (read)
# Arguments: the command and its arguments
# Outputs:   in dry-run mode, a "[DRY-RUN] Would run: ..." line on stdout;
#            otherwise whatever the command itself prints
# Returns:   0 in dry-run mode, otherwise the command's exit status
#######################################
run_cmd() {
  if [[ "$DRY_RUN" == true ]]; then
    # %b expands only the color codes; %s prints the command text verbatim so
    # backslashes inside arguments are shown as-is instead of being
    # interpreted as escape sequences.
    printf '%b[DRY-RUN]%b Would run: %s\n' "${YELLOW}" "${NC}" "$*"
  else
    "$@"
  fi
}
#######################################
# Verify that the deployment can proceed.
# Hard requirements (the script exits 1 when unmet):
#   - the kubectl command is available
#   - `kubectl cluster-info` succeeds
# Soft requirement (warning only):
#   - the first pod labelled app=keycloak in the keycloak namespace reports
#     phase Running (used for SSO)
# Globals:   none
# Arguments: none
#######################################
check_prerequisites() {
  log_info "Checking prerequisites..."

  # Report a missing binary separately from an unreachable cluster, so the
  # error message points at the actual problem.
  if ! command -v kubectl > /dev/null 2>&1; then
    log_error "kubectl not found in PATH."
    exit 1
  fi

  # Check kubectl connectivity
  if ! kubectl cluster-info &> /dev/null; then
    log_error "kubectl not connected to cluster."
    exit 1
  fi

  # Check Keycloak is running (for SSO). The phase is captured into a variable
  # rather than piped into `grep -q`, so the result does not depend on
  # pipeline exit statuses under `pipefail`. A failed lookup (e.g. no pods)
  # yields an empty string and is treated as "not running".
  local keycloak_phase
  keycloak_phase=$(kubectl -n keycloak get pods -l app=keycloak \
    -o jsonpath='{.items[0].status.phase}' 2> /dev/null || true)
  if [[ "$keycloak_phase" != *Running* ]]; then
    log_warn "Keycloak not running. SSO integration may not work."
  fi

  log_success "All prerequisites met"
}
#######################################
# Apply the observability kustomization and wait for its components.
# Globals:   K8S_DIR (read), DRY_RUN (read)
# Arguments: none
# Outputs:   progress messages; a warning for every component whose pods do
#            not become ready, and a warning summary instead of the success
#            message when any component is not ready
# Returns:   0 even when a component is not ready (waiting is best-effort);
#            a failing `kubectl apply` still propagates through run_cmd
#######################################
deploy_observability_stack() {
  log_info "Deploying observability stack..."

  # Deploy entire observability stack
  run_cmd kubectl apply -k "$K8S_DIR/observability/"

  # Comma-separated display names of components that did not become ready.
  local not_ready=""

  # Wait for each component (skipped in dry-run mode: nothing was applied)
  if [[ "$DRY_RUN" == false ]]; then
    local entry label name
    # Each entry is "<app label value>:<display name>".
    for entry in prometheus:Prometheus grafana:Grafana loki:Loki alertmanager:Alertmanager; do
      label=${entry%%:*}
      name=${entry#*:}
      log_info "Waiting for ${name} pods to be ready..."
      # Readiness stays best-effort, but a failed wait is reported instead of
      # being silently discarded.
      if ! kubectl -n observability wait --for=condition=ready pod \
        -l "app=${label}" --timeout=300s; then
        log_warn "${name} pods are not ready (wait failed or timed out after 300s)"
        not_ready="${not_ready:+${not_ready}, }${name}"
      fi
    done
  fi

  if [[ -n "$not_ready" ]]; then
    log_warn "Observability stack applied, but not all components are ready: ${not_ready}"
  else
    log_success "Observability stack deployment complete"
  fi
}
#######################################
# Report on Grafana's provisioning inputs. This only inspects the cluster;
# it does not modify anything.
# Globals:   DRY_RUN (read)
# Arguments: none
# Outputs:   the first 20 lines of the grafana-datasources ConfigMap and the
#            list of ConfigMaps labelled grafana_dashboard=1, or a warning
#            for each one that is missing; a warning when no Grafana pod is
#            found
# Returns:   0 (all checks are informational)
#######################################
configure_grafana() {
  log_info "Configuring Grafana..."

  if [[ "$DRY_RUN" == false ]]; then
    local grafana_pod datasources dashboards

    # Get Grafana pod (an empty string when the lookup fails or finds nothing)
    grafana_pod=$(kubectl -n observability get pods -l app=grafana \
      -o jsonpath='{.items[0].metadata.name}' 2> /dev/null || echo "")

    if [[ -n "$grafana_pod" ]]; then
      log_info "Grafana pod: $grafana_pod"

      # Datasources and dashboards are typically provisioned via ConfigMaps
      log_info "Checking datasources..."
      # Capture first and truncate afterwards: piping kubectl straight into
      # `head` under `pipefail` can fail the pipeline when head exits early,
      # which would print a misleading "not found" warning.
      if datasources=$(kubectl -n observability get configmap grafana-datasources \
        -o yaml 2> /dev/null) && [[ -n "$datasources" ]]; then
        head -n 20 <<< "$datasources"
      else
        log_warn "No datasources ConfigMap found"
      fi

      log_info "Checking dashboards..."
      # A label-selector query that matches nothing still exits 0 and prints
      # nothing on stdout, so emptiness of the output is what must be tested.
      dashboards=$(kubectl -n observability get configmap -l grafana_dashboard=1 \
        2> /dev/null || true)
      if [[ -n "$dashboards" ]]; then
        printf '%s\n' "$dashboards"
      else
        log_warn "No dashboard ConfigMaps found"
      fi
    else
      log_warn "Grafana pod not found; skipping datasource and dashboard checks"
    fi
  fi

  log_success "Grafana configuration complete"
}
#######################################
# Run the Phase 4 validation script that sits next to this script.
# Globals:   SCRIPT_DIR (read), DRY_RUN (read)
# Arguments: none
# Outputs:   a warning when validate-phase4.sh is missing or not executable;
#            in dry-run mode, a note naming the script that would be run
# Returns:   the validation script's exit status when it is executed
#######################################
validate_phase4() {
  log_info "Running Phase 4 validation..."

  local validator="$SCRIPT_DIR/validate-phase4.sh"

  # Nothing to run: warn and carry on.
  if [ ! -x "$validator" ]; then
    log_warn "Validation script not found or not executable: $validator"
    return
  fi

  # Real run: hand over to the validator and report its status.
  if [ "$DRY_RUN" != true ]; then
    "$validator"
    return
  fi

  log_info "Would run validation script: $validator"
}
#######################################
# Orchestrate the Phase 4 rollout: print the banner, run the four deployment
# steps in order, then print post-deployment guidance.
# Globals:   DRY_RUN (read)
# Arguments: none are used
# Outputs:   banner, step output and follow-up instructions on stdout
#######################################
main() {
  local rule="========================================"

  printf '%s\n' \
    "$rule" \
    "Phase 4: Observability Stack" \
    "RFC 0042 Deployment" \
    "$rule" \
    ""

  if [ "$DRY_RUN" = true ]; then
    log_warn "Running in DRY-RUN mode - no changes will be made"
    echo ""
  fi

  # Deployment steps, executed strictly in this order.
  local step
  for step in \
    check_prerequisites \
    deploy_observability_stack \
    configure_grafana \
    validate_phase4; do
    "$step"
  done

  printf '%s\n' "" "$rule"
  log_success "Phase 4 deployment complete!"
  printf '%s\n' \
    "$rule" \
    "" \
    "Post-deployment steps:" \
    " 1. Access Grafana via Keycloak SSO" \
    " 2. Verify Prometheus datasource is configured" \
    " 3. Verify Loki datasource is configured" \
    " 4. Check that dashboards are loaded" \
    " 5. Verify Alertmanager cluster is formed" \
    "" \
    "Next steps:" \
    " 1. Run validate-phase4.sh to verify deployment" \
    " 2. Tag this deployment: git tag -a v0.4.0-phase4 -m 'Phase 4: Observability'" \
    " 3. Proceed to Phase 5: ./deploy-phase5-e2ee-webmail.sh (optional)"
}
# Entry point: run the deployment. main does not read any positional
# parameters of its own; its only runtime option is the DRY_RUN flag set
# during argument parsing.
main "$@"