#!/usr/bin/env bash
#
# Phase 4: Observability Stack Deployment (RFC 0042)
#
# This script deploys the observability stack including:
#   - Prometheus for metrics collection
#   - Grafana for visualization
#   - Loki for log aggregation
#   - Alertmanager for alert management
#
# Usage:
#   ./deploy-phase4-observability.sh [--dry-run]
#
# Prerequisites:
#   - Phase 3 (DNS and Email) should be deployed
#   - kubectl configured for EKS cluster
#

set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
INFRA_DIR="$(dirname "$SCRIPT_DIR")"
K8S_DIR="$INFRA_DIR/kubernetes"
readonly SCRIPT_DIR INFRA_DIR K8S_DIR

# Namespace the stack is deployed into and per-component readiness timeout.
readonly OBS_NAMESPACE="observability"
readonly WAIT_TIMEOUT="300s"

# Colors for output (escape sequences are expanded by printf's %b).
readonly RED='\033[0;31m'
readonly GREEN='\033[0;32m'
readonly YELLOW='\033[1;33m'
readonly BLUE='\033[0;34m'
readonly NC='\033[0m'

# Flags
DRY_RUN=false

#######################################
# Parse command-line options.
# Globals:   DRY_RUN (written)
# Arguments: the script's positional parameters
# Outputs:   usage text on -h/--help; an error on stderr for unknown options
# Returns:   exits 0 after printing help, exits 1 on an unknown option
#######################################
parse_args() {
  while [[ $# -gt 0 ]]; do
    case "$1" in
      --dry-run)
        DRY_RUN=true
        shift
        ;;
      -h | --help)
        echo "Usage: $0 [--dry-run]"
        exit 0
        ;;
      *)
        printf '%b%s%b\n' "$RED" "Unknown option: $1" "$NC" >&2
        exit 1
        ;;
    esac
  done
}

# Logging helpers. The message is printed with %s so that backslashes in it
# are emitted literally; only the color codes are escape-expanded.
# Warnings and errors go to stderr so they survive stdout redirection.
log_info() {
  printf '%b[INFO]%b %s\n' "$BLUE" "$NC" "$1"
}

log_success() {
  printf '%b[SUCCESS]%b %s\n' "$GREEN" "$NC" "$1"
}

log_warn() {
  printf '%b[WARN]%b %s\n' "$YELLOW" "$NC" "$1" >&2
}

log_error() {
  printf '%b[ERROR]%b %s\n' "$RED" "$NC" "$1" >&2
}

#######################################
# Run a command, or only print it when in dry-run mode.
# Globals:   DRY_RUN (read)
# Arguments: the command and its arguments
# Returns:   the command's exit status (0 in dry-run mode)
#######################################
run_cmd() {
  if [[ "$DRY_RUN" == true ]]; then
    printf '%b[DRY-RUN]%b Would run: %s\n' "$YELLOW" "$NC" "$*"
  else
    "$@"
  fi
}

#######################################
# Verify kubectl is installed and can reach the cluster; warn (without
# failing) when the first Keycloak pod is not in the Running phase.
# Returns:   exits 1 if kubectl is missing or cannot reach the cluster
#######################################
check_prerequisites() {
  log_info "Checking prerequisites..."

  # Distinguish "not installed" from "not connected" for a clearer error.
  if ! command -v kubectl > /dev/null 2>&1; then
    log_error "kubectl not found in PATH."
    exit 1
  fi

  # Check kubectl connectivity
  if ! kubectl cluster-info &> /dev/null; then
    log_error "kubectl not connected to cluster."
    exit 1
  fi

  # Check Keycloak is running (for SSO). Only the first matching pod is
  # inspected; a lookup failure is treated the same as "not running".
  local keycloak_phase
  keycloak_phase=$(kubectl -n keycloak get pods -l app=keycloak \
    -o jsonpath='{.items[0].status.phase}' 2> /dev/null || true)
  if [[ "$keycloak_phase" != "Running" ]]; then
    log_warn "Keycloak not running. SSO integration may not work."
  fi

  log_success "All prerequisites met"
}

#######################################
# Wait for the pods of one component to report the Ready condition.
# Globals:   OBS_NAMESPACE, WAIT_TIMEOUT (read)
# Arguments: $1 - value of the pods' "app" label
#            $2 - human-readable component name used in log messages
# Returns:   0 if the pods became ready, non-zero otherwise
#######################################
wait_for_pods() {
  local app_label=$1
  local display_name=$2

  log_info "Waiting for ${display_name} pods to be ready..."
  if ! kubectl -n "$OBS_NAMESPACE" wait --for=condition=ready pod \
    -l "app=${app_label}" --timeout="$WAIT_TIMEOUT"; then
    log_warn "${display_name} pods did not become ready within ${WAIT_TIMEOUT}"
    return 1
  fi
}

#######################################
# Apply the observability kustomization and wait for each component.
# Readiness failures are reported as warnings but are deliberately not
# fatal, so later steps still run.
# Globals:   DRY_RUN, K8S_DIR (read)
# Returns:   non-zero (via set -e) only if `kubectl apply` fails
#######################################
deploy_observability_stack() {
  log_info "Deploying observability stack..."

  # Deploy entire observability stack
  run_cmd kubectl apply -k "$K8S_DIR/observability/"

  # Wait for each component ("app label:display name")
  local -a not_ready=()
  if [[ "$DRY_RUN" == false ]]; then
    local component
    for component in \
      "prometheus:Prometheus" \
      "grafana:Grafana" \
      "loki:Loki" \
      "alertmanager:Alertmanager"; do
      wait_for_pods "${component%%:*}" "${component#*:}" \
        || not_ready+=("${component#*:}")
    done
  fi

  if [[ ${#not_ready[@]} -gt 0 ]]; then
    log_warn "Observability stack applied, but not ready: ${not_ready[*]}"
  else
    log_success "Observability stack deployment complete"
  fi
}

#######################################
# Report on Grafana provisioning: shows the start of the datasources
# ConfigMap and lists dashboard ConfigMaps. Nothing is modified.
# Globals:   DRY_RUN, OBS_NAMESPACE (read)
#######################################
configure_grafana() {
  log_info "Configuring Grafana..."

  if [[ "$DRY_RUN" == true ]]; then
    log_success "Grafana configuration complete"
    return 0
  fi

  # Get Grafana pod
  local grafana_pod
  grafana_pod=$(kubectl -n "$OBS_NAMESPACE" get pods -l app=grafana \
    -o jsonpath='{.items[0].metadata.name}' 2> /dev/null || true)

  if [[ -z "$grafana_pod" ]]; then
    log_warn "No Grafana pod found; skipping datasource and dashboard checks"
    return 0
  fi
  log_info "Grafana pod: $grafana_pod"

  # Datasources and dashboards are typically provisioned via ConfigMaps
  log_info "Checking datasources..."
  # Capture first, then truncate: piping kubectl straight into head can make
  # the pipeline fail under pipefail when head closes the pipe early, which
  # would produce a misleading "not found" warning.
  local datasources
  if datasources=$(kubectl -n "$OBS_NAMESPACE" get configmap \
    grafana-datasources -o yaml 2> /dev/null); then
    head -n 20 <<< "$datasources"
  else
    log_warn "No datasources ConfigMap found"
  fi

  log_info "Checking dashboards..."
  # A label selector that matches nothing still exits 0, so test the output
  # rather than the exit status.
  local dashboards
  dashboards=$(kubectl -n "$OBS_NAMESPACE" get configmap \
    -l grafana_dashboard=1 2> /dev/null || true)
  if [[ -n "$dashboards" ]]; then
    printf '%s\n' "$dashboards"
  else
    log_warn "No dashboard ConfigMaps found"
  fi

  log_success "Grafana configuration complete"
}

#######################################
# Run validate-phase4.sh from this script's directory if it is executable.
# Globals:   DRY_RUN, SCRIPT_DIR (read)
# Returns:   the validation script's status; a failure aborts via set -e
#######################################
validate_phase4() {
  log_info "Running Phase 4 validation..."

  local validation_script="$SCRIPT_DIR/validate-phase4.sh"

  if [[ ! -x "$validation_script" ]]; then
    log_warn "Validation script not found or not executable: $validation_script"
    return 0
  fi

  if [[ "$DRY_RUN" == true ]]; then
    log_info "Would run validation script: $validation_script"
  else
    "$validation_script"
  fi
}

#######################################
# Entry point: parse options, then run all Phase 4 steps in order.
# Arguments: the script's positional parameters
#######################################
main() {
  parse_args "$@"

  echo "========================================"
  echo "Phase 4: Observability Stack"
  echo "RFC 0042 Deployment"
  echo "========================================"
  echo ""

  if [[ "$DRY_RUN" == true ]]; then
    log_warn "Running in DRY-RUN mode - no changes will be made"
    echo ""
  fi

  check_prerequisites
  deploy_observability_stack
  configure_grafana
  validate_phase4

  echo ""
  echo "========================================"
  log_success "Phase 4 deployment complete!"
  echo "========================================"
  cat << 'EOF'

Post-deployment steps:
  1. Access Grafana via Keycloak SSO
  2. Verify Prometheus datasource is configured
  3. Verify Loki datasource is configured
  4. Check that dashboards are loaded
  5. Verify Alertmanager cluster is formed

Next steps:
  1. Run validate-phase4.sh to verify deployment
  2. Tag this deployment: git tag -a v0.4.0-phase4 -m 'Phase 4: Observability'
  3. Proceed to Phase 5: ./deploy-phase5-e2ee-webmail.sh (optional)
EOF
}

main "$@"