Hearth is the infrastructure home for the letemcook ecosystem.

Ported from coherence-mcp/infra:
- Terraform modules (VPC, EKS, IAM, NLB, S3, storage)
- Kubernetes manifests (Forgejo, ingress, cert-manager, karpenter)
- Deployment scripts (phased rollout)

Status: Not deployed. EKS cluster needs to be provisioned.

Next steps:
1. Bootstrap terraform backend
2. Deploy phase 1 (foundation)
3. Deploy phase 2 (core services including Forgejo)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
190 lines
5.3 KiB
Bash
Executable file
190 lines
5.3 KiB
Bash
Executable file
#!/usr/bin/env bash
#
# Phase 4: Observability Stack Deployment (RFC 0042)
#
# This script deploys the observability stack including:
#   - Prometheus for metrics collection
#   - Grafana for visualization
#   - Loki for log aggregation
#   - Alertmanager for alert management
#
# Usage:
#   ./deploy-phase4-observability.sh [--dry-run]
#
# Prerequisites:
#   - Phase 3 (DNS and Email) should be deployed
#   - kubectl configured for EKS cluster
#

# Strict mode: abort on a failing command, on use of an unset variable, and
# when any stage of a pipeline fails.
set -euo pipefail

# Paths are derived from this script's own location, so the script can be
# invoked from any working directory.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
INFRA_DIR="$(dirname "$SCRIPT_DIR")"
K8S_DIR="$INFRA_DIR/kubernetes"
readonly SCRIPT_DIR INFRA_DIR K8S_DIR

# Colors for output (stored as literal escape sequences; whoever prints them
# must expand the backslash escapes)
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m'
readonly RED GREEN YELLOW BLUE NC

# Flags
DRY_RUN=false # switched to "true" by --dry-run; intentionally not readonly

# Parse arguments
#
# parse_args interprets the command-line options:
#   --dry-run    set DRY_RUN=true
#   -h, --help   print the usage line to stdout and exit 0
# Any other argument is reported on stderr and the script exits with status 1.
parse_args() {
  while [[ $# -gt 0 ]]; do
    case "$1" in
      --dry-run)
        DRY_RUN=true
        shift
        ;;
      -h | --help)
        echo "Usage: $0 [--dry-run]"
        exit 0
        ;;
      *)
        # %b expands the colour escapes; %s keeps the user-supplied argument
        # literal instead of interpreting backslashes in it.
        printf '%bUnknown option: %s%b\n' "$RED" "$1" "$NC" >&2
        exit 1
        ;;
    esac
  done
}

parse_args "$@"

# Print an informational message to stdout with an [INFO] tag.
# Globals:   BLUE, NC (read) - colour escape sequences wrapped around the tag
# Arguments: $* - message text; multiple arguments are joined with spaces
# Note: backslash escapes in the message are expanded (as with `echo -e`).
log_info() {
  # "$*" rather than "$1": a call without arguments must not abort the script
  # when `set -u` is active.
  printf '%b\n' "${BLUE}[INFO]${NC} $*"
}

# Print a success message to stdout with a [SUCCESS] tag.
# Globals:   GREEN, NC (read) - colour escape sequences wrapped around the tag
# Arguments: $* - message text; multiple arguments are joined with spaces
# Note: backslash escapes in the message are expanded (as with `echo -e`).
log_success() {
  # "$*" rather than "$1": a call without arguments must not abort the script
  # when `set -u` is active.
  printf '%b\n' "${GREEN}[SUCCESS]${NC} $*"
}

# Print a warning with a [WARN] tag.
# Globals:   YELLOW, NC (read) - colour escape sequences wrapped around the tag
# Arguments: $* - message text; multiple arguments are joined with spaces
# Outputs:   writes to stderr, so warnings stay visible when stdout is
#            redirected or captured
# Note: backslash escapes in the message are expanded (as with `echo -e`).
log_warn() {
  # "$*" rather than "$1": a call without arguments must not abort the script
  # when `set -u` is active.
  printf '%b\n' "${YELLOW}[WARN]${NC} $*" >&2
}

# Print an error message with an [ERROR] tag.
# Globals:   RED, NC (read) - colour escape sequences wrapped around the tag
# Arguments: $* - message text; multiple arguments are joined with spaces
# Outputs:   writes to stderr, so errors are not mixed into captured stdout
# Note: backslash escapes in the message are expanded (as with `echo -e`).
log_error() {
  # "$*" rather than "$1": a call without arguments must not abort the script
  # when `set -u` is active.
  printf '%b\n' "${RED}[ERROR]${NC} $*" >&2
}

# Run a command, or only describe it when dry-run mode is active.
# Globals:   DRY_RUN (read), YELLOW, NC (read)
# Arguments: $@ - the command and its arguments
# Outputs:   in dry-run mode, one "[DRY-RUN] Would run: ..." line on stdout
# Returns:   the command's exit status, or 0 in dry-run mode
run_cmd() {
  if [[ "$DRY_RUN" == true ]]; then
    local rendered=''
    if (( $# > 0 )); then
      # %q shell-quotes every argument, so the printed line shows the real
      # argument boundaries and can be pasted back into a shell.
      printf -v rendered '%q ' "$@"
      rendered=${rendered% }
    fi
    # Colours go through %b; the command goes through %s so that backslashes
    # produced by %q are printed literally.
    printf '%b[DRY-RUN]%b Would run: %s\n' "$YELLOW" "$NC" "$rendered"
  else
    "$@"
  fi
}

# Verify that the environment is ready for the deployment.
# Hard requirement: kubectl is installed and `kubectl cluster-info` succeeds;
# otherwise an error is logged and the script exits with status 1.
# Soft requirement: a Keycloak pod (namespace "keycloak", label app=keycloak)
# reports phase Running; if not, only a warning is logged.
check_prerequisites() {
  log_info "Checking prerequisites..."

  # Distinguish "kubectl is not installed" from "cluster is unreachable".
  if ! command -v kubectl > /dev/null 2>&1; then
    log_error "kubectl not found in PATH."
    exit 1
  fi

  # Check kubectl connectivity
  if ! kubectl cluster-info &> /dev/null; then
    log_error "kubectl not connected to cluster."
    exit 1
  fi

  # Check Keycloak is running (for SSO)
  # The phase is captured into a variable instead of being piped into grep,
  # so a failing kubectl cannot interact with `pipefail`. A failed query
  # leaves the value empty and is treated the same as "not running".
  local keycloak_phase
  keycloak_phase=$(kubectl -n keycloak get pods -l app=keycloak \
    -o jsonpath='{.items[0].status.phase}' 2> /dev/null || true)
  if [[ "$keycloak_phase" != *Running* ]]; then
    log_warn "Keycloak not running. SSO integration may not work."
  fi

  log_success "All prerequisites met"
}

# Apply the observability kustomization and wait for its workloads.
# Globals: K8S_DIR (read), DRY_RUN (read)
# The apply goes through run_cmd, so it is only printed in dry-run mode.
# Outside dry-run mode the function then waits, per component, for the pods
# labelled app=<component> in the "observability" namespace to become ready.
# A wait that fails or times out is logged as a warning and does not abort
# the deployment.
deploy_observability_stack() {
  log_info "Deploying observability stack..."

  # Deploy entire observability stack
  run_cmd kubectl apply -k "$K8S_DIR/observability/"

  # Wait for each component
  if [[ "$DRY_RUN" == false ]]; then
    local -r wait_timeout=300s
    local component display label
    # Each entry is "<display name>:<value of the app= label>".
    for component in \
      Prometheus:prometheus \
      Grafana:grafana \
      Loki:loki \
      Alertmanager:alertmanager; do
      display=${component%%:*}
      label=${component#*:}

      log_info "Waiting for ${display} pods to be ready..."
      # Best effort: report the failure instead of hiding it, then keep going.
      if ! kubectl -n observability wait --for=condition=ready pod \
        -l "app=${label}" --timeout="${wait_timeout}"; then
        log_warn "${display} pods did not become ready within ${wait_timeout}; continuing"
      fi
    done
  fi

  log_success "Observability stack deployment complete"
}

# Report on Grafana's provisioning state; nothing in the cluster is modified.
# Globals: DRY_RUN (read) - in dry-run mode no kubectl query is made
# Outside dry-run mode the function looks up the first pod labelled
# app=grafana in the "observability" namespace and, if one exists, prints:
#   - the first 20 lines of the "grafana-datasources" ConfigMap
#   - the ConfigMaps labelled grafana_dashboard=1
# Each missing item is logged as a warning; none of them is fatal.
configure_grafana() {
  log_info "Configuring Grafana..."

  if [[ "$DRY_RUN" == false ]]; then
    # Get Grafana pod (empty when the query fails, e.g. no pod matches)
    local grafana_pod
    grafana_pod=$(kubectl -n observability get pods -l app=grafana \
      -o jsonpath='{.items[0].metadata.name}' 2> /dev/null || true)

    if [[ -n "$grafana_pod" ]]; then
      log_info "Grafana pod: $grafana_pod"

      # Datasources and dashboards are typically provisioned via ConfigMaps
      log_info "Checking datasources..."
      # Capture first, truncate afterwards: piping kubectl straight into
      # `head` can fail the pipeline under `pipefail` when head exits early,
      # which would trigger the warning for a ConfigMap that does exist.
      local datasources
      if datasources=$(kubectl -n observability get configmap \
        grafana-datasources -o yaml 2> /dev/null) && [[ -n "$datasources" ]]; then
        head -n 20 <<< "$datasources"
      else
        log_warn "No datasources ConfigMap found"
      fi

      log_info "Checking dashboards..."
      # An empty label match still makes kubectl exit 0, so test the captured
      # listing instead of the exit status.
      local dashboards
      dashboards=$(kubectl -n observability get configmap \
        -l grafana_dashboard=1 2> /dev/null || true)
      if [[ -n "$dashboards" ]]; then
        printf '%s\n' "$dashboards"
      else
        log_warn "No dashboard ConfigMaps found"
      fi
    else
      log_warn "Grafana pod not found; skipping datasource and dashboard checks"
    fi
  fi

  log_success "Grafana configuration complete"
}

# Run the Phase 4 validation script that sits next to this script.
# Globals: SCRIPT_DIR (read), DRY_RUN (read)
# Returns: the validation script's exit status when it is executed; 0 when it
#          is skipped (missing, not executable, or dry-run mode)
validate_phase4() {
  log_info "Running Phase 4 validation..."

  local validation_script="$SCRIPT_DIR/validate-phase4.sh"

  # Guard clause: a missing validator is only worth a warning. The -f test
  # keeps a directory of the same name from passing the -x test.
  if [[ ! -f "$validation_script" || ! -x "$validation_script" ]]; then
    log_warn "Validation script not found or not executable: $validation_script"
    return 0
  fi

  if [[ "$DRY_RUN" == true ]]; then
    log_info "Would run validation script: $validation_script"
  else
    "$validation_script"
  fi
}

# Entry point: print the banner, run the four deployment stages in order
# (prerequisite check, stack deployment, Grafana check, validation) and then
# print the follow-up instructions.
# Globals:   DRY_RUN (read) - adds a dry-run notice below the banner
# Arguments: none are read; anything passed in is ignored
main() {
  echo "========================================"
  echo "Phase 4: Observability Stack"
  echo "RFC 0042 Deployment"
  echo "========================================"
  echo ""

  if [[ "$DRY_RUN" == true ]]; then
    log_warn "Running in DRY-RUN mode - no changes will be made"
    echo ""
  fi

  # Stages run strictly in this order.
  check_prerequisites
  deploy_observability_stack
  configure_grafana
  validate_phase4

  echo ""
  echo "========================================"
  log_success "Phase 4 deployment complete!"
  echo "========================================"
  echo ""
  echo "Post-deployment steps:"
  echo " 1. Access Grafana via Keycloak SSO"
  echo " 2. Verify Prometheus datasource is configured"
  echo " 3. Verify Loki datasource is configured"
  echo " 4. Check that dashboards are loaded"
  echo " 5. Verify Alertmanager cluster is formed"
  echo ""
  echo "Next steps:"
  echo " 1. Run validate-phase4.sh to verify deployment"
  echo " 2. Tag this deployment: git tag -a v0.4.0-phase4 -m 'Phase 4: Observability'"
  echo " 3. Proceed to Phase 5: ./deploy-phase5-e2ee-webmail.sh (optional)"
}

# Start the deployment, forwarding the script's command-line arguments.
main "$@"