#!/usr/bin/env bash
#
# Phase 1 Validation: Foundation Infrastructure
#
# Validates that RFC 0039 components are deployed and healthy:
#   - CockroachDB cluster (3 nodes, all live)
#   - cert-manager certificates (all Ready)
#   - S3 buckets (4 buckets created)
#   - Karpenter controller and NodePools (only when the namespace exists)
#
# Invokes kubectl, jq and the aws CLI, so all three must be on PATH.
#
# Exit status: 1 when at least one check failed, 0 otherwise (warnings do
# not fail the run).
#
set -euo pipefail

# Not referenced anywhere in this script.
# shellcheck disable=SC2034
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"

# Colors for output. Stored as real escape bytes ($'...') so they can be
# printed with a plain %s and message text is never escape-interpreted.
readonly RED=$'\033[0;31m'
readonly GREEN=$'\033[0;32m'
readonly YELLOW=$'\033[1;33m'
readonly BLUE=$'\033[0;34m'
readonly NC=$'\033[0m'

# Result counters, updated by log_pass / log_fail / log_warn and reported by
# print_summary.
PASSED=0
FAILED=0
WARNINGS=0

# Prints an informational line. $1 - message.
log_info() {
  printf '%s[INFO]%s %s\n' "$BLUE" "$NC" "$1"
}

# Prints a PASS line and bumps PASSED. $1 - message.
log_pass() {
  printf '%s[PASS]%s %s\n' "$GREEN" "$NC" "$1"
  # Not ((PASSED++)): that returns status 1 when the old value is 0, which
  # aborts the script under `set -e`.
  PASSED=$((PASSED + 1))
}

# Prints a FAIL line and bumps FAILED. $1 - message.
log_fail() {
  printf '%s[FAIL]%s %s\n' "$RED" "$NC" "$1"
  FAILED=$((FAILED + 1))
}

# Prints a WARN line and bumps WARNINGS. $1 - message.
log_warn() {
  printf '%s[WARN]%s %s\n' "$YELLOW" "$NC" "$1"
  WARNINGS=$((WARNINGS + 1))
}

# Runs `kubectl <args...> -o json` and prints its output. If kubectl fails,
# prints an empty item list instead so that callers count zero resources
# rather than the script dying under `set -e` / `pipefail`.
# kubectl's stderr is discarded on purpose; the caller reports the outcome.
kube_json() {
  local out
  if out=$(kubectl "$@" -o json 2> /dev/null); then
    printf '%s\n' "$out"
  else
    printf '%s\n' '{"items":[]}'
  fi
}

# Applies a jq filter that is expected to yield one non-negative integer.
#   $1 - jq filter
#   $2 - JSON document
# Prints the integer, or 0 when jq fails or yields anything else.
jq_count() {
  local filter=$1
  local json=$2
  local n
  n=$(jq "$filter" <<< "$json" 2> /dev/null) || n=0
  [[ "$n" =~ ^[0-9]+$ ]] || n=0
  printf '%s\n' "$n"
}

# Checks the CockroachDB deployment in the `cockroachdb` namespace: pod
# count, ready pods, and live nodes as reported by `cockroach node status`
# executed in pod cockroachdb-0. Each sub-check expects at least 3.
check_cockroachdb_cluster() {
  log_info "Checking CockroachDB cluster health..."

  # Check if namespace exists
  if ! kubectl get namespace cockroachdb &> /dev/null; then
    log_fail "cockroachdb namespace does not exist"
    return
  fi

  local pods
  pods=$(kube_json -n cockroachdb get pods -l app=cockroachdb)

  # Check pod count
  local pod_count
  pod_count=$(jq_count '.items | length' "$pods")
  if ((pod_count >= 3)); then
    log_pass "CockroachDB has $pod_count pods running (expected: >= 3)"
  else
    log_fail "CockroachDB has only $pod_count pods (expected: >= 3)"
  fi

  # Check pod health. A pod counts once, and only when it is Running and
  # every one of its containers reports ready (selecting on
  # `.containerStatuses[]?.ready` would emit the pod once per ready
  # container and overcount multi-container pods).
  local ready_pods
  ready_pods=$(jq_count '
    [ .items[]
      | select(.status.phase == "Running")
      | select((.status.containerStatuses // [])
               | length > 0 and all(.[]; .ready == true))
    ] | length' "$pods")
  if ((ready_pods >= 3)); then
    log_pass "CockroachDB has $ready_pods ready pods"
  else
    log_fail "CockroachDB has only $ready_pods ready pods (expected: >= 3)"
  fi

  # Check node status via cockroach CLI
  local node_status
  node_status=$(kubectl -n cockroachdb exec cockroachdb-0 -- \
    cockroach node status --certs-dir=/cockroach/cockroach-certs --format=json \
    2> /dev/null || echo '[]')

  # Count only rows flagged live; the raw row count would include nodes that
  # are listed but dead.
  # NOTE(review): relies on the `is_live` column of `cockroach node status`;
  # accepted as either the string "true" or a boolean. Confirm against the
  # deployed CockroachDB version.
  local live_nodes
  live_nodes=$(jq_count \
    '[.[] | select((.is_live | tostring) == "true")] | length' \
    "$node_status")
  if ((live_nodes >= 3)); then
    log_pass "CockroachDB cluster has $live_nodes live nodes"
  else
    log_fail "CockroachDB cluster has only $live_nodes live nodes (expected: >= 3)"
  fi
}

# Checks cert-manager: controller pods Running in the `cert-manager`
# namespace, every Certificate in every namespace Ready, and every
# ClusterIssuer Ready. Having no Certificates or no ClusterIssuers is a
# warning, not a failure.
check_certificates() {
  log_info "Checking cert-manager certificates..."

  # Check if cert-manager namespace exists
  if ! kubectl get namespace cert-manager &> /dev/null; then
    log_fail "cert-manager namespace does not exist"
    return
  fi

  # jq predicate: the resource has a condition Ready=True.
  local is_ready='any(.status.conditions[]?; .type == "Ready" and .status == "True")'

  # Check cert-manager pods
  local cm_pods cm_ready
  cm_pods=$(kube_json -n cert-manager get pods \
    -l app.kubernetes.io/instance=cert-manager)
  cm_ready=$(jq_count \
    '[.items[] | select(.status.phase == "Running")] | length' "$cm_pods")
  if ((cm_ready >= 1)); then
    log_pass "cert-manager is running ($cm_ready pods)"
  else
    log_fail "cert-manager is not running"
  fi

  # Check all certificates across namespaces
  local all_certs total_certs ready_certs
  all_certs=$(kube_json get certificates -A)
  total_certs=$(jq_count '.items | length' "$all_certs")
  ready_certs=$(jq_count "[.items[] | select($is_ready)] | length" "$all_certs")

  if ((total_certs == 0)); then
    log_warn "No certificates found (may not be required yet)"
  elif ((ready_certs == total_certs)); then
    log_pass "All certificates are ready ($ready_certs/$total_certs)"
  else
    log_fail "Some certificates are not ready ($ready_certs/$total_certs ready)"
    echo " Non-ready certificates:"
    # Lists everything that is not Ready=True, including certificates that
    # have no Ready condition yet. Best-effort output only, hence `|| true`.
    jq -r ".items[] | select($is_ready | not)
           | \" - \" + .metadata.namespace + \"/\" + .metadata.name" \
      <<< "$all_certs" || true
  fi

  # Check ClusterIssuers
  local issuers ready_issuers total_issuers
  issuers=$(kube_json get clusterissuers)
  ready_issuers=$(jq_count "[.items[] | select($is_ready)] | length" "$issuers")
  total_issuers=$(jq_count '.items | length' "$issuers")

  if ((total_issuers == 0)); then
    log_warn "No ClusterIssuers found"
  elif ((ready_issuers == total_issuers)); then
    log_pass "All ClusterIssuers are ready ($ready_issuers/$total_issuers)"
  else
    log_fail "Some ClusterIssuers are not ready ($ready_issuers/$total_issuers)"
  fi
}

# Checks that `aws s3 ls` lists a bucket matching `coherence.*<name>` for
# each expected name. All found -> pass, some -> warning, none -> failure.
# A failing `aws s3 ls` is reported as its own failure.
check_s3_buckets() {
  log_info "Checking S3 buckets..."

  # Expected buckets
  local expected_buckets=("email" "logs" "traces" "lfs")
  local total=${#expected_buckets[@]}
  local found=0

  # List S3 buckets (third column of `aws s3 ls` output).
  local buckets
  if ! buckets=$(aws s3 ls 2> /dev/null | awk '{print $3}'); then
    # Say what actually went wrong instead of claiming the buckets are
    # missing when the listing itself could not be obtained.
    log_fail "Unable to list S3 buckets (aws s3 ls failed)"
    return
  fi

  local expected
  for expected in "${expected_buckets[@]}"; do
    # Here-string instead of `echo | grep -q`: with pipefail, grep exiting
    # early can make the pipeline fail and turn a match into a miss.
    if grep -q -- "coherence.*${expected}" <<< "$buckets"; then
      found=$((found + 1))
    fi
  done

  if ((found == total)); then
    log_pass "All expected S3 buckets found ($found/$total)"
  elif ((found > 0)); then
    log_warn "Only $found/$total expected S3 buckets found"
  else
    log_fail "No expected S3 buckets found"
  fi
}

# Checks Karpenter: at least one Running controller pod in the `karpenter`
# namespace and at least one NodePool. A missing namespace is only a warning.
check_karpenter() {
  log_info "Checking Karpenter..."

  # Check if karpenter namespace exists
  if ! kubectl get namespace karpenter &> /dev/null; then
    log_warn "karpenter namespace does not exist (may be using Fargate only)"
    return
  fi

  # Check Karpenter pods
  local kp_pods kp_ready
  kp_pods=$(kube_json -n karpenter get pods -l app.kubernetes.io/name=karpenter)
  kp_ready=$(jq_count \
    '[.items[] | select(.status.phase == "Running")] | length' "$kp_pods")
  if ((kp_ready >= 1)); then
    log_pass "Karpenter is running ($kp_ready pods)"
  else
    log_fail "Karpenter is not running"
  fi

  # Check NodePools
  local nodepools np_count
  nodepools=$(kube_json get nodepools)
  np_count=$(jq_count '.items | length' "$nodepools")
  if ((np_count >= 1)); then
    log_pass "Karpenter NodePools configured ($np_count)"
  else
    log_warn "No Karpenter NodePools found"
  fi
}

# Prints the pass/fail/warning totals and terminates the script: exit 1 when
# any check failed, exit 0 otherwise.
print_summary() {
  echo ""
  echo "========================================"
  echo "Phase 1 Validation Summary"
  echo "========================================"
  printf ' %sPassed:%s %s\n' "$GREEN" "$NC" "$PASSED"
  printf ' %sFailed:%s %s\n' "$RED" "$NC" "$FAILED"
  printf ' %sWarnings:%s %s\n' "$YELLOW" "$NC" "$WARNINGS"
  echo "========================================"

  if ((FAILED > 0)); then
    echo ""
    printf '%sPhase 1 validation FAILED%s\n' "$RED" "$NC"
    echo "Please fix the issues above before proceeding to Phase 2."
    exit 1
  elif ((WARNINGS > 0)); then
    echo ""
    printf '%sPhase 1 validation passed with warnings%s\n' "$YELLOW" "$NC"
    echo "Review warnings above. You may proceed to Phase 2."
    exit 0
  else
    echo ""
    printf '%sPhase 1 validation PASSED%s\n' "$GREEN" "$NC"
    echo "You may proceed to Phase 2."
    exit 0
  fi
}

# Entry point: runs every check in order, then prints the summary (which
# sets the exit status).
main() {
  echo "========================================"
  echo "Phase 1 Validation: Foundation Infrastructure"
  echo "========================================"
  echo ""

  check_cockroachdb_cluster
  check_certificates
  check_s3_buckets
  check_karpenter

  print_summary
}

main "$@"