Hearth is the infrastructure home for the letemcook ecosystem.

Ported from coherence-mcp/infra:
- Terraform modules (VPC, EKS, IAM, NLB, S3, storage)
- Kubernetes manifests (Forgejo, ingress, cert-manager, Karpenter)
- Deployment scripts (phased rollout)

Status: Not deployed. The EKS cluster still needs to be provisioned.

Next steps:
1. Bootstrap the Terraform backend
2. Deploy phase 1 (foundation)
3. Deploy phase 2 (core services, including Forgejo)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
245 lines
7.4 KiB
Bash
Executable file
245 lines
7.4 KiB
Bash
Executable file
#!/usr/bin/env bash
#
# Phase 1 Validation: Foundation Infrastructure
#
# Validates that RFC 0039 components are deployed and healthy:
#   - CockroachDB cluster (3 nodes, all live)
#   - cert-manager certificates (all Ready)
#   - S3 buckets (4 buckets created)
#   - Karpenter controller and NodePools (warns when absent)
#
# Requires: kubectl, jq and the aws CLI on PATH.
# Exit status: 1 when at least one check failed, 0 otherwise.
#
set -euo pipefail

# Directory containing this script.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"

# Colors for output (escape sequences; must be rendered with echo -e or
# printf '%b').
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m'
readonly RED GREEN YELLOW BLUE NC

# Result counters, incremented by log_pass / log_fail / log_warn.
PASSED=0
FAILED=0
WARNINGS=0

#######################################
# Print an informational message tagged with a blue [INFO] prefix.
# Globals:   BLUE, NC (read)
# Arguments: $1 - message text (printed verbatim, no escape processing)
# Outputs:   one line on stdout
#######################################
log_info() {
  # %b renders the colour escape sequences; %s keeps the message literal so
  # backslashes in interpolated values are not interpreted.
  printf '%b[INFO]%b %s\n' "$BLUE" "$NC" "$1"
}

#######################################
# Report a passed check and count it.
# Globals:   GREEN, NC (read); PASSED (incremented)
# Arguments: $1 - message text (printed verbatim)
# Outputs:   one line on stdout
# Returns:   0
#######################################
log_pass() {
  printf '%b[PASS]%b %s\n' "$GREEN" "$NC" "$1"
  # Plain assignment instead of ((PASSED++)): the arithmetic command returns
  # status 1 when the pre-increment value is 0, which aborts the script under
  # `set -e` on the very first pass.
  PASSED=$((PASSED + 1))
}

#######################################
# Report a failed check and count it.
# Globals:   RED, NC (read); FAILED (incremented)
# Arguments: $1 - message text (printed verbatim)
# Outputs:   one line on stdout
# Returns:   0
#######################################
log_fail() {
  printf '%b[FAIL]%b %s\n' "$RED" "$NC" "$1"
  # Plain assignment instead of ((FAILED++)): the arithmetic command returns
  # status 1 when the pre-increment value is 0, which aborts the script under
  # `set -e` before the summary can be printed.
  FAILED=$((FAILED + 1))
}

#######################################
# Report a warning and count it.
# Globals:   YELLOW, NC (read); WARNINGS (incremented)
# Arguments: $1 - message text (printed verbatim)
# Outputs:   one line on stdout
# Returns:   0
#######################################
log_warn() {
  printf '%b[WARN]%b %s\n' "$YELLOW" "$NC" "$1"
  # Plain assignment instead of ((WARNINGS++)): the arithmetic command returns
  # status 1 when the pre-increment value is 0, which aborts the script under
  # `set -e` on the first warning.
  WARNINGS=$((WARNINGS + 1))
}

#######################################
# Validate the CockroachDB deployment in the `cockroachdb` namespace.
#
# Performs three checks, each reported through log_pass / log_fail:
#   1. at least 3 pods carry the label app=cockroachdb,
#   2. at least 3 of them are Running with every container ready,
#   3. `cockroach node status` (run inside pod cockroachdb-0) lists at
#      least 3 nodes that are not reported as is_live=false.
#
# A failing kubectl or jq call is treated as "zero found" so the check is
# reported as a failure instead of aborting the script under `set -e`.
#
# Globals:   none directly (counters are updated via the log_* helpers)
# Arguments: none
# Returns:   0; returns early after one failure when the namespace is missing
#######################################
check_cockroachdb_cluster() {
  local -r min_nodes=3
  local -r empty_list='{"items":[]}'

  log_info "Checking CockroachDB cluster health..."

  # Check if namespace exists
  if ! kubectl get namespace cockroachdb &>/dev/null; then
    log_fail "cockroachdb namespace does not exist"
    return
  fi

  # Fetch the pod list once and reuse it for both pod checks.
  local pods_json
  pods_json=$(kubectl -n cockroachdb get pods -l app=cockroachdb -o json) \
    || pods_json=$empty_list

  # Check pod count
  local pod_count
  pod_count=$(jq '.items | length' <<<"$pods_json") || pod_count=0
  pod_count=${pod_count:-0}

  if [ "$pod_count" -ge "$min_nodes" ]; then
    log_pass "CockroachDB has $pod_count pods running (expected: >= ${min_nodes})"
  else
    log_fail "CockroachDB has only $pod_count pods (expected: >= ${min_nodes})"
  fi

  # Check pod health. Each pod is counted at most once, and only when all of
  # its containers report ready (selecting on `.containerStatuses[]` directly
  # would emit the pod once per ready container).
  local ready_pods
  ready_pods=$(jq '[.items[]
      | select(.status.phase == "Running")
      | select((.status.containerStatuses // [])
               | length > 0 and all(.ready == true))
    ] | length' <<<"$pods_json") || ready_pods=0
  ready_pods=${ready_pods:-0}

  if [ "$ready_pods" -ge "$min_nodes" ]; then
    log_pass "CockroachDB has $ready_pods ready pods"
  else
    log_fail "CockroachDB has only $ready_pods ready pods (expected: >= ${min_nodes})"
  fi

  # Check node status via cockroach CLI
  local node_status
  node_status=$(kubectl -n cockroachdb exec cockroachdb-0 -- cockroach node status --certs-dir=/cockroach/cockroach-certs --format=json 2>/dev/null || echo '[]')

  # Count only rows not explicitly marked dead. `tostring` accepts both the
  # string and the boolean form of is_live; rows without the field are still
  # counted. NOTE(review): assumes the JSON output is an array of node
  # objects with an is_live field — confirm against the deployed version.
  local live_nodes
  live_nodes=$(jq '[.[] | select((.is_live | tostring) != "false")] | length' \
    <<<"$node_status" 2>/dev/null) || live_nodes=0
  live_nodes=${live_nodes:-0}

  if [ "$live_nodes" -ge "$min_nodes" ]; then
    log_pass "CockroachDB cluster has $live_nodes live nodes"
  else
    log_fail "CockroachDB cluster has only $live_nodes live nodes (expected: >= ${min_nodes})"
  fi
}

#######################################
# Validate cert-manager and the resources it manages.
#
# Checks, each reported through log_pass / log_fail / log_warn:
#   1. at least one pod labelled app.kubernetes.io/instance=cert-manager is
#      in phase Running in the cert-manager namespace,
#   2. every Certificate in every namespace has a Ready=True condition
#      (warns when there are no Certificates; lists the non-ready ones),
#   3. every ClusterIssuer has a Ready=True condition (warns when none exist).
#
# A failing kubectl or jq call is treated as an empty result so the script
# is not aborted by `set -e` / pipefail.
#
# Globals:   none directly (counters are updated via the log_* helpers)
# Arguments: none
# Outputs:   log lines plus, on failure, the list of non-ready certificates
# Returns:   0; returns early after one failure when the namespace is missing
#######################################
check_certificates() {
  local -r empty_list='{"items":[]}'
  # Shared jq helper: true when the resource has a Ready condition whose
  # status is "True". Resources without any conditions are not ready.
  local -r jq_defs='def ready: any(.status.conditions[]?; .type == "Ready" and .status == "True");'

  log_info "Checking cert-manager certificates..."

  # Check if cert-manager namespace exists
  if ! kubectl get namespace cert-manager &>/dev/null; then
    log_fail "cert-manager namespace does not exist"
    return
  fi

  # Check cert-manager pods
  local cm_pods cm_ready
  cm_pods=$(kubectl -n cert-manager get pods -l app.kubernetes.io/instance=cert-manager -o json) \
    || cm_pods=$empty_list
  cm_ready=$(jq '[.items[] | select(.status.phase == "Running")] | length' \
    <<<"$cm_pods") || cm_ready=0
  cm_ready=${cm_ready:-0}

  if [ "$cm_ready" -ge 1 ]; then
    log_pass "cert-manager is running ($cm_ready pods)"
  else
    log_fail "cert-manager is not running"
  fi

  # Check all certificates across namespaces
  local all_certs total_certs ready_certs
  all_certs=$(kubectl get certificates -A -o json 2>/dev/null || echo "$empty_list")
  total_certs=$(jq '.items | length' <<<"$all_certs") || total_certs=0
  total_certs=${total_certs:-0}
  ready_certs=$(jq "${jq_defs}"' [.items[] | select(ready)] | length' \
    <<<"$all_certs") || ready_certs=0
  ready_certs=${ready_certs:-0}

  if [ "$total_certs" -eq 0 ]; then
    log_warn "No certificates found (may not be required yet)"
  elif [ "$ready_certs" -eq "$total_certs" ]; then
    log_pass "All certificates are ready ($ready_certs/$total_certs)"
  else
    log_fail "Some certificates are not ready ($ready_certs/$total_certs ready)"
    echo " Non-ready certificates:"
    # List everything that is not Ready=True, including certificates that
    # have no Ready condition yet, so the list matches the counts above.
    jq -r "${jq_defs}"' .items[] | select(ready | not)
      | " - " + .metadata.namespace + "/" + .metadata.name' \
      <<<"$all_certs" || true
  fi

  # Check ClusterIssuers
  local issuers ready_issuers total_issuers
  issuers=$(kubectl get clusterissuers -o json 2>/dev/null || echo "$empty_list")
  ready_issuers=$(jq "${jq_defs}"' [.items[] | select(ready)] | length' \
    <<<"$issuers") || ready_issuers=0
  ready_issuers=${ready_issuers:-0}
  total_issuers=$(jq '.items | length' <<<"$issuers") || total_issuers=0
  total_issuers=${total_issuers:-0}

  if [ "$total_issuers" -eq 0 ]; then
    log_warn "No ClusterIssuers found"
  elif [ "$ready_issuers" -eq "$total_issuers" ]; then
    log_pass "All ClusterIssuers are ready ($ready_issuers/$total_issuers)"
  else
    log_fail "Some ClusterIssuers are not ready ($ready_issuers/$total_issuers)"
  fi
}

#######################################
# Verify that the expected S3 buckets exist.
#
# Lists buckets with `aws s3 ls` and, for each of email / logs / traces /
# lfs, looks for a bucket name matching the regex "coherence.*<suffix>".
# Passes when all are found, warns when only some are found, fails when
# none are found (which includes the case where `aws s3 ls` itself fails).
#
# Globals:   none directly (counters are updated via the log_* helpers)
# Arguments: none
# Returns:   0
#######################################
check_s3_buckets() {
  log_info "Checking S3 buckets..."

  # Expected buckets
  local -a expected_buckets=("email" "logs" "traces" "lfs")
  local -r total=${#expected_buckets[@]}

  # List S3 buckets; the bucket name is the third column of `aws s3 ls`.
  local buckets
  buckets=$(aws s3 ls 2>/dev/null | awk '{print $3}') || buckets=""

  local found=0 expected
  for expected in "${expected_buckets[@]}"; do
    # Here-string instead of `echo | grep -q`: avoids a spurious pipeline
    # failure under pipefail when grep exits before echo finishes writing.
    if grep -q -- "coherence.*${expected}" <<<"$buckets"; then
      # Not ((found++)): that returns status 1 when found is 0 and would
      # abort the script under `set -e` on the first match.
      found=$((found + 1))
    fi
  done

  if [ "$found" -eq "$total" ]; then
    log_pass "All expected S3 buckets found ($found/$total)"
  elif [ "$found" -gt 0 ]; then
    log_warn "Only $found/$total expected S3 buckets found"
  else
    log_fail "No expected S3 buckets found"
  fi
}

#######################################
# Validate the Karpenter installation, if present.
#
# A missing `karpenter` namespace is only a warning. When the namespace
# exists, the function checks that at least one pod labelled
# app.kubernetes.io/name=karpenter is in phase Running (fail otherwise) and
# that at least one NodePool resource exists (warn otherwise).
#
# A failing kubectl or jq call is treated as an empty result so the script
# is not aborted by `set -e` / pipefail.
#
# Globals:   none directly (counters are updated via the log_* helpers)
# Arguments: none
# Returns:   0; returns early after one warning when the namespace is missing
#######################################
check_karpenter() {
  local -r empty_list='{"items":[]}'

  log_info "Checking Karpenter..."

  # Check if karpenter namespace exists
  if ! kubectl get namespace karpenter &>/dev/null; then
    log_warn "karpenter namespace does not exist (may be using Fargate only)"
    return
  fi

  # Check Karpenter pods. kubectl is run on its own (not piped into jq) so
  # that its failure falls back to an empty list instead of failing the
  # whole assignment under pipefail.
  local kp_pods kp_ready
  kp_pods=$(kubectl -n karpenter get pods -l app.kubernetes.io/name=karpenter -o json 2>/dev/null) \
    || kp_pods=$empty_list
  kp_ready=$(jq '[.items[] | select(.status.phase == "Running")] | length' \
    <<<"$kp_pods") || kp_ready=0
  kp_ready=${kp_ready:-0}

  if [ "$kp_ready" -ge 1 ]; then
    log_pass "Karpenter is running ($kp_ready pods)"
  else
    log_fail "Karpenter is not running"
  fi

  # Check NodePools
  local nodepools np_count
  nodepools=$(kubectl get nodepools -o json 2>/dev/null || echo "$empty_list")
  np_count=$(jq '.items | length' <<<"$nodepools") || np_count=0
  np_count=${np_count:-0}

  if [ "$np_count" -ge 1 ]; then
    log_pass "Karpenter NodePools configured ($np_count)"
  else
    log_warn "No Karpenter NodePools found"
  fi
}

#######################################
# Print the pass/fail/warning totals and terminate the script.
#
# Globals:   PASSED, FAILED, WARNINGS, RED, GREEN, YELLOW, NC (read)
# Arguments: none
# Outputs:   summary banner and verdict on stdout
# Exits:     1 when FAILED > 0; otherwise 0 (with or without warnings).
#            This function never returns to its caller.
#######################################
print_summary() {
  echo ""
  echo "========================================"
  echo "Phase 1 Validation Summary"
  echo "========================================"
  echo -e " ${GREEN}Passed:${NC} $PASSED"
  echo -e " ${RED}Failed:${NC} $FAILED"
  echo -e " ${YELLOW}Warnings:${NC} $WARNINGS"
  echo "========================================"

  # Failures take precedence over warnings when choosing the verdict.
  if [ "$FAILED" -gt 0 ]; then
    echo ""
    echo -e "${RED}Phase 1 validation FAILED${NC}"
    echo "Please fix the issues above before proceeding to Phase 2."
    exit 1
  elif [ "$WARNINGS" -gt 0 ]; then
    echo ""
    echo -e "${YELLOW}Phase 1 validation passed with warnings${NC}"
    echo "Review warnings above. You may proceed to Phase 2."
    exit 0
  else
    echo ""
    echo -e "${GREEN}Phase 1 validation PASSED${NC}"
    echo "You may proceed to Phase 2."
    exit 0
  fi
}

#######################################
# Entry point: print the banner, run every Phase 1 check in order, then
# print the summary.
#
# Arguments: none are used
# Outputs:   banner on stdout, followed by the output of each check
# Note:      print_summary is called last; in this script it terminates the
#            process with the final exit status.
#######################################
main() {
  echo "========================================"
  echo "Phase 1 Validation: Foundation Infrastructure"
  echo "========================================"
  echo ""

  check_cockroachdb_cluster
  check_certificates
  check_s3_buckets
  check_karpenter

  print_summary
}

# Run the validation, forwarding any command-line arguments to main.
main "$@"