hearth/terraform/minimal/user-data.sh
Eric Garcia b1065ca887 feat(minimal): Add k3s-on-EC2 infrastructure for single user
Decision from 12-expert alignment dialogue on single-user scale.
Implements Option E with modifications:

- t4g.small spot instance (~$5/mo)
- k3s with Traefik for ingress + Let's Encrypt TLS
- SQLite database for Forgejo
- S3 backups with 30-day lifecycle
- EBS gp3 20GB encrypted
- Admin SSH on port 2222, Git SSH on port 22

Total cost: ~$7.50/month

Includes:
- terraform/minimal/ - full terraform configuration
- terraform/bootstrap/ - state backend (already applied)
- docs/spikes/0001-single-user-scale.md - decision documentation

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-24 06:21:55 -05:00

352 lines
9.1 KiB
Bash

#!/bin/bash
set -euo pipefail
# Hearth Minimal - EC2 User Data Script
# Installs k3s and deploys Forgejo
#
# Everything written to stdout/stderr from here on is mirrored into
# /var/log/user-data.log.
exec > >(tee /var/log/user-data.log) 2>&1
echo "Starting user-data script at $(date)"
# -----------------------------------------------------------------------------
# Variables from Terraform
# -----------------------------------------------------------------------------
# The lower-case placeholders are substituted before the script runs.
# NOTE(review): if this file is rendered by a template engine that treats every
# dollar-brace sequence as an interpolation, the upper-case shell expansions
# used further down need to be escaped or supplied as well - confirm.
DOMAIN="${domain}"
LETSENCRYPT_EMAIL="${letsencrypt_email}"
SSH_PORT="${ssh_port}"
S3_BUCKET="${s3_bucket}"
# -----------------------------------------------------------------------------
# System Setup
# -----------------------------------------------------------------------------
# Update system
dnf update -y
# Install required packages
dnf install -y docker git jq awscli
# Enable and start Docker (for building if needed)
systemctl enable --now docker
# Move SSH to alternate port for admin access.
# Port 22 is exposed by the Traefik "ssh" entrypoint later in this script, so
# a silent no-op here would leave admin SSH on the port meant for Git. Rewrite
# the Port directive whether or not it is commented out, then verify the
# result instead of assuming the substitution matched.
SSHD_CONFIG=/etc/ssh/sshd_config
sed -i -E 's/^#?Port[[:space:]]+[0-9]+[[:space:]]*$/Port '"$SSH_PORT"'/' "$SSHD_CONFIG"
if ! grep -Eq '^Port[[:space:]]+'"$SSH_PORT"'[[:space:]]*$' "$SSHD_CONFIG"; then
  echo "ERROR: could not set sshd Port to $SSH_PORT in $SSHD_CONFIG" >&2
  exit 1
fi
# Validate the configuration before restarting so a bad edit cannot take sshd down.
sshd -t
systemctl restart sshd
# Enable automatic security updates
dnf install -y dnf-automatic
sed -i 's/apply_updates = no/apply_updates = yes/' /etc/dnf/automatic.conf
systemctl enable --now dnf-automatic-install.timer
# -----------------------------------------------------------------------------
# Install k3s
# -----------------------------------------------------------------------------
echo "Installing k3s..."
# The bundled Traefik is disabled; a customised Traefik is installed further
# down in this script.
curl -sfL https://get.k3s.io | sh -s - \
  --disable traefik \
  --write-kubeconfig-mode 644
# Wait for k3s to be ready.
# The node's Ready condition is checked directly: grepping the `kubectl get
# nodes` table for "Ready" also matches "NotReady". The wait is bounded so a
# broken install fails visibly instead of looping forever.
echo "Waiting for k3s to be ready..."
k3s_deadline=$((SECONDS + 600))
until kubectl wait --for=condition=Ready node --all --timeout=10s >/dev/null 2>&1; do
  if [ "$SECONDS" -ge "$k3s_deadline" ]; then
    echo "ERROR: k3s node did not become Ready within 600 seconds" >&2
    exit 1
  fi
  sleep 5
done
echo "k3s is ready"
# -----------------------------------------------------------------------------
# Install Traefik with Let's Encrypt
# -----------------------------------------------------------------------------
echo "Installing Traefik..."
# Creates the "traefik" namespace and a HelmChart resource (in kube-system)
# that installs the Traefik chart into it. The chart values define:
#   - an extra "ssh" TCP entrypoint on container port 2222, exposed as port 22
#   - a redirect from the "web" entrypoint to "websecure", with TLS enabled
#   - a "letsencrypt" ACME resolver storing its state in /data/acme.json on a
#     128Mi persistent volume
# NOTE(review): the values configure httpChallenge for the resolver while
# additionalArguments also turns on tlschallenge for the same resolver -
# confirm which challenge type is intended.
kubectl apply -f - <<EOF
apiVersion: v1
kind: Namespace
metadata:
  name: traefik
---
apiVersion: helm.cattle.io/v1
kind: HelmChart
metadata:
  name: traefik
  namespace: kube-system
spec:
  repo: https://traefik.github.io/charts
  chart: traefik
  targetNamespace: traefik
  valuesContent: |-
    ports:
      ssh:
        port: 2222
        exposedPort: 22
        expose:
          default: true
        protocol: TCP
      web:
        redirectTo:
          port: websecure
      websecure:
        tls:
          enabled: true
    ingressRoute:
      dashboard:
        enabled: false
    certificatesResolvers:
      letsencrypt:
        acme:
          email: ${LETSENCRYPT_EMAIL}
          storage: /data/acme.json
          httpChallenge:
            entryPoint: web
    persistence:
      enabled: true
      size: 128Mi
    additionalArguments:
      - "--certificatesresolvers.letsencrypt.acme.tlschallenge=true"
      - "--entrypoints.ssh.address=:2222/tcp"
    service:
      type: LoadBalancer
EOF
# Wait for Traefik.
# The Forgejo manifests applied later include a traefik.io/v1alpha1
# IngressRouteTCP; that apply fails (and aborts this script) if the CRD is not
# registered yet. Poll for the CRD instead of sleeping for a fixed interval.
echo "Waiting for Traefik..."
traefik_deadline=$((SECONDS + 300))
until kubectl get crd ingressroutetcps.traefik.io >/dev/null 2>&1; do
  if [ "$SECONDS" -ge "$traefik_deadline" ]; then
    echo "ERROR: Traefik CRDs were not registered within 300 seconds" >&2
    exit 1
  fi
  sleep 5
done
kubectl wait --for=condition=Established crd/ingressroutetcps.traefik.io --timeout=60s
# -----------------------------------------------------------------------------
# Create Forgejo Namespace and Resources
# -----------------------------------------------------------------------------
echo "Creating Forgejo namespace..."
# Render the namespace client-side and apply it, so an already existing
# namespace is not an error.
kubectl create namespace forgejo --dry-run=client -o yaml | kubectl apply -f -
# Create Forgejo data directory on the host, owned by uid/gid 1000
# (the same id used as fsGroup in the Forgejo Deployment).
mkdir -p /data/forgejo
chown 1000:1000 /data/forgejo
# Create PV and PVC for Forgejo data.
# The PV is a hostPath volume over /data/forgejo with reclaim policy Retain;
# the PVC in the forgejo namespace requests the same size, access mode and
# storageClassName ("local").
kubectl apply -f - <<EOF
apiVersion: v1
kind: PersistentVolume
metadata:
  name: forgejo-data
spec:
  capacity:
    storage: 10Gi
  accessModes:
    - ReadWriteOnce
  hostPath:
    path: /data/forgejo
  storageClassName: local
  persistentVolumeReclaimPolicy: Retain
---
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: forgejo-data
  namespace: forgejo
spec:
  accessModes:
    - ReadWriteOnce
  resources:
    requests:
      storage: 10Gi
  storageClassName: local
EOF
# -----------------------------------------------------------------------------
# Deploy Forgejo
# -----------------------------------------------------------------------------
echo "Deploying Forgejo..."
# Applies four resources in the forgejo namespace:
#   - Deployment: one Forgejo replica (Recreate strategy) with /data mounted
#     from the forgejo-data PVC and an SQLite database at /data/gitea/gitea.db
#   - Service: ports 3000 (http) and 22 (ssh)
#   - Ingress: HTTPS for the configured domain via the "websecure" entrypoint
#     and the "letsencrypt" certificate resolver
#   - IngressRouteTCP: forwards the Traefik "ssh" entrypoint to service port 22
# INSTALL_LOCK and DISABLE_REGISTRATION are both "false"; the closing banner
# of this script tells the operator to visit the site to complete setup.
# The backticks in the HostSNI matcher are escaped so the shell does not treat
# them as command substitution inside this unquoted heredoc.
kubectl apply -f - <<EOF
apiVersion: apps/v1
kind: Deployment
metadata:
  name: forgejo
  namespace: forgejo
spec:
  replicas: 1
  selector:
    matchLabels:
      app: forgejo
  strategy:
    type: Recreate
  template:
    metadata:
      labels:
        app: forgejo
    spec:
      securityContext:
        fsGroup: 1000
      containers:
        - name: forgejo
          image: codeberg.org/forgejo/forgejo:9
          ports:
            - name: http
              containerPort: 3000
            - name: ssh
              containerPort: 22
          env:
            - name: FORGEJO__server__DOMAIN
              value: "${DOMAIN}"
            - name: FORGEJO__server__ROOT_URL
              value: "https://${DOMAIN}"
            - name: FORGEJO__server__SSH_DOMAIN
              value: "${DOMAIN}"
            - name: FORGEJO__server__SSH_PORT
              value: "22"
            - name: FORGEJO__server__LFS_START_SERVER
              value: "true"
            - name: FORGEJO__database__DB_TYPE
              value: "sqlite3"
            - name: FORGEJO__database__PATH
              value: "/data/gitea/gitea.db"
            - name: FORGEJO__security__INSTALL_LOCK
              value: "false"
            - name: FORGEJO__service__DISABLE_REGISTRATION
              value: "false"
            - name: FORGEJO__log__MODE
              value: "console"
            - name: FORGEJO__log__LEVEL
              value: "Info"
          volumeMounts:
            - name: data
              mountPath: /data
          resources:
            requests:
              cpu: 100m
              memory: 256Mi
            limits:
              cpu: 1000m
              memory: 1Gi
          livenessProbe:
            httpGet:
              path: /api/healthz
              port: 3000
            initialDelaySeconds: 30
            periodSeconds: 30
          readinessProbe:
            httpGet:
              path: /api/healthz
              port: 3000
            initialDelaySeconds: 5
            periodSeconds: 10
      volumes:
        - name: data
          persistentVolumeClaim:
            claimName: forgejo-data
---
apiVersion: v1
kind: Service
metadata:
  name: forgejo
  namespace: forgejo
spec:
  selector:
    app: forgejo
  ports:
    - name: http
      port: 3000
      targetPort: 3000
    - name: ssh
      port: 22
      targetPort: 22
---
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  name: forgejo
  namespace: forgejo
  annotations:
    traefik.ingress.kubernetes.io/router.entrypoints: websecure
    traefik.ingress.kubernetes.io/router.tls: "true"
    traefik.ingress.kubernetes.io/router.tls.certresolver: letsencrypt
spec:
  rules:
    - host: ${DOMAIN}
      http:
        paths:
          - path: /
            pathType: Prefix
            backend:
              service:
                name: forgejo
                port:
                  number: 3000
---
apiVersion: traefik.io/v1alpha1
kind: IngressRouteTCP
metadata:
  name: forgejo-ssh
  namespace: forgejo
spec:
  entryPoints:
    - ssh
  routes:
    - match: HostSNI(\`*\`)
      services:
        - name: forgejo
          port: 22
EOF
# -----------------------------------------------------------------------------
# Setup Backup Cron Job
# -----------------------------------------------------------------------------
echo "Setting up backup cron..."
# The backup script shells out to sqlite3 and is scheduled through
# /etc/cron.d, so make sure the sqlite CLI and a cron daemon are installed and
# running.
# NOTE(review): package/service names assume an Amazon Linux / Fedora-family
# image (the script already relies on dnf) - confirm for the AMI in use.
dnf install -y sqlite cronie
systemctl enable --now crond
# The heredoc delimiter is quoted so nothing below is expanded while the
# script is being written out.
cat <<'BACKUP_SCRIPT' > /usr/local/bin/backup-forgejo.sh
#!/bin/bash
# Usage: backup-forgejo.sh <s3-bucket>
#
# Snapshots the Forgejo SQLite database and copies the k3s server db
# directory, bundles both into a tarball and uploads it to
# s3://<s3-bucket>/backups/backup-<timestamp>.tar.gz.
set -euo pipefail
if [ "$#" -lt 1 ] || [ -z "$1" ]; then
  echo "Usage: $0 <s3-bucket>" >&2
  exit 2
fi
S3_BUCKET="$1"
TIMESTAMP=$(date +%Y%m%d-%H%M%S)
# Private scratch directory with an unpredictable name; the trap removes it on
# every exit path, including failures under `set -e`.
WORK_DIR=$(mktemp -d /tmp/backup-XXXXXX)
trap 'rm -rf -- "$WORK_DIR"' EXIT
BACKUP_DIR="$WORK_DIR/payload"
ARCHIVE="$WORK_DIR/backup-$TIMESTAMP.tar.gz"
mkdir -p "$BACKUP_DIR"
# Backup Forgejo SQLite database (skipped until Forgejo has created it)
if [ -f /data/forgejo/gitea/gitea.db ]; then
  sqlite3 /data/forgejo/gitea/gitea.db ".backup '$BACKUP_DIR/gitea.db'"
fi
# Backup k3s state. Deliberately best-effort: a missing or unreadable
# directory must not prevent the Forgejo backup from being uploaded.
cp -r /var/lib/rancher/k3s/server/db "$BACKUP_DIR/k3s-db" 2>/dev/null || true
# Create tarball (kept outside the payload directory it archives)
tar -czf "$ARCHIVE" -C "$BACKUP_DIR" .
# Upload to S3
aws s3 cp "$ARCHIVE" "s3://$S3_BUCKET/backups/backup-$TIMESTAMP.tar.gz"
# Nothing is pruned here; retention of old backups is left to the bucket's
# S3 lifecycle policy.
echo "Backup completed: s3://$S3_BUCKET/backups/backup-$TIMESTAMP.tar.gz"
BACKUP_SCRIPT
chmod +x /usr/local/bin/backup-forgejo.sh
# Add cron job for daily backup at 3 AM (runs as root, output appended to
# /var/log/backup.log). The bucket name is expanded now, when the file is written.
echo "0 3 * * * root /usr/local/bin/backup-forgejo.sh ${S3_BUCKET} >> /var/log/backup.log 2>&1" > /etc/cron.d/forgejo-backup
# Initial backup. A failure here is reported but must not abort provisioning.
/usr/local/bin/backup-forgejo.sh "$S3_BUCKET" \
  || echo "WARNING: initial backup failed; continuing" >&2
# -----------------------------------------------------------------------------
# Done
# -----------------------------------------------------------------------------
# Print the completion timestamp followed by a summary of the endpoints and
# the remaining manual steps. The heredoc is unquoted so the variables and the
# date command are expanded.
cat <<EOF
User-data script completed at $(date)

==========================================
Forgejo deployment complete!
==========================================

Web URL: https://${DOMAIN}
Git SSH: git@${DOMAIN}:ORG/REPO.git
Admin SSH: ssh -p ${SSH_PORT} ec2-user@<ELASTIC_IP>

Next steps:
1. Point DNS: ${DOMAIN} -> <ELASTIC_IP>
2. Wait for DNS propagation
3. Visit https://${DOMAIN} to complete setup
==========================================
EOF