hearth/terraform/minimal/user-data.sh
Eric Garcia 0d904fe130 fix(minimal): Replace Traefik HelmChart with direct deployment
HelmChart values schema changed in newer Traefik versions causing
installation failures. Replaced with direct Deployment + RBAC manifests
which work reliably with Traefik v3.2.

Also adds SSH public key variable for admin access.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-24 06:42:32 -05:00

424 lines
11 KiB
Bash

#!/bin/bash
set -euo pipefail
# Hearth Minimal - EC2 User Data Script
# Installs k3s and deploys Forgejo on a single node.
#
# This file is rendered with Terraform's templatefile(): dollar-brace tokens
# (e.g. the domain/email/bucket references below) are substituted at plan time.
# Shell variables therefore use bare $NAME on purpose - a braced shell
# expansion would be parsed by Terraform instead.

# Mirror all output to a log file for post-boot debugging.
exec > >(tee /var/log/user-data.log) 2>&1
echo "Starting user-data script at $(date)"
# -----------------------------------------------------------------------------
# Variables from Terraform
# -----------------------------------------------------------------------------
# Only SSH_PORT is read as a shell variable later; the others are kept
# (read-only) so an operator inspecting the instance can see what the
# template was rendered with.
readonly DOMAIN="${domain}"
readonly LETSENCRYPT_EMAIL="${letsencrypt_email}"
readonly SSH_PORT="${ssh_port}"
readonly S3_BUCKET="${s3_bucket}"
# -----------------------------------------------------------------------------
# System Setup
# -----------------------------------------------------------------------------
# Refresh packages first so everything below installs against current metadata.
dnf update -y
# Base tooling. `sqlite` provides the sqlite3 CLI required by the backup
# script installed later (it snapshots Forgejo's database with ".backup");
# without it the nightly backup silently skips the database copy.
dnf install -y docker git jq awscli sqlite
# Enable and start Docker (only for ad-hoc builds; k3s ships its own containerd).
systemctl enable --now docker
# Move the host sshd to an alternate port so Traefik can own port 22 for
# Forgejo's git-over-SSH traffic.
sed -i "s/#Port 22/Port $SSH_PORT/" /etc/ssh/sshd_config
systemctl restart sshd
# Add admin SSH key (the Terraform variable may render empty).
if [ -n "${ssh_public_key}" ]; then
  mkdir -p /home/ec2-user/.ssh
  echo "${ssh_public_key}" >> /home/ec2-user/.ssh/authorized_keys
  chown -R ec2-user:ec2-user /home/ec2-user/.ssh
  chmod 700 /home/ec2-user/.ssh
  chmod 600 /home/ec2-user/.ssh/authorized_keys
fi
# Enable automatic security updates.
dnf install -y dnf-automatic
sed -i 's/apply_updates = no/apply_updates = yes/' /etc/dnf/automatic.conf
systemctl enable --now dnf-automatic-install.timer
# -----------------------------------------------------------------------------
# Install k3s
# -----------------------------------------------------------------------------
# The bundled Traefik HelmChart is disabled; we deploy Traefik directly below
# because the chart's values schema changed across releases and broke installs.
echo "Installing k3s..."
curl -sfL https://get.k3s.io | sh -s - \
  --disable traefik \
  --write-kubeconfig-mode 644
# Wait for the node to report Ready, but bound the wait: an unbounded loop
# would hang cloud-init forever if the API server never comes up. 60 x 5s
# gives k3s five minutes before we fail the boot loudly.
echo "Waiting for k3s to be ready..."
k3s_ready=0
for ((i = 0; i < 60; i++)); do
  if kubectl get nodes 2>/dev/null | grep -q "Ready"; then
    k3s_ready=1
    break
  fi
  sleep 5
done
if [ "$k3s_ready" -ne 1 ]; then
  echo "ERROR: k3s did not become ready within 5 minutes" >&2
  exit 1
fi
echo "k3s is ready"
# -----------------------------------------------------------------------------
# Install Traefik with Let's Encrypt
# -----------------------------------------------------------------------------
echo "Installing Traefik..."
# Persist ACME state on the host. With an emptyDir, acme.json is lost on every
# pod restart, so Traefik re-issues certificates each time and can trip
# Let's Encrypt's duplicate-certificate rate limits.
mkdir -p /data/traefik
cat <<EOF | kubectl apply -f -
apiVersion: v1
kind: Namespace
metadata:
  name: traefik
---
apiVersion: v1
kind: ServiceAccount
metadata:
  name: traefik
  namespace: traefik
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: traefik
rules:
  - apiGroups: [""]
    resources: [services, endpoints, secrets, nodes, pods]
    verbs: [get, list, watch]
  - apiGroups: [extensions, networking.k8s.io]
    resources: [ingresses, ingressclasses]
    verbs: [get, list, watch]
  - apiGroups: [extensions, networking.k8s.io]
    resources: [ingresses/status]
    verbs: [update]
  - apiGroups: [traefik.io]
    resources: ["*"]
    verbs: [get, list, watch]
  - apiGroups: [discovery.k8s.io]
    resources: [endpointslices]
    verbs: [get, list, watch]
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: traefik
subjects:
  - kind: ServiceAccount
    name: traefik
    namespace: traefik
roleRef:
  kind: ClusterRole
  name: traefik
  apiGroup: rbac.authorization.k8s.io
---
apiVersion: apps/v1
kind: Deployment
metadata:
  name: traefik
  namespace: traefik
spec:
  replicas: 1
  selector:
    matchLabels:
      app: traefik
  template:
    metadata:
      labels:
        app: traefik
    spec:
      serviceAccountName: traefik
      containers:
        - name: traefik
          image: traefik:v3.2
          args:
            - --api.insecure=false
            - --providers.kubernetesingress
            - --providers.kubernetescrd
            - --entrypoints.web.address=:80
            - --entrypoints.websecure.address=:443
            - --entrypoints.ssh.address=:22/tcp
            - --entrypoints.web.http.redirections.entrypoint.to=websecure
            - --certificatesresolvers.letsencrypt.acme.email=${letsencrypt_email}
            - --certificatesresolvers.letsencrypt.acme.storage=/data/acme.json
            - --certificatesresolvers.letsencrypt.acme.httpchallenge.entrypoint=web
          ports:
            - name: web
              containerPort: 80
            - name: websecure
              containerPort: 443
            - name: ssh
              containerPort: 22
          volumeMounts:
            - name: acme
              mountPath: /data
      volumes:
        - name: acme
          hostPath:
            path: /data/traefik
            type: DirectoryOrCreate
---
apiVersion: v1
kind: Service
metadata:
  name: traefik
  namespace: traefik
spec:
  type: LoadBalancer
  externalTrafficPolicy: Local
  selector:
    app: traefik
  ports:
    - name: web
      port: 80
      targetPort: 80
    - name: websecure
      port: 443
      targetPort: 443
    - name: ssh
      port: 22
      targetPort: 22
EOF
# Wait for the deployment to actually roll out instead of sleeping blindly.
# Non-fatal: the rest of the setup can proceed while images are still pulling.
echo "Waiting for Traefik..."
kubectl -n traefik rollout status deployment/traefik --timeout=180s \
  || echo "WARNING: Traefik rollout not complete yet; continuing" >&2
# -----------------------------------------------------------------------------
# Create Forgejo Namespace and Resources
# -----------------------------------------------------------------------------
echo "Creating Forgejo namespace..."
# Idempotent namespace creation: render with --dry-run, then apply.
kubectl create namespace forgejo --dry-run=client -o yaml | kubectl apply -f -
# Host directory backing Forgejo's data volume. UID/GID 1000 matches the
# user the Forgejo container runs its workload as (see fsGroup below).
mkdir -p /data/forgejo
chown 1000:1000 /data/forgejo
# Static PersistentVolume over the host path, plus a matching claim in the
# forgejo namespace. "Retain" keeps repository data if the claim is deleted.
cat <<EOF | kubectl apply -f -
apiVersion: v1
kind: PersistentVolume
metadata:
  name: forgejo-data
spec:
  capacity:
    storage: 10Gi
  accessModes:
    - ReadWriteOnce
  hostPath:
    path: /data/forgejo
  storageClassName: local
  persistentVolumeReclaimPolicy: Retain
---
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: forgejo-data
  namespace: forgejo
spec:
  accessModes:
    - ReadWriteOnce
  resources:
    requests:
      storage: 10Gi
  storageClassName: local
EOF
# -----------------------------------------------------------------------------
# Deploy Forgejo
# -----------------------------------------------------------------------------
# Single-replica Forgejo on SQLite. Recreate strategy: the RWO hostPath volume
# cannot be attached to two pods at once, so never roll a second replica.
# HTTPS is terminated by Traefik (Ingress); git-over-SSH is forwarded raw via
# an IngressRouteTCP on the "ssh" entrypoint.
echo "Deploying Forgejo..."
cat <<EOF | kubectl apply -f -
apiVersion: apps/v1
kind: Deployment
metadata:
  name: forgejo
  namespace: forgejo
spec:
  replicas: 1
  selector:
    matchLabels:
      app: forgejo
  strategy:
    type: Recreate
  template:
    metadata:
      labels:
        app: forgejo
    spec:
      securityContext:
        fsGroup: 1000
      containers:
        - name: forgejo
          image: codeberg.org/forgejo/forgejo:9
          ports:
            - name: http
              containerPort: 3000
            - name: ssh
              containerPort: 22
          env:
            - name: FORGEJO__server__DOMAIN
              value: "${domain}"
            - name: FORGEJO__server__ROOT_URL
              value: "https://${domain}"
            - name: FORGEJO__server__SSH_DOMAIN
              value: "${domain}"
            - name: FORGEJO__server__SSH_PORT
              value: "22"
            - name: FORGEJO__server__LFS_START_SERVER
              value: "true"
            - name: FORGEJO__database__DB_TYPE
              value: "sqlite3"
            - name: FORGEJO__database__PATH
              value: "/data/gitea/gitea.db"
            - name: FORGEJO__security__INSTALL_LOCK
              value: "false"
            - name: FORGEJO__service__DISABLE_REGISTRATION
              value: "false"
            - name: FORGEJO__log__MODE
              value: "console"
            - name: FORGEJO__log__LEVEL
              value: "Info"
          volumeMounts:
            - name: data
              mountPath: /data
          resources:
            requests:
              cpu: 100m
              memory: 256Mi
            limits:
              cpu: 1000m
              memory: 1Gi
          livenessProbe:
            httpGet:
              path: /api/healthz
              port: 3000
            initialDelaySeconds: 30
            periodSeconds: 30
          readinessProbe:
            httpGet:
              path: /api/healthz
              port: 3000
            initialDelaySeconds: 5
            periodSeconds: 10
      volumes:
        - name: data
          persistentVolumeClaim:
            claimName: forgejo-data
---
apiVersion: v1
kind: Service
metadata:
  name: forgejo
  namespace: forgejo
spec:
  selector:
    app: forgejo
  ports:
    - name: http
      port: 3000
      targetPort: 3000
    - name: ssh
      port: 22
      targetPort: 22
---
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  name: forgejo
  namespace: forgejo
  annotations:
    traefik.ingress.kubernetes.io/router.entrypoints: websecure
    traefik.ingress.kubernetes.io/router.tls: "true"
    traefik.ingress.kubernetes.io/router.tls.certresolver: letsencrypt
spec:
  rules:
    - host: ${domain}
      http:
        paths:
          - path: /
            pathType: Prefix
            backend:
              service:
                name: forgejo
                port:
                  number: 3000
---
apiVersion: traefik.io/v1alpha1
kind: IngressRouteTCP
metadata:
  name: forgejo-ssh
  namespace: forgejo
spec:
  entryPoints:
    - ssh
  routes:
    - match: HostSNI(\`*\`)
      services:
        - name: forgejo
          port: 22
EOF
# -----------------------------------------------------------------------------
# Setup Backup Cron Job
# -----------------------------------------------------------------------------
echo "Setting up backup cron..."
# Quoted heredoc delimiter: the body is written out verbatim (no shell
# expansion at install time). Shell variables in the body use bare $NAME so
# Terraform's template renderer leaves them alone too.
cat <<'BACKUP_SCRIPT' > /usr/local/bin/backup-forgejo.sh
#!/bin/bash
# Nightly Forgejo + k3s state backup to S3.
# Usage: backup-forgejo.sh BUCKET
set -euo pipefail
S3_BUCKET="$1"
TIMESTAMP=$(date +%Y%m%d-%H%M%S)
# Unpredictable temp locations (mktemp) instead of fixed /tmp names, and an
# EXIT trap so partial backups are cleaned up on any exit path, not just success.
BACKUP_DIR=$(mktemp -d)
TARBALL=$(mktemp --suffix=.tar.gz)
trap 'rm -rf -- "$BACKUP_DIR" "$TARBALL"' EXIT
# Consistent snapshot of Forgejo's SQLite database; a plain cp could race
# concurrent writers.
if [ -f /data/forgejo/gitea/gitea.db ]; then
  sqlite3 /data/forgejo/gitea/gitea.db ".backup '$BACKUP_DIR/gitea.db'"
fi
# Best-effort copy of k3s server state (absent on a fresh install).
cp -r /var/lib/rancher/k3s/server/db "$BACKUP_DIR/k3s-db" 2>/dev/null || true
tar -czf "$TARBALL" -C "$BACKUP_DIR" .
aws s3 cp "$TARBALL" "s3://$S3_BUCKET/backups/backup-$TIMESTAMP.tar.gz"
# Retention of old backups is handled by the bucket lifecycle policy.
echo "Backup completed: s3://$S3_BUCKET/backups/backup-$TIMESTAMP.tar.gz"
BACKUP_SCRIPT
chmod +x /usr/local/bin/backup-forgejo.sh
# Daily backup at 03:00; output accumulates in /var/log/backup.log.
echo "0 3 * * * root /usr/local/bin/backup-forgejo.sh ${s3_bucket} >> /var/log/backup.log 2>&1" > /etc/cron.d/forgejo-backup
# Initial backup is best-effort: S3/DNS may not be reachable yet during boot.
/usr/local/bin/backup-forgejo.sh ${s3_bucket} || true
# -----------------------------------------------------------------------------
# Done
# -----------------------------------------------------------------------------
# One here-doc instead of a run of echo calls; the emitted text is identical.
cat <<EOF
User-data script completed at $(date)

==========================================
Forgejo deployment complete!
==========================================

Web URL: https://${domain}
Git SSH: git@${domain}:ORG/REPO.git
Admin SSH: ssh -p ${ssh_port} ec2-user@<ELASTIC_IP>

Next steps:
1. Point DNS: ${domain} -> <ELASTIC_IP>
2. Wait for DNS propagation
3. Visit https://${domain} to complete setup
==========================================
EOF