Clustering
Este documento explica como implementar clustering para obter alta disponibilidade no n8n. Ele cobre a configuração de múltiplas instâncias, o balanceamento de carga inteligente, a sincronização de dados, o failover automático e arquiteturas distribuídas que garantem operação contínua mesmo com falhas de hardware ou software, proporcionando disponibilidade de nível corporativo (enterprise-grade) para automações críticas de negócio.
Configuração Básica
Docker Compose com Cluster
# Docker Compose stack: nginx in front of three n8n containers that share
# PostgreSQL and Redis.
#
# The obsolete top-level `version` key is omitted; current Docker Compose
# releases ignore it and print a warning.
#
# NOTE(review): the n8n queue-mode documentation configures scaling with
# EXECUTIONS_MODE=queue, QUEUE_BULL_REDIS_HOST / QUEUE_BULL_REDIS_PORT and
# workers started with the `worker` command. EXECUTIONS_PROCESS=worker and
# REDIS_URL below do not appear to match that, so all three containers
# presumably start as ordinary n8n instances. Switching to queue mode also
# means taking the workers out of the proxy upstreams, so it is only flagged
# here -- confirm against the n8n version in use.
services:
  # Load balancer / TLS termination
  nginx:
    image: nginx:alpine
    restart: unless-stopped
    ports:
      - "80:80"
      - "443:443"
    volumes:
      # Read-only: the container never needs to modify its config or certs.
      - ./nginx.conf:/etc/nginx/nginx.conf:ro
      - ./ssl:/etc/nginx/ssl:ro
    depends_on:
      - n8n-main
      - n8n-worker-1
      - n8n-worker-2
    networks:
      - n8n_network

  # Main instance
  n8n-main:
    image: n8nio/n8n:latest
    # Fixed name so `docker exec n8n-main ...` / `docker logs n8n-main`
    # resolve; without it Compose prefixes the name with the project name.
    container_name: n8n-main
    restart: unless-stopped
    environment:
      - EXECUTIONS_PROCESS=main
      - EXECUTIONS_MODE=regular
      - REDIS_URL=redis://redis:6379
      - DB_TYPE=postgresdb
      - DB_POSTGRESDB_HOST=postgres
      - DB_POSTGRESDB_DATABASE=n8n
      - DB_POSTGRESDB_USER=n8n
      # `:?` makes Compose abort instead of silently substituting an empty
      # value when the variable is missing.
      - 'DB_POSTGRESDB_PASSWORD=${DB_PASSWORD:?DB_PASSWORD must be set}'
      # Must be identical on every n8n container that shares the database.
      - 'N8N_ENCRYPTION_KEY=${N8N_ENCRYPTION_KEY:?N8N_ENCRYPTION_KEY must be set}'
      - N8N_PROTOCOL=https
      - N8N_HOST=seudominio.com
    depends_on:
      # Wait until the backing services accept connections, not merely until
      # their containers have been started.
      postgres:
        condition: service_healthy
      redis:
        condition: service_healthy
    networks:
      - n8n_network

  # Execution instances
  n8n-worker-1:
    image: n8nio/n8n:latest
    container_name: n8n-worker-1
    restart: unless-stopped
    environment:
      - EXECUTIONS_PROCESS=worker
      - EXECUTIONS_MODE=regular
      - REDIS_URL=redis://redis:6379
      - DB_TYPE=postgresdb
      - DB_POSTGRESDB_HOST=postgres
      - DB_POSTGRESDB_DATABASE=n8n
      - DB_POSTGRESDB_USER=n8n
      - 'DB_POSTGRESDB_PASSWORD=${DB_PASSWORD:?DB_PASSWORD must be set}'
      - 'N8N_ENCRYPTION_KEY=${N8N_ENCRYPTION_KEY:?N8N_ENCRYPTION_KEY must be set}'
    depends_on:
      postgres:
        condition: service_healthy
      redis:
        condition: service_healthy
    networks:
      - n8n_network

  n8n-worker-2:
    image: n8nio/n8n:latest
    container_name: n8n-worker-2
    restart: unless-stopped
    environment:
      - EXECUTIONS_PROCESS=worker
      - EXECUTIONS_MODE=regular
      - REDIS_URL=redis://redis:6379
      - DB_TYPE=postgresdb
      - DB_POSTGRESDB_HOST=postgres
      - DB_POSTGRESDB_DATABASE=n8n
      - DB_POSTGRESDB_USER=n8n
      - 'DB_POSTGRESDB_PASSWORD=${DB_PASSWORD:?DB_PASSWORD must be set}'
      - 'N8N_ENCRYPTION_KEY=${N8N_ENCRYPTION_KEY:?N8N_ENCRYPTION_KEY must be set}'
    depends_on:
      postgres:
        condition: service_healthy
      redis:
        condition: service_healthy
    networks:
      - n8n_network

  # Shared database
  postgres:
    image: postgres:15
    restart: unless-stopped
    environment:
      - POSTGRES_DB=n8n
      - POSTGRES_USER=n8n
      - 'POSTGRES_PASSWORD=${DB_PASSWORD:?DB_PASSWORD must be set}'
    volumes:
      - postgres_data:/var/lib/postgresql/data
    healthcheck:
      test: ["CMD-SHELL", "pg_isready -U n8n -d n8n"]
      interval: 10s
      timeout: 5s
      retries: 5
    networks:
      - n8n_network

  # Shared Redis
  redis:
    image: redis:7-alpine
    restart: unless-stopped
    # Append-only file persistence, stored on the redis_data volume.
    command: redis-server --appendonly yes
    volumes:
      - redis_data:/data
    healthcheck:
      test: ["CMD", "redis-cli", "ping"]
      interval: 10s
      timeout: 5s
      retries: 5
    networks:
      - n8n_network

volumes:
  postgres_data:
  redis_data:

networks:
  n8n_network:
    driver: bridge
Balanceamento de Carga
Configuração Nginx
# nginx.conf -- TLS termination, rate limiting and reverse proxy for n8n.
events {
    worker_connections 1024;
}

http {
    # Only ask the backend for a protocol upgrade when the client did.
    # For ordinary requests the Connection header is cleared, which is what
    # allows the upstream keepalive pool below to be reused.
    map $http_upgrade $connection_upgrade {
        default upgrade;
        ''      '';
    }

    # Upstream for the n8n instances
    upstream n8n_backend {
        # NOTE(review): in n8n queue mode, worker processes execute queued jobs
        # and do not serve the editor/API -- confirm the worker entries below
        # are really meant to receive proxied HTTP traffic.

        # Main instance
        server n8n-main:5678 max_fails=3 fail_timeout=30s;

        # Execution instances
        server n8n-worker-1:5678 max_fails=3 fail_timeout=30s;
        server n8n-worker-2:5678 max_fails=3 fail_timeout=30s;

        # Idle keepalive connections cached per worker process. This is a
        # connection pool, not a health check; failure detection is passive
        # via max_fails / fail_timeout above.
        keepalive 32;
    }

    # Rate limiting
    limit_req_zone $binary_remote_addr zone=api:10m rate=10r/s;
    limit_req_zone $binary_remote_addr zone=webhook:10m rate=30r/s;
    # Answer throttled requests with 429 instead of the default 503.
    limit_req_status 429;

    # Gzip compression
    gzip on;
    gzip_vary on;
    gzip_min_length 1024;
    gzip_types text/plain text/css application/json application/javascript;

    # Redirect plain HTTP to HTTPS
    server {
        listen 80;
        server_name seudominio.com;
        return 301 https://$server_name$request_uri;
    }

    server {
        listen 443 ssl http2;
        server_name seudominio.com;

        # SSL configuration
        ssl_certificate /etc/nginx/ssl/cert.pem;
        ssl_certificate_key /etc/nginx/ssl/key.pem;
        ssl_protocols TLSv1.2 TLSv1.3;
        # The previous list used "...-GCM-SHA512" names, which are not OpenSSL
        # cipher suites, so it matched nothing usable for TLS 1.2.
        ssl_ciphers ECDHE-ECDSA-AES128-GCM-SHA256:ECDHE-RSA-AES128-GCM-SHA256:ECDHE-ECDSA-AES256-GCM-SHA384:ECDHE-RSA-AES256-GCM-SHA384:ECDHE-ECDSA-CHACHA20-POLY1305:ECDHE-RSA-CHACHA20-POLY1305;
        ssl_prefer_server_ciphers off;
        ssl_session_cache shared:SSL:10m;

        # Security headers
        add_header Strict-Transport-Security "max-age=31536000; includeSubDomains" always;
        add_header X-Frame-Options DENY always;
        add_header X-Content-Type-Options nosniff always;
        add_header X-XSS-Protection "1; mode=block" always;

        # API endpoints with rate limiting
        location /api/ {
            limit_req zone=api burst=20 nodelay;

            proxy_pass http://n8n_backend;
            # HTTP/1.1 with an empty Connection header is required for the
            # upstream keepalive pool to take effect.
            proxy_http_version 1.1;
            proxy_set_header Connection "";
            proxy_set_header Host $host;
            proxy_set_header X-Real-IP $remote_addr;
            proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
            proxy_set_header X-Forwarded-Proto $scheme;

            # Passive failover: retry on the next upstream server on these errors.
            proxy_next_upstream error timeout invalid_header http_500 http_502 http_503 http_504;
        }

        # Webhook endpoints with rate limiting
        location /webhook/ {
            limit_req zone=webhook burst=50 nodelay;

            proxy_pass http://n8n_backend;
            proxy_http_version 1.1;
            proxy_set_header Connection "";
            proxy_set_header Host $host;
            proxy_set_header X-Real-IP $remote_addr;
            proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
            proxy_set_header X-Forwarded-Proto $scheme;
            proxy_read_timeout 120s;

            # Passive failover: retry on the next upstream server on these errors.
            proxy_next_upstream error timeout invalid_header http_500 http_502 http_503 http_504;
        }

        # Everything else (editor UI, REST, WebSocket push)
        location / {
            proxy_pass http://n8n_backend;
            proxy_set_header Host $host;
            proxy_set_header X-Real-IP $remote_addr;
            proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
            proxy_set_header X-Forwarded-Proto $scheme;
            proxy_set_header X-Forwarded-Host $host;
            proxy_set_header X-Forwarded-Port $server_port;

            # WebSocket support: upgrade only when the client requested it.
            proxy_http_version 1.1;
            proxy_set_header Upgrade $http_upgrade;
            proxy_set_header Connection $connection_upgrade;

            # Passive failover: retry on the next upstream server on these errors.
            proxy_next_upstream error timeout invalid_header http_500 http_502 http_503 http_504;
        }
    }
}
HAProxy Alternativa
# haproxy.cfg -- alternative load balancer for the n8n containers.
global
    log /dev/log local0
    log /dev/log local1 notice
    chroot /var/lib/haproxy
    stats socket /run/haproxy/admin.sock mode 660 level admin expose-fd listeners
    stats timeout 30s
    user haproxy
    group haproxy
    daemon

defaults
    log global
    mode http
    option httplog
    option dontlognull
    # Values without a unit are milliseconds.
    timeout connect 5000
    timeout client 50000
    timeout server 50000

frontend n8n_frontend
    bind *:80
    bind *:443 ssl crt /etc/ssl/certs/n8n.pem

    # Redirect plain HTTP to HTTPS. Written as an http-request rule so it is
    # evaluated in order with the rules below (a legacy `redirect` line placed
    # before http-request rules triggers an ordering warning).
    http-request redirect scheme https unless { ssl_fc }

    # ACLs for the different traffic types
    acl is_api path_beg /api/
    acl is_webhook path_beg /webhook/

    # Rate limiting. The keyword is `stick-table` (hyphenated); `stick table`
    # is rejected by the configuration parser.
    stick-table type ip size 100k expire 30s store http_req_rate(10s)
    http-request track-sc0 src
    # The counter is per client IP across all paths, measured over 10 s.
    # Limits are enforced only on API and webhook paths so the editor UI,
    # which loads many assets at once, is not throttled:
    #   API:     100 requests / 10 s  (~10 r/s)
    #   webhook: 300 requests / 10 s  (~30 r/s)
    http-request deny deny_status 429 if is_api { sc_http_req_rate(0) gt 100 }
    http-request deny deny_status 429 if is_webhook { sc_http_req_rate(0) gt 300 }

    default_backend n8n_backend

backend n8n_backend
    balance roundrobin
    # Active health check: a server only receives traffic while GET /healthz
    # returns 200.
    option httpchk GET /healthz
    http-check expect status 200

    # Instances
    # NOTE(review): in n8n queue mode, worker processes do not serve the
    # editor/API -- confirm the worker entries should receive HTTP traffic.
    server n8n-main n8n-main:5678 check maxconn 100
    server n8n-worker-1 n8n-worker-1:5678 check maxconn 100
    server n8n-worker-2 n8n-worker-2:5678 check maxconn 100

    # Failover settings
    option redispatch
    retries 3
    timeout connect 5s
    timeout server 30s
    # Applies once a connection has been upgraded (e.g. WebSocket); without it
    # idle upgraded connections are cut after `timeout server`.
    timeout tunnel 1h
Failover Automático
Health Checks
#!/bin/bash
# Probe the n8n HTTP health endpoint.
#
# Exit status: 0 when the endpoint answers with a successful HTTP status,
# 1 otherwise (connection failure, timeout, or HTTP error).
#
# Every setting can be overridden from the environment; the defaults are the
# values that were previously hard-coded.

# Settings
N8N_HOST="${N8N_HOST:-localhost}"
N8N_PORT="${N8N_PORT:-5678}"
HEALTH_ENDPOINT="${HEALTH_ENDPOINT:-/healthz}"
# Upper bound for the whole request so a hung instance cannot block the probe.
HEALTH_TIMEOUT="${HEALTH_TIMEOUT:-5}"

# Check n8n health (-f makes curl fail on HTTP error statuses)
if curl -f -s --max-time "$HEALTH_TIMEOUT" "http://$N8N_HOST:$N8N_PORT$HEALTH_ENDPOINT" > /dev/null; then
    echo "OK: n8n está saudável"
    exit 0
else
    echo "ERROR: n8n não está respondendo"
    exit 1
fi
Monitoramento de Failover
#!/bin/bash
# Check every cluster container's health endpoint; for each one that does not
# answer, post a notification to the webhook and restart the container.
#
# Exit status: 0 when every node answered, 1 when at least one did not.

# Settings
CLUSTER_NODES=("n8n-main" "n8n-worker-1" "n8n-worker-2")
WEBHOOK_URL="https://hooks.slack.com/services/YOUR/WEBHOOK/URL"

echo "=== Monitoramento do Cluster n8n ==="
echo

unhealthy=0

# Check each node
for node in "${CLUSTER_NODES[@]}"; do
    # --max-time keeps a hung instance from stalling the whole loop.
    if ! docker exec "$node" curl -f -s --max-time 5 "http://localhost:5678/healthz" > /dev/null; then
        unhealthy=1
        echo "ALERTA: Nó $node não está respondendo!"

        # Send notification; a failed notification must not prevent the restart.
        curl -sS --max-time 10 -X POST "$WEBHOOK_URL" \
            -H "Content-type: application/json" \
            -d "{\"text\":\"🚨 Nó $node não está respondendo!\"}" > /dev/null \
            || echo "AVISO: falha ao enviar notificação para $node"

        # Try to restart the container
        docker restart "$node" \
            || echo "ERRO: falha ao reiniciar o container $node"
    else
        echo "OK: Nó $node está saudável"
    fi
done

exit "$unhealthy"
Kubernetes Deployment
Deployment Completo
# n8n-deployment.yaml
# Two Deployments for n8n queue mode: a single main process (editor, API,
# webhooks, triggers) and a pool of workers that execute queued jobs.
---
apiVersion: apps/v1
kind: Deployment
metadata:
  name: n8n-main
  labels:
    app: n8n
    component: main
spec:
  replicas: 1
  selector:
    matchLabels:
      app: n8n
      component: main
  template:
    metadata:
      labels:
        app: n8n
        component: main
    spec:
      containers:
        - name: n8n
          image: n8nio/n8n:latest
          ports:
            - containerPort: 5678
          env:
            # Queue mode: executions are placed on a Redis-backed queue and
            # picked up by the worker Deployment. This replaces
            # EXECUTIONS_PROCESS / EXECUTIONS_MODE=regular / REDIS_URL, which
            # are not the settings n8n documents for queue mode.
            - name: EXECUTIONS_MODE
              value: "queue"
            - name: QUEUE_BULL_REDIS_HOST
              value: "redis-service"
            - name: QUEUE_BULL_REDIS_PORT
              value: "6379"
            - name: DB_TYPE
              value: "postgresdb"
            - name: DB_POSTGRESDB_HOST
              value: "postgres-service"
            - name: DB_POSTGRESDB_DATABASE
              value: "n8n"
            - name: DB_POSTGRESDB_USER
              valueFrom:
                secretKeyRef:
                  name: n8n-secrets
                  key: db-user
            - name: DB_POSTGRESDB_PASSWORD
              valueFrom:
                secretKeyRef:
                  name: n8n-secrets
                  key: db-password
            # Must be the same key on main and workers.
            - name: N8N_ENCRYPTION_KEY
              valueFrom:
                secretKeyRef:
                  name: n8n-secrets
                  key: encryption-key
            - name: N8N_PROTOCOL
              value: "https"
            - name: N8N_HOST
              value: "seudominio.com"
          resources:
            requests:
              memory: "512Mi"
              cpu: "250m"
            limits:
              memory: "2Gi"
              cpu: "1000m"
          livenessProbe:
            httpGet:
              path: /healthz
              port: 5678
            initialDelaySeconds: 30
            periodSeconds: 10
          readinessProbe:
            httpGet:
              path: /healthz
              port: 5678
            initialDelaySeconds: 5
            periodSeconds: 5
---
apiVersion: apps/v1
kind: Deployment
metadata:
  name: n8n-worker
  labels:
    app: n8n
    component: worker
spec:
  replicas: 3
  selector:
    matchLabels:
      app: n8n
      component: worker
  template:
    metadata:
      labels:
        app: n8n
        component: worker
    spec:
      containers:
        - name: n8n
          image: n8nio/n8n:latest
          # Start the worker process instead of the default main process.
          args:
            - worker
          ports:
            - containerPort: 5678
          env:
            - name: EXECUTIONS_MODE
              value: "queue"
            - name: QUEUE_BULL_REDIS_HOST
              value: "redis-service"
            - name: QUEUE_BULL_REDIS_PORT
              value: "6379"
            # Workers expose /healthz only when this is enabled; the probes
            # below depend on it.
            - name: QUEUE_HEALTH_CHECK_ACTIVE
              value: "true"
            - name: DB_TYPE
              value: "postgresdb"
            - name: DB_POSTGRESDB_HOST
              value: "postgres-service"
            - name: DB_POSTGRESDB_DATABASE
              value: "n8n"
            - name: DB_POSTGRESDB_USER
              valueFrom:
                secretKeyRef:
                  name: n8n-secrets
                  key: db-user
            - name: DB_POSTGRESDB_PASSWORD
              valueFrom:
                secretKeyRef:
                  name: n8n-secrets
                  key: db-password
            - name: N8N_ENCRYPTION_KEY
              valueFrom:
                secretKeyRef:
                  name: n8n-secrets
                  key: encryption-key
          resources:
            requests:
              memory: "512Mi"
              cpu: "250m"
            limits:
              memory: "2Gi"
              cpu: "1000m"
          livenessProbe:
            httpGet:
              path: /healthz
              port: 5678
            initialDelaySeconds: 30
            periodSeconds: 10
          readinessProbe:
            httpGet:
              path: /healthz
              port: 5678
            initialDelaySeconds: 5
            periodSeconds: 5
---
# n8n-service.yaml
# Exposes the main process inside the cluster.
apiVersion: v1
kind: Service
metadata:
  name: n8n-service
  labels:
    app: n8n
spec:
  selector:
    app: n8n
    # Route only to the main pods: `app: n8n` alone also matches the worker
    # pods, which do not serve the editor or API.
    component: main
  ports:
    - port: 5678
      targetPort: 5678
      protocol: TCP
  type: ClusterIP
---
# n8n-ingress.yaml
# Publishes n8n-service at https://seudominio.com with a cert-manager
# issued certificate.
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  name: n8n-ingress
  annotations:
    cert-manager.io/cluster-issuer: "letsencrypt-prod"
    nginx.ingress.kubernetes.io/ssl-redirect: "true"
    nginx.ingress.kubernetes.io/force-ssl-redirect: "true"
spec:
  # Replaces the deprecated `kubernetes.io/ingress.class` annotation.
  # Assumes an IngressClass named "nginx" exists in the cluster -- TODO confirm.
  ingressClassName: nginx
  tls:
    - hosts:
        - seudominio.com
      secretName: n8n-tls
  rules:
    - host: seudominio.com
      http:
        paths:
          - path: /
            pathType: Prefix
            backend:
              service:
                name: n8n-service
                port:
                  number: 5678
Monitoramento
Script de Métricas
#!/bin/bash
# Print a point-in-time report for the n8n containers: container status,
# resource usage, recent warnings/errors, health checks and Redis counters.

# Print the output of a redis-cli command, or 0 when the command fails or
# prints nothing. redis-cli exits successfully with empty output for a missing
# key, so a `|| echo '0'` fallback never fires.
redis_value() {
    local value
    value=$(redis-cli "$@" 2>/dev/null)
    echo "${value:-0}"
}

echo "=== Métricas do Cluster n8n ==="
echo

echo "1. Status dos Containers:"
docker ps --filter "name=n8n" --format "table {{.Names}}\t{{.Status}}\t{{.Ports}}"
echo

echo "2. Uso de Recursos:"
docker stats --no-stream --format "table {{.Container}}\t{{.CPUPerc}}\t{{.MemUsage}}\t{{.NetIO}}"
echo

echo "3. Logs de Erro (últimas 10 linhas):"
# Only the last 10 log lines are inspected; 2>&1 includes the container's stderr.
docker logs --tail 10 n8n-main 2>&1 | grep -E "(ERROR|WARN)" || echo "Nenhum erro encontrado"
echo

echo "4. Health Checks:"
for container in n8n-main n8n-worker-1 n8n-worker-2; do
    # --max-time keeps a hung instance from stalling the report.
    if docker exec "$container" curl -f -s --max-time 5 "http://localhost:5678/healthz" > /dev/null; then
        echo "✅ $container: Saudável"
    else
        echo "❌ $container: Não respondendo"
    fi
done
echo

echo "5. Métricas Redis:"
# NOTE(review): redis-cli runs on the host with its default connection
# settings, and the n8n:* key names are used as given -- confirm both against
# the actual Redis deployment and the keys the queue really writes.
echo "Jobs na fila: $(redis_value llen n8n:queue:jobs)"
echo "Webhooks na fila: $(redis_value llen n8n:queue:webhooks)"
echo "Jobs processados: $(redis_value get n8n:stats:processed)"
echo "Jobs falharam: $(redis_value get n8n:stats:failed)"
Alertas Automáticos
#!/bin/bash
# Post a webhook alert when n8n-main's CPU or memory usage, or the Redis job
# queue length, exceeds its threshold.

# Settings
ALERT_THRESHOLD_CPU=80
ALERT_THRESHOLD_MEMORY=85
ALERT_THRESHOLD_QUEUE=1000
WEBHOOK_URL="https://hooks.slack.com/services/YOUR/WEBHOOK/URL"

# Post one alert message ($1) to the webhook.
send_alert() {
    curl -sS --max-time 10 -X POST "$WEBHOOK_URL" \
        -H "Content-type: application/json" \
        -d "{\"text\":\"$1\"}" > /dev/null
}

# Succeed only when $1 is a non-negative number strictly greater than $2.
# An empty or non-numeric reading (container missing, Redis unreachable) is
# treated as "not exceeded" instead of producing a shell or bc error.
exceeds() {
    awk -v value="$1" -v limit="$2" \
        'BEGIN { exit !(value ~ /^[0-9]+(\.[0-9]+)?$/ && value + 0 > limit + 0) }'
}

# Check CPU (docker prints e.g. "12.34%"; strip the percent sign)
CPU_USAGE=$(docker stats --no-stream --format "{{.CPUPerc}}" n8n-main | sed 's/%//')
if exceeds "$CPU_USAGE" "$ALERT_THRESHOLD_CPU"; then
    send_alert "⚠️ CPU alta: ${CPU_USAGE}%"
fi

# Check memory
MEMORY_USAGE=$(docker stats --no-stream --format "{{.MemPerc}}" n8n-main | sed 's/%//')
if exceeds "$MEMORY_USAGE" "$ALERT_THRESHOLD_MEMORY"; then
    send_alert "⚠️ Memória alta: ${MEMORY_USAGE}%"
fi

# Check queue
# NOTE(review): the key name is used as given -- confirm it matches the key the
# queue actually writes.
QUEUE_SIZE=$(redis-cli llen n8n:queue:jobs)
if exceeds "$QUEUE_SIZE" "$ALERT_THRESHOLD_QUEUE"; then
    send_alert "⚠️ Fila muito grande: $QUEUE_SIZE jobs"
fi
Troubleshooting
Problemas Comuns
#!/bin/bash
# Collect basic diagnostics for the n8n cluster: relevant environment
# variables, per-node health endpoint responses, nginx rate-limiting log
# entries and an nginx configuration test.

echo "=== Diagnóstico do Cluster n8n ==="
echo

echo "1. Verificar variáveis de ambiente:"
# Mask the values of secret-looking variables (database password, encryption
# key, ...) so diagnostic output can be shared without leaking credentials.
docker exec n8n-main env \
    | grep -E "(DB_|REDIS_|N8N_)" \
    | sed -E 's/^([^=]*(PASSWORD|KEY|SECRET|TOKEN)[^=]*)=.*/\1=********/'
echo

echo "2. Verificar conectividade entre nós:"
for node in n8n-main n8n-worker-1 n8n-worker-2; do
    echo "=== $node ==="
    # -I requests headers only; --max-time bounds a hung instance.
    docker exec "$node" curl -I --max-time 5 http://localhost:5678/healthz
    echo
done

echo "3. Verificar logs de erro:"
# NOTE(review): reads the log file on the machine running this script --
# confirm nginx writes its error log there rather than to container output.
grep "limiting requests" /var/log/nginx/error.log 2>/dev/null \
    || echo "Nenhuma ocorrência encontrada"
echo

echo "4. Verificar configuração do nginx:"
# NOTE(review): assumes the nginx binary is available where this script runs.
nginx -t
Checklist de Produção
Configuração
- Redis instalado e configurado
- Autenticação Redis configurada
- Variáveis de ambiente configuradas
- Conexão Redis testada
- Workers configurados corretamente
Performance
- Configurações de memória otimizadas
- Persistência Redis configurada
- Workers distribuídos adequadamente
- Timeouts configurados
- Retry policies definidas
Monitoramento
- Scripts de monitoramento configurados
- Alertas configurados
- Métricas sendo coletadas
- Logs estruturados
- Dashboard de monitoramento
Segurança
- Senha Redis configurada
- Acesso restrito por IP
- SSL/TLS configurado (se necessário)
- Backup Redis configurado
- Logs de acesso ativados
Dica Pro
Configure health checks adequados e monitore a distribuição de carga entre os nós do cluster para garantir alta disponibilidade.
Importante
Sempre teste o clustering em um ambiente de desenvolvimento antes de aplicá-lo em produção. Configure backups adequados para o Redis e para o banco de dados.