Pular para o conteúdo principal

Clustering

Este documento explica como implementar clustering para alta disponibilidade do n8n: configuração de múltiplas instâncias, balanceamento de carga, sincronização de dados e failover automático. Essas arquiteturas distribuídas mantêm a operação contínua mesmo diante de falhas de hardware ou software, oferecendo disponibilidade de nível corporativo para automações críticas de negócio.


Configuração Básica

Docker Compose com Cluster

version: '3.8'

services:
  # Load balancer: publishes ports 80/443 and proxies traffic to n8n.
  nginx:
    image: nginx:alpine
    restart: unless-stopped
    ports:
      - "80:80"
      - "443:443"
    volumes:
      # Configuration and certificates are mounted read-only: the proxy never
      # needs to write to them, and a compromised container cannot alter them.
      - ./nginx.conf:/etc/nginx/nginx.conf:ro
      - ./ssl:/etc/nginx/ssl:ro
    depends_on:
      - n8n-main
      - n8n-worker-1
      - n8n-worker-2
    networks:
      - n8n_network

  # Main instance: serves the editor UI, API and webhooks, and enqueues
  # executions in Redis (queue mode) for the workers to pick up.
  n8n-main:
    image: n8nio/n8n:latest
    # Fixed name so that `docker exec n8n-main ...` / `docker logs n8n-main`
    # work without the Compose project prefix.
    container_name: n8n-main
    restart: unless-stopped
    environment:
      # Queue mode is what distributes executions; "regular" mode runs
      # everything inside this single process and never touches Redis.
      - EXECUTIONS_MODE=queue
      - QUEUE_BULL_REDIS_HOST=redis
      - QUEUE_BULL_REDIS_PORT=6379
      - DB_TYPE=postgresdb
      - DB_POSTGRESDB_HOST=postgres
      - DB_POSTGRESDB_DATABASE=n8n
      - DB_POSTGRESDB_USER=n8n
      - DB_POSTGRESDB_PASSWORD=${DB_PASSWORD}
      # Must be identical on the main instance and on every worker, otherwise
      # workers cannot decrypt stored credentials.
      - N8N_ENCRYPTION_KEY=${N8N_ENCRYPTION_KEY}
      - N8N_PROTOCOL=https
      - N8N_HOST=seudominio.com
      # Public URL used when n8n generates webhook addresses behind the proxy.
      - WEBHOOK_URL=https://seudominio.com/
    depends_on:
      - postgres
      - redis
    networks:
      - n8n_network

  # Workers: started with the `worker` command, they pull executions from the
  # Redis queue. They do not serve the editor, API or webhooks.
  n8n-worker-1:
    image: n8nio/n8n:latest
    container_name: n8n-worker-1
    restart: unless-stopped
    command: worker
    environment:
      - EXECUTIONS_MODE=queue
      - QUEUE_BULL_REDIS_HOST=redis
      - QUEUE_BULL_REDIS_PORT=6379
      # Exposes /healthz on the worker (port 5678 by default) so health
      # checks against workers have something to probe.
      - QUEUE_HEALTH_CHECK_ACTIVE=true
      - DB_TYPE=postgresdb
      - DB_POSTGRESDB_HOST=postgres
      - DB_POSTGRESDB_DATABASE=n8n
      - DB_POSTGRESDB_USER=n8n
      - DB_POSTGRESDB_PASSWORD=${DB_PASSWORD}
      - N8N_ENCRYPTION_KEY=${N8N_ENCRYPTION_KEY}
    depends_on:
      - postgres
      - redis
    networks:
      - n8n_network

  n8n-worker-2:
    image: n8nio/n8n:latest
    container_name: n8n-worker-2
    restart: unless-stopped
    command: worker
    environment:
      - EXECUTIONS_MODE=queue
      - QUEUE_BULL_REDIS_HOST=redis
      - QUEUE_BULL_REDIS_PORT=6379
      - QUEUE_HEALTH_CHECK_ACTIVE=true
      - DB_TYPE=postgresdb
      - DB_POSTGRESDB_HOST=postgres
      - DB_POSTGRESDB_DATABASE=n8n
      - DB_POSTGRESDB_USER=n8n
      - DB_POSTGRESDB_PASSWORD=${DB_PASSWORD}
      - N8N_ENCRYPTION_KEY=${N8N_ENCRYPTION_KEY}
    depends_on:
      - postgres
      - redis
    networks:
      - n8n_network

  # Shared PostgreSQL database that every n8n container connects to.
  postgres:
    image: postgres:15
    restart: unless-stopped
    networks:
      - n8n_network
    volumes:
      # Named volume so the data directory outlives the container.
      - postgres_data:/var/lib/postgresql/data
    environment:
      POSTGRES_DB: n8n
      POSTGRES_USER: n8n
      POSTGRES_PASSWORD: "${DB_PASSWORD}"

  # Shared Redis instance, started with append-only-file persistence.
  redis:
    image: redis:7-alpine
    restart: unless-stopped
    networks:
      - n8n_network
    volumes:
      - redis_data:/data
    command: ["redis-server", "--appendonly", "yes"]

# Named volumes referenced by the postgres and redis services.
volumes:
  postgres_data: {}
  redis_data: {}

# Bridge network that all services above are attached to.
networks:
  n8n_network:
    driver: bridge

Balanceamento de Carga

Configuração Nginx

events {
    worker_connections 1024;
}

http {
    # Send "Connection: upgrade" upstream only when the client requested an
    # upgrade (WebSocket). For ordinary requests the header is cleared, which
    # is required for the upstream keepalive pool below to be used.
    map $http_upgrade $connection_upgrade {
        default upgrade;
        ''      '';
    }

    # Upstream for n8n HTTP traffic
    upstream n8n_backend {
        # Only the main instance serves the editor, API and webhooks. Queue
        # workers pull executions from Redis and must not be listed here.
        # Add further HTTP-serving instances (e.g. webhook processors) as
        # extra `server` lines.
        server n8n-main:5678 max_fails=3 fail_timeout=30s;

        # Idle upstream connections kept open for reuse (not a health check)
        keepalive 32;
    }

    # Rate limiting (rejected requests are answered with 429 instead of 503)
    limit_req_zone $binary_remote_addr zone=api:10m rate=10r/s;
    limit_req_zone $binary_remote_addr zone=webhook:10m rate=30r/s;
    limit_req_status 429;

    # Gzip compression
    gzip on;
    gzip_vary on;
    gzip_min_length 1024;
    gzip_types text/plain text/css application/json application/javascript;

    # Redirect all plain-HTTP traffic to HTTPS
    server {
        listen 80;
        server_name seudominio.com;
        return 301 https://$server_name$request_uri;
    }

    server {
        listen 443 ssl http2;
        server_name seudominio.com;

        # SSL configuration
        ssl_certificate /etc/nginx/ssl/cert.pem;
        ssl_certificate_key /etc/nginx/ssl/key.pem;
        ssl_protocols TLSv1.2 TLSv1.3;
        # AES-256-GCM suites are paired with SHA384; "...-GCM-SHA512" names do
        # not exist and make OpenSSL reject the whole cipher list.
        ssl_ciphers ECDHE-ECDSA-AES256-GCM-SHA384:ECDHE-RSA-AES256-GCM-SHA384:ECDHE-ECDSA-CHACHA20-POLY1305:ECDHE-RSA-CHACHA20-POLY1305:ECDHE-ECDSA-AES128-GCM-SHA256:ECDHE-RSA-AES128-GCM-SHA256;
        ssl_prefer_server_ciphers off;
        ssl_session_cache shared:SSL:10m;

        # Security headers
        add_header Strict-Transport-Security "max-age=31536000; includeSubDomains" always;
        add_header X-Frame-Options DENY always;
        add_header X-Content-Type-Options nosniff always;
        add_header X-XSS-Protection "1; mode=block" always;

        # Proxy settings shared by every location below. proxy_set_header is
        # inherited only by locations that define no proxy_set_header of
        # their own, so keep all header directives at this level.
        proxy_http_version 1.1;
        proxy_set_header Host $host;
        proxy_set_header X-Real-IP $remote_addr;
        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
        proxy_set_header X-Forwarded-Proto $scheme;
        proxy_set_header X-Forwarded-Host $host;
        proxy_set_header X-Forwarded-Port $server_port;

        # WebSocket support
        proxy_set_header Upgrade $http_upgrade;
        proxy_set_header Connection $connection_upgrade;

        # Failover: on these errors, retry the request on the next upstream
        # server (only has an effect when the upstream lists more than one).
        proxy_next_upstream error timeout invalid_header http_500 http_502 http_503 http_504;

        # API endpoints with rate limiting
        location /api/ {
            limit_req zone=api burst=20 nodelay;
            proxy_pass http://n8n_backend;
        }

        # Webhook endpoints with rate limiting and a longer read timeout
        location /webhook/ {
            limit_req zone=webhook burst=50 nodelay;
            proxy_pass http://n8n_backend;
            proxy_read_timeout 120s;
        }

        # Everything else (editor UI, static assets, push connection)
        location / {
            proxy_pass http://n8n_backend;
        }
    }
}

Alternativa com HAProxy

# haproxy.cfg
global
    log /dev/log local0
    log /dev/log local1 notice
    chroot /var/lib/haproxy
    stats socket /run/haproxy/admin.sock mode 660 level admin expose-fd listeners
    stats timeout 30s
    user haproxy
    group haproxy
    daemon

defaults
    log     global
    mode    http
    option  httplog
    option  dontlognull
    # Add X-Forwarded-For so n8n sees the real client address.
    option  forwardfor
    timeout connect 5000
    timeout client  50000
    timeout server  50000

frontend n8n_frontend
    bind *:80
    bind *:443 ssl crt /etc/ssl/certs/n8n.pem

    # Redirect plain HTTP to HTTPS. Written as an http-request rule so it is
    # evaluated in file order together with the rules below.
    http-request redirect scheme https code 301 unless { ssl_fc }
    http-request set-header X-Forwarded-Proto https if { ssl_fc }

    # ACLs for the different kinds of traffic
    acl is_api path_beg /api/
    acl is_webhook path_beg /webhook/

    # Rate limiting: the table tracks each client IP's request rate over a
    # 10 s window (the keyword is "stick-table", with a hyphen).
    stick-table type ip size 100k expire 30s store http_req_rate(10s)
    http-request track-sc0 src
    # Thresholds are per 10 s window: 100 ~ 10 req/s for the API and
    # 300 ~ 30 req/s for webhooks. The counter covers every request from the
    # IP, but only API/webhook paths are rejected, so loading the editor's
    # assets is never answered with 429.
    http-request deny deny_status 429 if is_api { sc_http_req_rate(0) gt 100 }
    http-request deny deny_status 429 if is_webhook { sc_http_req_rate(0) gt 300 }

    default_backend n8n_backend

backend n8n_backend
    balance roundrobin
    option httpchk GET /healthz
    http-check expect status 200

    # HTTP-serving instances only. Queue workers pull executions from Redis
    # and do not serve the editor, API or webhooks, so they are not listed.
    server n8n-main n8n-main:5678 check maxconn 100

    # Failover settings
    option redispatch
    retries 3
    timeout connect 5s
    timeout server 30s

Failover Automático

Health Checks

#!/bin/bash
#
# Health check for a single n8n instance.
#
# Requests the health endpoint and exits 0 when it answers successfully,
# 1 otherwise (HTTP error status, connection failure or timeout).
#
# Optional environment overrides:
#   N8N_HOST         host to probe              (default: localhost)
#   N8N_PORT         port to probe              (default: 5678)
#   HEALTH_ENDPOINT  path of the health route   (default: /healthz)
#   HEALTH_TIMEOUT   max seconds for the probe  (default: 5)

# Settings
N8N_HOST="${N8N_HOST:-localhost}"
N8N_PORT="${N8N_PORT:-5678}"
HEALTH_ENDPOINT="${HEALTH_ENDPOINT:-/healthz}"
HEALTH_TIMEOUT="${HEALTH_TIMEOUT:-5}"

# --max-time bounds the whole request so a hung instance is reported as
# unhealthy instead of blocking the caller forever.
if curl -f -s --max-time "$HEALTH_TIMEOUT" \
        "http://${N8N_HOST}:${N8N_PORT}${HEALTH_ENDPOINT}" > /dev/null; then
    echo "OK: n8n está saudável"
    exit 0
else
    echo "ERROR: n8n não está respondendo"
    exit 1
fi

Monitoramento de Failover

#!/bin/bash
#
# Cluster failover monitor.
#
# Probes /healthz inside every container listed in CLUSTER_NODES. For each
# node that does not answer, it posts a notification to WEBHOOK_URL and
# restarts the container.

# Settings
CLUSTER_NODES=("n8n-main" "n8n-worker-1" "n8n-worker-2")
WEBHOOK_URL="https://hooks.slack.com/services/YOUR/WEBHOOK/URL"
HEALTH_URL="http://localhost:5678/healthz"

echo "=== Monitoramento do Cluster n8n ==="
echo

# Check every node
for node in "${CLUSTER_NODES[@]}"; do
    # The probe runs inside the container. The Alpine-based n8n image is not
    # expected to ship curl, so BusyBox wget is used instead; with curl
    # missing, every node would look dead and be restarted on every run.
    # NOTE(review): confirm wget is available in the image version you deploy.
    if docker exec "$node" wget -q -T 5 --spider "$HEALTH_URL" > /dev/null 2>&1; then
        echo "OK: Nó $node está saudável"
        continue
    fi

    echo "ALERTA: Nó $node não está respondendo!"

    # Send the notification. A failure here is reported but must not stop
    # the restart below.
    curl -s -o /dev/null --max-time 10 -X POST "$WEBHOOK_URL" \
      -H "Content-type: application/json" \
      -d "{\"text\":\"🚨 Nó $node não está respondendo!\"}" \
      || echo "AVISO: falha ao enviar notificação sobre $node"

    # Try to restart the container
    docker restart "$node"
done

Kubernetes Deployment

Deployment Completo

# n8n-deployment.yaml
---
# Main instance: serves the editor UI, API and webhooks, and enqueues
# executions in Redis (queue mode) for the worker Deployment.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: n8n-main
  labels:
    app: n8n
    component: main
spec:
  replicas: 1
  selector:
    matchLabels:
      app: n8n
      component: main
  template:
    metadata:
      labels:
        app: n8n
        component: main
    spec:
      containers:
      - name: n8n
        image: n8nio/n8n:latest
        ports:
        - containerPort: 5678
        env:
        # Queue mode is what distributes executions to the workers; "regular"
        # mode runs everything in this process and never uses Redis.
        - name: EXECUTIONS_MODE
          value: "queue"
        - name: QUEUE_BULL_REDIS_HOST
          value: "redis-service"
        - name: QUEUE_BULL_REDIS_PORT
          value: "6379"
        - name: DB_TYPE
          value: "postgresdb"
        - name: DB_POSTGRESDB_HOST
          value: "postgres-service"
        - name: DB_POSTGRESDB_DATABASE
          value: "n8n"
        - name: DB_POSTGRESDB_USER
          valueFrom:
            secretKeyRef:
              name: n8n-secrets
              key: db-user
        - name: DB_POSTGRESDB_PASSWORD
          valueFrom:
            secretKeyRef:
              name: n8n-secrets
              key: db-password
        # Must be the same key on main and workers, otherwise workers cannot
        # decrypt stored credentials.
        - name: N8N_ENCRYPTION_KEY
          valueFrom:
            secretKeyRef:
              name: n8n-secrets
              key: encryption-key
        - name: N8N_PROTOCOL
          value: "https"
        - name: N8N_HOST
          value: "seudominio.com"
        # Public URL used when n8n generates webhook addresses behind the
        # ingress.
        - name: WEBHOOK_URL
          value: "https://seudominio.com/"
        resources:
          requests:
            memory: "512Mi"
            cpu: "250m"
          limits:
            memory: "2Gi"
            cpu: "1000m"
        livenessProbe:
          httpGet:
            path: /healthz
            port: 5678
          initialDelaySeconds: 30
          periodSeconds: 10
        readinessProbe:
          httpGet:
            path: /healthz
            port: 5678
          initialDelaySeconds: 5
          periodSeconds: 5
---
# Workers: started with the `worker` argument, they pull executions from the
# Redis queue. They do not serve the editor, API or webhooks.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: n8n-worker
  labels:
    app: n8n
    component: worker
spec:
  replicas: 3
  selector:
    matchLabels:
      app: n8n
      component: worker
  template:
    metadata:
      labels:
        app: n8n
        component: worker
    spec:
      containers:
      - name: n8n
        image: n8nio/n8n:latest
        # Passed to the image entrypoint so the container runs `n8n worker`.
        args: ["worker"]
        ports:
        - containerPort: 5678
        env:
        - name: EXECUTIONS_MODE
          value: "queue"
        - name: QUEUE_BULL_REDIS_HOST
          value: "redis-service"
        - name: QUEUE_BULL_REDIS_PORT
          value: "6379"
        # Workers expose /healthz (port 5678 by default) only when this is
        # enabled; without it the probes below would fail and restart the pod.
        - name: QUEUE_HEALTH_CHECK_ACTIVE
          value: "true"
        - name: DB_TYPE
          value: "postgresdb"
        - name: DB_POSTGRESDB_HOST
          value: "postgres-service"
        - name: DB_POSTGRESDB_DATABASE
          value: "n8n"
        - name: DB_POSTGRESDB_USER
          valueFrom:
            secretKeyRef:
              name: n8n-secrets
              key: db-user
        - name: DB_POSTGRESDB_PASSWORD
          valueFrom:
            secretKeyRef:
              name: n8n-secrets
              key: db-password
        - name: N8N_ENCRYPTION_KEY
          valueFrom:
            secretKeyRef:
              name: n8n-secrets
              key: encryption-key
        resources:
          requests:
            memory: "512Mi"
            cpu: "250m"
          limits:
            memory: "2Gi"
            cpu: "1000m"
        livenessProbe:
          httpGet:
            path: /healthz
            port: 5678
          initialDelaySeconds: 30
          periodSeconds: 10
        readinessProbe:
          httpGet:
            path: /healthz
            port: 5678
          initialDelaySeconds: 5
          periodSeconds: 5
# n8n-service.yaml
---
# ClusterIP Service in front of the n8n main pods.
apiVersion: v1
kind: Service
metadata:
  name: n8n-service
  labels:
    app: n8n
spec:
  # Select only the main pods. Matching on `app: n8n` alone would also pick
  # up the pods labelled `component: worker` and send HTTP traffic to them.
  selector:
    app: n8n
    component: main
  ports:
  - port: 5678
    targetPort: 5678
    protocol: TCP
  type: ClusterIP
# n8n-ingress.yaml
---
# Exposes n8n-service at https://seudominio.com through the nginx ingress
# controller, with a certificate issued by cert-manager.
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  name: n8n-ingress
  annotations:
    cert-manager.io/cluster-issuer: "letsencrypt-prod"
    nginx.ingress.kubernetes.io/ssl-redirect: "true"
    nginx.ingress.kubernetes.io/force-ssl-redirect: "true"
spec:
  # Replaces the deprecated `kubernetes.io/ingress.class` annotation.
  ingressClassName: nginx
  tls:
  - hosts:
    - seudominio.com
    secretName: n8n-tls
  rules:
  - host: seudominio.com
    http:
      paths:
      - path: /
        pathType: Prefix
        backend:
          service:
            name: n8n-service
            port:
              number: 5678

Monitoramento

Script de Métricas

#!/bin/bash
#
# Prints a snapshot of the n8n cluster: container status, resource usage,
# recent error/warning log lines of n8n-main, per-container health and
# queue counters read from Redis.
#
# Optional environment override:
#   QUEUE_KEY_PREFIX  Redis key prefix of the job queue (default: bull:jobs)

N8N_CONTAINERS=(n8n-main n8n-worker-1 n8n-worker-2)

# Bull stores queue data under "<prefix>:<queue>:<state>". n8n's defaults are
# expected to be prefix "bull" and queue "jobs".
# NOTE(review): adjust if QUEUE_BULL_PREFIX is customised in your deployment.
QUEUE_KEY_PREFIX="${QUEUE_KEY_PREFIX:-bull:jobs}"

# Print the size of one queue state, or "indisponível" when Redis cannot be
# queried.
#   $1  Redis command: llen for lists (wait, active), zcard for sorted sets
#       (completed, failed)
#   $2  queue state name
queue_count() {
    local count
    if count=$(redis-cli "$1" "${QUEUE_KEY_PREFIX}:$2" 2>/dev/null) && [ -n "$count" ]; then
        echo "$count"
    else
        echo "indisponível"
    fi
}

echo "=== Métricas do Cluster n8n ==="
echo

echo "1. Status dos Containers:"
docker ps --filter "name=n8n" --format "table {{.Names}}\t{{.Status}}\t{{.Ports}}"
echo

echo "2. Uso de Recursos:"
docker stats --no-stream --format "table {{.Container}}\t{{.CPUPerc}}\t{{.MemUsage}}\t{{.NetIO}}"
echo

echo "3. Logs de Erro (últimas 10 linhas):"
docker logs --tail 10 n8n-main 2>&1 | grep -E "(ERROR|WARN)" || echo "Nenhum erro encontrado"
echo

echo "4. Health Checks:"
for container in "${N8N_CONTAINERS[@]}"; do
    # Probe from inside the container with BusyBox wget; the Alpine-based n8n
    # image is not expected to ship curl.
    # NOTE(review): confirm wget is available in the image version you deploy.
    if docker exec "$container" wget -q -T 5 --spider "http://localhost:5678/healthz" > /dev/null 2>&1; then
        echo "✅ $container: Saudável"
    else
        echo "❌ $container: Não respondendo"
    fi
done
echo

echo "5. Métricas Redis:"
echo "Jobs na fila: $(queue_count llen wait)"
echo "Jobs em execução: $(queue_count llen active)"
# NOTE(review): finished jobs may be removed from Redis as soon as they end,
# in which case the two counters below stay at 0.
echo "Jobs processados: $(queue_count zcard completed)"
echo "Jobs falharam: $(queue_count zcard failed)"

Alertas Automáticos

#!/bin/bash
#
# Threshold alerts for the n8n cluster.
#
# Reads CPU and memory usage of the n8n-main container and the number of
# jobs waiting in the Redis queue, and posts a message to WEBHOOK_URL for
# every value above its threshold.

# Settings
ALERT_THRESHOLD_CPU=80
ALERT_THRESHOLD_MEMORY=85
ALERT_THRESHOLD_QUEUE=1000
WEBHOOK_URL="https://hooks.slack.com/services/YOUR/WEBHOOK/URL"
# Bull list holding the waiting jobs ("<prefix>:<queue>:wait").
# NOTE(review): assumes n8n's default prefix "bull" and queue "jobs".
QUEUE_KEY="${QUEUE_KEY:-bull:jobs:wait}"

# Post one alert message ($1) to the webhook.
notify() {
    curl -s -o /dev/null --max-time 10 -X POST "$WEBHOOK_URL" \
      -H "Content-type: application/json" \
      -d "{\"text\":\"$1\"}"
}

# Succeed when $1 is numerically greater than $2. Uses awk, so decimals work
# without depending on bc; an empty or non-numeric $1 counts as 0.
exceeds() {
    awk -v value="$1" -v limit="$2" 'BEGIN { exit !((value + 0) > (limit + 0)) }'
}

# Check CPU
CPU_USAGE=$(docker stats --no-stream --format "{{.CPUPerc}}" n8n-main | sed 's/%//')
if exceeds "$CPU_USAGE" "$ALERT_THRESHOLD_CPU"; then
    notify "⚠️ CPU alta: ${CPU_USAGE}%"
fi

# Check memory
MEMORY_USAGE=$(docker stats --no-stream --format "{{.MemPerc}}" n8n-main | sed 's/%//')
if exceeds "$MEMORY_USAGE" "$ALERT_THRESHOLD_MEMORY"; then
    notify "⚠️ Memória alta: ${MEMORY_USAGE}%"
fi

# Check queue. If Redis is unreachable the output is empty or an error text;
# treat anything that is not a plain integer as 0 so the test below cannot
# fail with a syntax error.
QUEUE_SIZE=$(redis-cli llen "$QUEUE_KEY" 2>/dev/null)
if ! [[ "$QUEUE_SIZE" =~ ^[0-9]+$ ]]; then
    QUEUE_SIZE=0
fi
if [ "$QUEUE_SIZE" -gt "$ALERT_THRESHOLD_QUEUE" ]; then
    notify "⚠️ Fila muito grande: $QUEUE_SIZE jobs"
fi

Troubleshooting

Problemas Comuns

#!/bin/bash
#
# Diagnostics for the n8n cluster: environment of n8n-main (secrets masked),
# health endpoint of every node, nginx rate-limit log entries and an nginx
# configuration test.
#
# Optional environment override:
#   NGINX_CONTAINER  name of the nginx container. When set, steps 3 and 4 run
#                    against that container; when empty they run against an
#                    nginx installed on this host, as before.

NGINX_CONTAINER="${NGINX_CONTAINER:-}"

echo "=== Diagnóstico do Cluster n8n ==="
echo

echo "1. Verificar variáveis de ambiente:"
# Values of variables whose name contains PASSWORD, KEY, SECRET or TOKEN are
# masked so the database password and encryption key never reach a terminal
# or a pasted log.
docker exec n8n-main env \
    | grep -E "(DB_|REDIS_|QUEUE_|EXECUTIONS_|N8N_)" \
    | sed -E 's/^([A-Za-z0-9_]*(PASSWORD|KEY|SECRET|TOKEN)[A-Za-z0-9_]*)=.*/\1=********/'
echo

echo "2. Verificar conectividade entre nós:"
for node in n8n-main n8n-worker-1 n8n-worker-2; do
    echo "=== $node ==="
    # BusyBox wget is used because the Alpine-based n8n image is not expected
    # to ship curl; -S prints the response headers.
    # NOTE(review): confirm wget is available in the image version you deploy.
    docker exec "$node" wget -q -S -T 5 --spider http://localhost:5678/healthz \
        || echo "Falha ao consultar /healthz em $node"
    echo
done

echo "3. Verificar logs de erro:"
if [ -n "$NGINX_CONTAINER" ]; then
    docker logs "$NGINX_CONTAINER" 2>&1 | grep "limiting requests"
else
    grep "limiting requests" /var/log/nginx/error.log
fi
echo

echo "4. Verificar configuração do nginx:"
if [ -n "$NGINX_CONTAINER" ]; then
    docker exec "$NGINX_CONTAINER" nginx -t
else
    nginx -t
fi

Checklist de Produção

Configuração

  • Redis instalado e configurado
  • Autenticação Redis configurada
  • Variáveis de ambiente configuradas
  • Conexão Redis testada
  • Workers configurados corretamente

Performance

  • Configurações de memória otimizadas
  • Persistência Redis configurada
  • Workers distribuídos adequadamente
  • Timeouts configurados
  • Retry policies definidas

Monitoramento

  • Scripts de monitoramento configurados
  • Alertas configurados
  • Métricas sendo coletadas
  • Logs estruturados
  • Dashboard de monitoramento

Segurança

  • Senha Redis configurada
  • Acesso restrito por IP
  • SSL/TLS configurado (se necessário)
  • Backup Redis configurado
  • Logs de acesso ativados

Dica Pro

Configure health checks adequados e monitore a distribuição de carga entre os nós do cluster para garantir alta disponibilidade.

Importante

Sempre teste o clustering em ambiente de desenvolvimento antes de aplicar em produção. Configure backups adequados para o Redis e banco de dados.