Monitoring and Alerts

This document explains how to implement comprehensive monitoring for n8n, covering performance metrics, structured logs, proactive alerts, observability dashboards, APM integrations, and notification systems that detect problems before they affect end users, giving you full visibility into the health and performance of your automations in production.


System Metrics

CPU

#!/bin/bash
echo "=== System Metrics ==="

# CPU usage: user + system time reported by top (field layout varies by distro)
CPU_USAGE=$(top -bn1 | grep "Cpu(s)" | awk '{print $2 + $4}')
echo "CPU Usage: ${CPU_USAGE}%"

Memory

# Memory Usage
MEMORY_USAGE=$(free | grep Mem | awk '{printf("%.2f", $3/$2 * 100.0)}')
echo "Memory Usage: ${MEMORY_USAGE}%"

Disk

# Disk Usage
DISK_USAGE=$(df / | tail -1 | awk '{print $5}' | cut -d'%' -f1)
echo "Disk Usage: ${DISK_USAGE}%"

Load Average

# Load Average
LOAD_AVG=$(uptime | awk -F'load average:' '{print $2}' | awk '{print $1}' | cut -d',' -f1)
echo "Load Average: $LOAD_AVG"

Network Metrics

#!/bin/bash
echo "=== Network Metrics ==="

# Active connections (use `ss -tan` if netstat is not installed)
ACTIVE_CONNECTIONS=$(netstat -an | grep ESTABLISHED | wc -l)
echo "Active Connections: $ACTIVE_CONNECTIONS"

# n8n connections (default port 5678)
N8N_CONNECTIONS=$(netstat -an | grep :5678 | grep ESTABLISHED | wc -l)
echo "n8n Connections: $N8N_CONNECTIONS"

# Network I/O (adjust the interface name; it may be ens3, enp0s3, etc.)
NETWORK_IN=$(awk '/eth0:/ {print $2}' /proc/net/dev)
NETWORK_OUT=$(awk '/eth0:/ {print $10}' /proc/net/dev)
echo "Network In: $NETWORK_IN bytes"
echo "Network Out: $NETWORK_OUT bytes"

Application Metrics

n8n Metrics

#!/bin/bash
N8N_URL="http://localhost:5678"
API_KEY="your_api_key"

echo "=== n8n Metrics ==="

# Health Check
if curl -f -s "$N8N_URL/healthz" > /dev/null; then
    echo "n8n Status: ✅ Online"
else
    echo "n8n Status: ❌ Offline"
    exit 1
fi

# Total workflows (the public API wraps results in a "data" array)
WORKFLOW_COUNT=$(curl -s -H "X-N8N-API-KEY: $API_KEY" \
  "$N8N_URL/api/v1/workflows" | jq '.data | length')
echo "Total Workflows: $WORKFLOW_COUNT"

# Active workflows
ACTIVE_WORKFLOWS=$(curl -s -H "X-N8N-API-KEY: $API_KEY" \
  "$N8N_URL/api/v1/workflows" | jq '[.data[] | select(.active == true)] | length')
echo "Active Workflows: $ACTIVE_WORKFLOWS"

# Executions in the last 24h (UTC ISO timestamps compare as strings;
# the API caps limit per page, so paginate on busy instances)
SINCE_24H=$(date -u -d '24 hours ago' +%Y-%m-%dT%H:%M:%S)
EXECUTIONS_24H=$(curl -s -H "X-N8N-API-KEY: $API_KEY" \
  "$N8N_URL/api/v1/executions?limit=250" | \
  jq --arg since "$SINCE_24H" '[.data[] | select(.startedAt > $since)] | length')
echo "Executions (24h): $EXECUTIONS_24H"

# Error rate over the last hour
SINCE_1H=$(date -u -d '1 hour ago' +%Y-%m-%dT%H:%M:%S)
ERROR_RATE=$(curl -s -H "X-N8N-API-KEY: $API_KEY" \
  "$N8N_URL/api/v1/executions?limit=250" | \
  jq --arg since "$SINCE_1H" '
      [.data[] | select(.startedAt > $since)] as $recent |
      ($recent | map(select(.status == "error")) | length) as $errors |
      ($recent | length) as $total |
      if $total > 0 then ($errors * 100 / $total) else 0 end')
echo "Error Rate (1h): ${ERROR_RATE}%"

Performance Metrics

#!/bin/bash
echo "=== Performance Metrics ==="

# API Response Time
API_RESPONSE_TIME=$(curl -o /dev/null -s -w "%{time_total}" \
  http://localhost:5678/api/v1/workflows)
echo "API Response Time: ${API_RESPONSE_TIME}s"

# UI Response Time
UI_RESPONSE_TIME=$(curl -o /dev/null -s -w "%{time_total}" \
  http://localhost:5678/)
echo "UI Response Time: ${UI_RESPONSE_TIME}s"

# n8n container memory (running `node -e` inside the container would measure
# a fresh Node.js process, not n8n itself, so read Docker's stats instead)
NODE_MEMORY=$(docker stats --no-stream --format "{{.MemUsage}}" n8n 2>/dev/null || echo "N/A")
echo "n8n Memory: $NODE_MEMORY"

# n8n container CPU
NODE_CPU=$(docker stats --no-stream --format "{{.CPUPerc}}" n8n 2>/dev/null || echo "N/A")
echo "n8n CPU: $NODE_CPU"

Alert System

Alert Configuration

#!/bin/bash

# Settings
EMAIL_ALERTS="admin@empresa.com"
API_KEY="your_api_key"
ALERT_THRESHOLD_CPU=80
ALERT_THRESHOLD_MEMORY=85
ALERT_THRESHOLD_DISK=90
ALERT_THRESHOLD_ERROR_RATE=5
ALERT_THRESHOLD_RESPONSE_TIME=5

# Send an alert via Slack, email, and the local log
send_alert() {
    local message="$1"
    local severity="$2"

    # Slack
    curl -X POST \
      -H "Content-type: application/json" \
      -d "{\"text\":\"$severity $message\"}" \
      https://hooks.slack.com/services/YOUR_WEBHOOK_URL

    # Email
    echo "$message" | mail -s "n8n Alert: $severity" "$EMAIL_ALERTS"

    # Log
    echo "$(date): $severity $message" >> /var/log/n8n/alerts.log
}

# Check CPU
CPU_USAGE=$(top -bn1 | grep "Cpu(s)" | awk '{print $2 + $4}')
if (( $(echo "$CPU_USAGE > $ALERT_THRESHOLD_CPU" | bc -l) )); then
    send_alert "CPU usage is ${CPU_USAGE}%" "⚠️"
fi

# Check memory
MEMORY_USAGE=$(free | grep Mem | awk '{printf("%.0f", $3/$2 * 100.0)}')
if [ "$MEMORY_USAGE" -gt "$ALERT_THRESHOLD_MEMORY" ]; then
    send_alert "Memory usage is ${MEMORY_USAGE}%" "⚠️"
fi

# Check disk
DISK_USAGE=$(df / | tail -1 | awk '{print $5}' | cut -d'%' -f1)
if [ "$DISK_USAGE" -gt "$ALERT_THRESHOLD_DISK" ]; then
    send_alert "Disk usage is ${DISK_USAGE}%" "🚨"
fi

# Check n8n availability
if ! curl -f -s http://localhost:5678/healthz > /dev/null; then
    send_alert "n8n is not responding" "🚨"
fi

# Check error rate over the last 10 minutes
SINCE=$(date -u -d '10 minutes ago' +%Y-%m-%dT%H:%M:%S)
ERROR_RATE=$(curl -s -H "X-N8N-API-KEY: $API_KEY" \
  "http://localhost:5678/api/v1/executions?limit=250" | \
  jq --arg since "$SINCE" '
      [.data[] | select(.startedAt > $since)] as $recent |
      ($recent | map(select(.status == "error")) | length) as $errors |
      ($recent | length) as $total |
      if $total > 0 then ($errors * 100 / $total) else 0 end')

if (( $(echo "$ERROR_RATE > $ALERT_THRESHOLD_ERROR_RATE" | bc -l) )); then
    send_alert "Error rate is ${ERROR_RATE}%" "🚨"
fi

# Check response time
RESPONSE_TIME=$(curl -o /dev/null -s -w "%{time_total}" \
  http://localhost:5678/api/v1/workflows)

if (( $(echo "$RESPONSE_TIME > $ALERT_THRESHOLD_RESPONSE_TIME" | bc -l) )); then
    send_alert "Response time is ${RESPONSE_TIME}s" "⚠️"
fi
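
To evaluate the checks continuously, schedule the script with cron (the path is a placeholder):

# crontab -e: run the alert checks every 5 minutes
*/5 * * * * /opt/n8n-monitoring/check-alerts.sh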

Time-Based Alerts

#!/bin/bash

# Time-of-day thresholds: relax outside business hours (08:00-18:00 assumed)
HOUR=$(date +%H)
if [ "$HOUR" -ge 8 ] && [ "$HOUR" -lt 18 ]; then
    ALERT_THRESHOLD_CPU=80; ALERT_THRESHOLD_MEMORY=85; ALERT_THRESHOLD_ERROR_RATE=5
else
    ALERT_THRESHOLD_CPU=90; ALERT_THRESHOLD_MEMORY=95; ALERT_THRESHOLD_ERROR_RATE=10
fi
echo "Current thresholds - CPU: ${ALERT_THRESHOLD_CPU}%, Memory: ${ALERT_THRESHOLD_MEMORY}%, Error Rate: ${ALERT_THRESHOLD_ERROR_RATE}%"

Observability Dashboards

Grafana Configuration

version: '3.8'

services:
  grafana:
    image: grafana/grafana:latest
    ports:
      - "3000:3000"
    environment:
      - GF_SECURITY_ADMIN_PASSWORD=YOUR_ADMIN_PASSWORD_HERE
    volumes:
      - grafana_data:/var/lib/grafana
      - ./grafana/dashboards:/etc/grafana/provisioning/dashboards
      - ./grafana/datasources:/etc/grafana/provisioning/datasources

  prometheus:
    image: prom/prometheus:latest
    ports:
      - "9090:9090"
    volumes:
      - ./prometheus/prometheus.yml:/etc/prometheus/prometheus.yml
      - prometheus_data:/prometheus

volumes:
  grafana_data:
  prometheus_data:
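
The Prometheus configuration below scrapes node-exporter and postgres-exporter, which are not defined in the compose file above; a sketch of the node-exporter service you would add alongside grafana and prometheus:

  node-exporter:
    image: prom/node-exporter:latest
    ports:
      - "9100:9100"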

Prometheus Configuration

Note: n8n only exposes the /metrics endpoint when started with N8N_METRICS=true.

global:
  scrape_interval: 15s

scrape_configs:
  - job_name: 'n8n'
    static_configs:
      - targets: ['n8n:5678']
    metrics_path: '/metrics'
    scrape_interval: 30s

  - job_name: 'node-exporter'
    static_configs:
      - targets: ['node-exporter:9100']

  - job_name: 'postgres'
    static_configs:
      - targets: ['postgres-exporter:9187']
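
On top of these scrape jobs, a basic availability rule can page when the n8n target disappears; a minimal sketch, assuming an Alertmanager is set up and the file is referenced via rule_files in prometheus.yml:

groups:
  - name: n8n
    rules:
      - alert: N8nDown
        expr: up{job="n8n"} == 0
        for: 2m
        labels:
          severity: critical
        annotations:
          summary: "n8n target has been unreachable for 2 minutes"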

n8n Dashboard

A simplified dashboard definition; the metric names in the panel queries are illustrative and must match what your exporters actually expose:

{
  "title": "n8n Monitoring Dashboard",
  "panels": [
    {
      "title": "System Metrics",
      "type": "graph",
      "targets": [
        {
          "expr": "cpu_usage_percent",
          "legendFormat": "CPU Usage"
        },
        {
          "expr": "memory_usage_percent",
          "legendFormat": "Memory Usage"
        }
      ]
    },
    {
      "title": "n8n Metrics",
      "type": "graph",
      "targets": [
        {
          "expr": "n8n_workflows_total",
          "legendFormat": "Total Workflows"
        },
        {
          "expr": "n8n_workflows_active",
          "legendFormat": "Active Workflows"
        }
      ]
    },
    {
      "title": "Execution Metrics",
      "type": "graph",
      "targets": [
        {
          "expr": "n8n_executions_total",
          "legendFormat": "Total Executions"
        },
        {
          "expr": "n8n_executions_failed",
          "legendFormat": "Failed Executions"
        }
      ]
    }
  ]
}
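
The Grafana service above mounts ./grafana/datasources for provisioning; a minimal datasource file (e.g. ./grafana/datasources/prometheus.yml, a hypothetical path) could look like:

apiVersion: 1

datasources:
  - name: Prometheus
    type: prometheus
    access: proxy
    url: http://prometheus:9090
    isDefault: true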

Datadog Integration

apiVersion: v1
kind: ConfigMap
metadata:
  name: datadog-config
data:
  datadog.yaml: |
    api_key: YOUR_DATADOG_API_KEY
    site: datadoghq.com
    
    logs:
      enabled: true
      container_collect_all: true
    
    apm_config:
      enabled: true
    
    process_config:
      enabled: true
      process_collection:
        enabled: true

Custom Metrics

Custom metrics can be sent to the Datadog agent's DogStatsD port using the hot-shots client:

const { StatsD } = require('hot-shots');

const dogstatsd = new StatsD({
  host: 'localhost',
  port: 8125,
  prefix: 'n8n.'
});

// Workflow metrics
function trackWorkflowExecution(workflowId, duration, status) {
  dogstatsd.timing('workflow.execution_time', duration, [`workflow_id:${workflowId}`]);
  dogstatsd.increment('workflow.executions', 1, [`status:${status}`, `workflow_id:${workflowId}`]);
}

// Error metrics
function trackError(errorType, errorMessage) {
  dogstatsd.increment('errors.total', 1, [`type:${errorType}`]);
  dogstatsd.event('n8n.error', errorMessage, {
    alert_type: 'error',
    tags: [`error_type:${errorType}`]
  });
}

// Performance metrics
function trackAPIPerformance(endpoint, duration, statusCode) {
  dogstatsd.timing('api.response_time', duration, [`endpoint:${endpoint}`]);
  dogstatsd.increment('api.requests', 1, [`endpoint:${endpoint}`, `status:${statusCode}`]);
}

Structured Logs

Log Configuration

An example of a structured log entry:

{
  "timestamp": "2024-01-15T10:30:00.000Z",
  "level": "info",
  "message": "Workflow execution started",
  "workflowId": "abc123",
  "workflowName": "Email Marketing Campaign",
  "userId": "user456",
  "executionId": "exec789",
  "metadata": {
    "nodeCount": 15,
    "estimatedDuration": 300000,
    "priority": "high"
  }
}
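
For n8n to write such entries to a file in the first place, its logging is controlled by environment variables; a sketch using the variable names documented for recent n8n versions (verify them against your release):

# Assumed n8n logging settings; confirm the variable names for your version
export N8N_LOG_LEVEL=info
export N8N_LOG_OUTPUT=file
export N8N_LOG_FILE_LOCATION=/var/log/n8n/n8n.log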

Log Centralization

version: '3.8'

services:
  elasticsearch:
    image: docker.elastic.co/elasticsearch/elasticsearch:8.11.0
    environment:
      - discovery.type=single-node
      - xpack.security.enabled=false
    ports:
      - "9200:9200"
    volumes:
      - elasticsearch_data:/usr/share/elasticsearch/data

  logstash:
    image: docker.elastic.co/logstash/logstash:8.11.0
    ports:
      - "5044:5044"
    volumes:
      - ./logstash/pipeline:/usr/share/logstash/pipeline
    depends_on:
      - elasticsearch

  kibana:
    image: docker.elastic.co/kibana/kibana:8.11.0
    ports:
      - "5601:5601"
    environment:
      - ELASTICSEARCH_HOSTS=http://elasticsearch:9200
    depends_on:
      - elasticsearch

volumes:
  elasticsearch_data:

Logstash Pipeline

input {
  beats {
    port => 5044
  }
}

filter {
  if [fields][service] == "n8n" {
    json {
      source => "message"
    }

    date {
      match => [ "timestamp", "ISO8601" ]
      target => "@timestamp"
    }

    mutate {
      add_field => {
        "service" => "n8n"
      }
      add_field => {
        "environment" => "${ENVIRONMENT}"
      }
    }
  }
}

output {
  elasticsearch {
    hosts => ["elasticsearch:9200"]
    index => "n8n-logs-%{+YYYY.MM.dd}"
  }
}
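
The beats input above expects a log shipper such as Filebeat; a minimal filebeat.yml sketch, where the log path is an assumption and the service field matches the pipeline's filter:

filebeat.inputs:
  - type: log
    paths:
      - /var/log/n8n/*.log
    fields:
      service: n8n

output.logstash:
  hosts: ["logstash:5044"]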

APM Tool Integration

New Relic

Node.js agent configuration (newrelic.js):

'use strict';

exports.config = {
  app_name: ['n8n'],
  license_key: 'YOUR_NEW_RELIC_LICENSE_KEY',
  logging: {
    level: 'info'
  },
  distributed_tracing: {
    enabled: true
  },
  transaction_tracer: {
    enabled: true,
    transaction_threshold: 5,
    record_sql: 'obfuscated',
    stack_trace_threshold: 0.5,
    explain_threshold: 0.5
  },
  error_collector: {
    enabled: true,
    collect_events: true
  },
  browser_monitoring: {
    enable: true
  }
};

AppDynamics

const appd = require('appdynamics');

appd.profile({
  controllerHostName: 'your-controller-host',
  controllerPort: 8090,
  controllerSslEnabled: false,
  accountName: 'your-account',
  accountAccessKey: 'your-access-key',
  applicationName: 'n8n',
  tierName: 'n8n-tier',
  nodeName: 'n8n-node'
});

// Custom business transaction (via the Node.js agent API)
const txn = appd.startTransaction('Workflow Execution');
// ... workflow execution code ...
txn.end();

Monitoring Checklist

Metrics

  • System metrics configured
  • Application metrics implemented
  • Business metrics defined
  • Automatic collection configured
  • Data retention defined

Alerts

  • Critical alerts configured
  • Warning alerts configured
  • Alert escalation defined
  • Notification channels configured
  • Alert tests performed

Dashboards

  • Main dashboard created
  • Team-specific dashboards
  • Real-time metrics
  • Historical data
  • Report exports

Logs

  • Structured logs configured
  • Log centralization implemented
  • Log retention defined
  • Search and filters configured
  • Log-based alerts

Pro Tip

Configure context-aware alerts that avoid false positives. Anomaly detection (for example, machine-learning-based baselining) can help catch unusual patterns.

Important

Always test monitoring configurations in a development environment before applying them to production. Keep dashboards up to date and document every configuration.