Skip to main content

Observability Stack Installation

This guide covers deploying the complete observability stack for RavenmaskOS, providing metrics collection, log aggregation, distributed tracing, and visualization.

Overview

The observability stack consists of:

| Component | Purpose | Port | URL |
|---|---|---|---|
| Prometheus | Metrics collection and alerting | 9090 | prometheus.ravenhelm.dev |
| Loki | Log aggregation | 3100 | - |
| Tempo | Distributed tracing | 3200 | - |
| Grafana | Visualization and dashboards | 3000 | grafana.ravenhelm.dev |
| Promtail | Log shipping agent | 9080 | - |
| Blackbox Exporter | Endpoint probing | 9115 | - |

Prerequisites

Before deploying the observability stack:

  • Core Stack is running (PostgreSQL, Redis, Traefik)
  • Identity Stack is configured (Zitadel for Grafana OAuth)
  • Docker and Docker Compose installed
  • ravenhelm_net network exists

Directory Structure

# Per-service configuration and data directories
mkdir -p ~/ravenhelm/services/{prometheus,loki,tempo,grafana}
mkdir -p ~/ravenhelm/data/{prometheus,loki,tempo,grafana}
mkdir -p ~/ravenhelm/services/prometheus/rules
# Used later in this guide: Grafana data source provisioning and the shared .env file
mkdir -p ~/ravenhelm/services/grafana/provisioning/datasources
mkdir -p ~/ravenhelm/secrets

Step 1: Deploy Prometheus

Prometheus collects and stores metrics from all services.

Configuration

Create ~/ravenhelm/services/prometheus/prometheus.yml:

# Prometheus server configuration: global timings, rule files and scrape jobs.
global:
  scrape_interval: 15s
  evaluation_interval: 15s

# No Alertmanager targets are configured.
alerting:
  alertmanagers:
    - static_configs:
        - targets: []

rule_files:
  - '/etc/prometheus/rules/*.yml'

scrape_configs:
  # Prometheus scraping itself
  - job_name: prometheus
    static_configs:
      - targets:
          - 'localhost:9090'

  - job_name: traefik
    static_configs:
      - targets:
          - 'traefik:8080'

  # Host-level endpoints, addressed through host.docker.internal
  - job_name: node
    static_configs:
      - targets:
          - 'host.docker.internal:9100'

  - job_name: docker
    static_configs:
      - targets:
          - 'host.docker.internal:9323'

  - job_name: postgres
    static_configs:
      - targets:
          - 'postgres-exporter:9187'

  - job_name: redis
    static_configs:
      - targets:
          - 'redis-exporter:9121'

  # HTTP probes routed through the Blackbox exporter
  - job_name: blackbox
    metrics_path: /probe
    params:
      module:
        - http_2xx
    static_configs:
      - targets:
          - 'https://grafana.ravenhelm.dev'
          - 'https://auth.ravenhelm.dev'
          - 'https://n8n.ravenhelm.dev'
    relabel_configs:
      # Pass the listed URL to the exporter as the ?target= parameter
      - source_labels:
          - __address__
        target_label: __param_target
      # Keep the probed URL as the instance label
      - source_labels:
          - __param_target
        target_label: instance
      # Send the scrape itself to the exporter
      - target_label: __address__
        replacement: 'blackbox-exporter:9115'

Docker Compose

Create ~/ravenhelm/services/prometheus/docker-compose.yml:

# Prometheus server plus the Blackbox exporter it probes endpoints through.
services:
  prometheus:
    image: prom/prometheus:v2.54.1
    container_name: prometheus
    restart: unless-stopped
    command:
      - '--config.file=/etc/prometheus/prometheus.yml'
      - '--storage.tsdb.path=/prometheus'
      - '--storage.tsdb.retention.time=15d'
      # Allows POST /-/reload (used in Troubleshooting)
      - '--web.enable-lifecycle'
      # NOTE(review): the admin API exposes destructive TSDB endpoints; confirm it is needed
      - '--web.enable-admin-api'
      # Required so Tempo's metrics generator can remote_write to /api/v1/write
      - '--web.enable-remote-write-receiver'
    volumes:
      - ./prometheus.yml:/etc/prometheus/prometheus.yml:ro
      - ./rules:/etc/prometheus/rules:ro
      - prometheus_data:/prometheus
    ports:
      # Loopback only: lets the host-side curl checks in this guide reach
      # localhost:9090 without exposing Prometheus beyond Traefik
      - '127.0.0.1:9090:9090'
    extra_hosts:
      # Makes host.docker.internal (node/docker scrape targets) resolvable on Linux
      - 'host.docker.internal:host-gateway'
    networks:
      - ravenhelm_net
    labels:
      - "traefik.enable=true"
      - "traefik.http.routers.prometheus.rule=Host(`prometheus.ravenhelm.dev`)"
      - "traefik.http.routers.prometheus.entrypoints=websecure"
      - "traefik.http.routers.prometheus.tls.certresolver=letsencrypt"
      - "traefik.http.routers.prometheus.middlewares=oauth2-proxy@docker"
      - "traefik.http.services.prometheus.loadbalancer.server.port=9090"

  blackbox-exporter:
    image: prom/blackbox-exporter:v0.25.0
    container_name: blackbox-exporter
    restart: unless-stopped
    command:
      - '--config.file=/etc/blackbox/blackbox.yml'
    volumes:
      - ./blackbox.yml:/etc/blackbox/blackbox.yml:ro
    networks:
      - ravenhelm_net

volumes:
  prometheus_data:
    name: prometheus_data

networks:
  ravenhelm_net:
    external: true

Blackbox Exporter Configuration

Create ~/ravenhelm/services/prometheus/blackbox.yml:

# Blackbox exporter probe modules.
modules:
  # HTTP GET probe; follows redirects and accepts 200/301/302 over IPv4
  http_2xx:
    prober: http
    timeout: 5s
    http:
      method: GET
      follow_redirects: true
      preferred_ip_protocol: 'ip4'
      valid_http_versions:
        - 'HTTP/1.1'
        - 'HTTP/2.0'
      valid_status_codes:
        - 200
        - 301
        - 302

  # Plain TCP connect probe
  tcp_connect:
    prober: tcp
    timeout: 5s

Deploy Prometheus

cd ~/ravenhelm/services/prometheus
docker compose up -d

Step 2: Deploy Loki

Loki aggregates logs from all services.

Configuration

Create ~/ravenhelm/services/loki/loki-config.yml:

# Single-node Loki with filesystem storage under /loki.
auth_enabled: false

server:
  http_listen_port: 3100
  grpc_listen_port: 9096

common:
  instance_addr: 127.0.0.1
  path_prefix: /loki
  storage:
    filesystem:
      chunks_directory: /loki/chunks
      rules_directory: /loki/rules
  replication_factor: 1
  ring:
    kvstore:
      store: inmemory

query_range:
  results_cache:
    cache:
      embedded_cache:
        enabled: true
        max_size_mb: 100

schema_config:
  configs:
    - from: 2020-10-24
      store: tsdb
      object_store: filesystem
      schema: v13
      index:
        prefix: index_
        period: 24h

# Retention is enforced by the compactor; without retention_enabled the
# limits_config.retention_period below is never applied and logs are kept forever.
compactor:
  working_directory: /loki/compactor
  retention_enabled: true
  # Must be set when retention is enabled
  delete_request_store: filesystem

ruler:
  alertmanager_url: http://localhost:9093

limits_config:
  # 720h = 30 days
  retention_period: 720h
  ingestion_rate_mb: 16
  ingestion_burst_size_mb: 32

Docker Compose

Create ~/ravenhelm/services/loki/docker-compose.yml:

# Loki log store, reachable on port 3100 from the host and on ravenhelm_net.
services:
  loki:
    image: grafana/loki:3.0.0
    container_name: loki
    restart: unless-stopped
    command:
      - '-config.file=/etc/loki/loki-config.yml'
    volumes:
      - ./loki-config.yml:/etc/loki/loki-config.yml:ro
      - loki_data:/loki
    ports:
      - '3100:3100'
    networks:
      - ravenhelm_net
    # Healthy once Loki's /ready endpoint answers
    healthcheck:
      test:
        - CMD-SHELL
        - 'wget -q --spider http://localhost:3100/ready || exit 1'
      interval: 30s
      timeout: 10s
      retries: 3

volumes:
  loki_data:
    name: loki_data

networks:
  ravenhelm_net:
    external: true

Deploy Loki

cd ~/ravenhelm/services/loki
docker compose up -d

Step 3: Deploy Promtail

Promtail ships logs from Docker containers to Loki.

Configuration

Create ~/ravenhelm/services/loki/promtail-config.yml:

# Promtail: discover Docker containers via the Docker socket and push their logs to Loki.
server:
  http_listen_port: 9080
  grpc_listen_port: 0

# File where Promtail records how far it has read
positions:
  filename: /tmp/positions.yaml

clients:
  - url: http://loki:3100/loki/api/v1/push

scrape_configs:
  - job_name: docker
    docker_sd_configs:
      - host: unix:///var/run/docker.sock
        refresh_interval: 5s
    relabel_configs:
      # Container name with the leading "/" removed
      - source_labels:
          - __meta_docker_container_name
        regex: '/(.*)'
        target_label: container
      - source_labels:
          - __meta_docker_container_log_stream
        target_label: logstream
      # Compose service name, taken from the com.docker.compose.service label
      - source_labels:
          - __meta_docker_container_label_com_docker_compose_service
        target_label: service

Add Promtail to the Loki docker-compose.yml:

  # Goes under the existing `services:` key, next to `loki`
  promtail:
    image: grafana/promtail:3.0.0
    container_name: promtail
    restart: unless-stopped
    command:
      - '-config.file=/etc/promtail/promtail-config.yml'
    volumes:
      - ./promtail-config.yml:/etc/promtail/promtail-config.yml:ro
      # Read-only Docker socket for the docker_sd_configs discovery
      - /var/run/docker.sock:/var/run/docker.sock:ro
    networks:
      - ravenhelm_net
    depends_on:
      - loki

Restart Loki Stack

cd ~/ravenhelm/services/loki
docker compose up -d

Step 4: Deploy Tempo

Tempo provides distributed tracing capabilities.

Configuration

Create ~/ravenhelm/services/tempo/tempo.yml:

# Tempo: local trace storage, OTLP and Zipkin receivers, metrics pushed to Prometheus.
server:
  http_listen_port: 3200

distributor:
  receivers:
    otlp:
      protocols:
        grpc:
          endpoint: '0.0.0.0:4317'
        http:
          endpoint: '0.0.0.0:4318'
    zipkin:
      endpoint: '0.0.0.0:9411'

storage:
  trace:
    backend: local
    local:
      path: /var/tempo/traces
    wal:
      path: /var/tempo/wal

metrics_generator:
  registry:
    external_labels:
      source: tempo
  storage:
    path: /var/tempo/generator/wal
    # Generated metrics are remote-written to Prometheus
    remote_write:
      - url: http://prometheus:9090/api/v1/write
        send_exemplars: true

overrides:
  defaults:
    metrics_generator:
      processors:
        - service-graphs
        - span-metrics

Docker Compose

Create ~/ravenhelm/services/tempo/docker-compose.yml:

# Tempo tracing backend with its ingest ports published on the host.
services:
  tempo:
    image: grafana/tempo:2.6.1
    container_name: tempo
    restart: unless-stopped
    command:
      - '-config.file=/etc/tempo.yml'
    volumes:
      - ./tempo.yml:/etc/tempo.yml:ro
      - tempo_data:/var/tempo
    ports:
      - '3200:3200'  # Tempo HTTP
      - '4317:4317'  # OTLP gRPC
      - '4318:4318'  # OTLP HTTP
      - '9411:9411'  # Zipkin
    networks:
      - ravenhelm_net
    # Healthy once Tempo's /ready endpoint answers
    healthcheck:
      test:
        - CMD
        - wget
        - '-q'
        - '--spider'
        - 'http://localhost:3200/ready'
      interval: 30s
      timeout: 10s
      retries: 3

volumes:
  tempo_data:
    name: tempo_data

networks:
  ravenhelm_net:
    external: true

Deploy Tempo

cd ~/ravenhelm/services/tempo
docker compose up -d

Step 5: Deploy Grafana

Grafana provides visualization for all observability data.

Environment Variables

Add to ~/ravenhelm/secrets/.env:

# Grafana
GF_SECURITY_ADMIN_PASSWORD=your-admin-password
GF_DATABASE_PASSWORD=your-db-password
GRAFANA_OAUTH_CLIENT_ID=your-zitadel-client-id
GRAFANA_OAUTH_CLIENT_SECRET=your-zitadel-client-secret

Create OAuth Application in Zitadel

  1. Navigate to https://auth.ravenhelm.dev
  2. Create a new application for Grafana:
    • Name: Grafana
    • Type: Web Application
    • Authentication Method: Code (PKCE)
    • Redirect URIs: https://grafana.ravenhelm.dev/login/generic_oauth

Docker Compose

Create ~/ravenhelm/services/grafana/docker-compose.yml:

# Grafana backed by the Core Stack's PostgreSQL, with login through Zitadel.
# ${...} values are read from the env file passed to `docker compose --env-file`.
services:
  grafana:
    image: grafana/grafana:11.3.0
    container_name: grafana
    restart: unless-stopped
    environment:
      # Database
      - GF_DATABASE_TYPE=postgres
      - GF_DATABASE_HOST=postgres:5432
      - GF_DATABASE_NAME=grafana
      - GF_DATABASE_USER=grafana
      - GF_DATABASE_PASSWORD=${GF_DATABASE_PASSWORD}
      - GF_DATABASE_SSL_MODE=disable

      # Server
      - GF_SERVER_ROOT_URL=https://grafana.ravenhelm.dev
      - GF_SERVER_DOMAIN=grafana.ravenhelm.dev

      # Security
      - GF_SECURITY_ADMIN_PASSWORD=${GF_SECURITY_ADMIN_PASSWORD}
      - GF_SECURITY_DISABLE_GRAVATAR=true

      # OAuth via Zitadel
      - GF_AUTH_GENERIC_OAUTH_ENABLED=true
      - GF_AUTH_GENERIC_OAUTH_NAME=Zitadel
      - GF_AUTH_GENERIC_OAUTH_CLIENT_ID=${GRAFANA_OAUTH_CLIENT_ID}
      - GF_AUTH_GENERIC_OAUTH_CLIENT_SECRET=${GRAFANA_OAUTH_CLIENT_SECRET}
      - GF_AUTH_GENERIC_OAUTH_SCOPES=openid profile email
      - GF_AUTH_GENERIC_OAUTH_AUTH_URL=https://auth.ravenhelm.dev/oauth/v2/authorize
      - GF_AUTH_GENERIC_OAUTH_TOKEN_URL=https://auth.ravenhelm.dev/oauth/v2/token
      - GF_AUTH_GENERIC_OAUTH_API_URL=https://auth.ravenhelm.dev/oidc/v1/userinfo
      - GF_AUTH_GENERIC_OAUTH_ALLOW_SIGN_UP=true
      - GF_AUTH_GENERIC_OAUTH_USE_PKCE=true

      # Features
      - GF_FEATURE_TOGGLES_ENABLE=traceqlEditor tempoSearch tempoBackendSearch tempoServiceGraph
    volumes:
      - grafana_data:/var/lib/grafana
    networks:
      - ravenhelm_net
    labels:
      - "traefik.enable=true"
      - "traefik.http.routers.grafana.rule=Host(`grafana.ravenhelm.dev`)"
      - "traefik.http.routers.grafana.entrypoints=websecure"
      - "traefik.http.routers.grafana.tls.certresolver=letsencrypt"
      - "traefik.http.services.grafana.loadbalancer.server.port=3000"
    # No depends_on: postgres runs in the Core Stack's Compose project, and
    # Compose rejects a dependency on a service not defined in this file.
    # Make sure the Core Stack is up before starting Grafana.

volumes:
  grafana_data:
    name: grafana_data

networks:
  ravenhelm_net:
    external: true

Create Grafana Database

# Statements are fed on stdin so each runs on its own: CREATE DATABASE is
# rejected when sent as part of a multi-statement `psql -c "..."` string.
# -i (no -t) keeps stdin open for the heredoc without requiring a TTY.
docker exec -i postgres psql -U ravenhelm -d postgres <<'SQL'
CREATE USER grafana WITH PASSWORD 'your-db-password';
CREATE DATABASE grafana OWNER grafana;
GRANT ALL PRIVILEGES ON DATABASE grafana TO grafana;
SQL

Deploy Grafana

cd ~/ravenhelm/services/grafana
docker compose --env-file ~/ravenhelm/secrets/.env up -d

Step 6: Configure Grafana Data Sources

After Grafana is running, configure the data sources.

Via UI

  1. Navigate to https://grafana.ravenhelm.dev
  2. Go to Configuration → Data Sources
  3. Add each data source:

Prometheus:

  • URL: http://prometheus:9090
  • Default: Yes

Loki:

  • URL: http://loki:3100

Tempo:

  • URL: http://tempo:3200
  • Enable: TraceQL, Service Graph
  • Link to Loki for logs

Via Provisioning

Create ~/ravenhelm/services/grafana/provisioning/datasources/datasources.yml:

# Grafana data source provisioning for Prometheus, Loki and Tempo.
# Each data source has an explicit uid so the datasourceUid cross-references
# below resolve; without one Grafana generates a random uid.
apiVersion: 1

datasources:
  - name: Prometheus
    type: prometheus
    uid: prometheus
    access: proxy
    url: http://prometheus:9090
    isDefault: true
    editable: false

  - name: Loki
    type: loki
    uid: loki
    access: proxy
    url: http://loki:3100
    editable: false
    jsonData:
      # Turn "traceID=<id>" in a log line into a link to the Tempo data source
      derivedFields:
        - datasourceUid: tempo
          matcherRegex: "traceID=(\\w+)"
          name: TraceID
          # "$$" escapes "$" so provisioning does not treat it as an env variable
          url: "$${__value.raw}"

  - name: Tempo
    type: tempo
    uid: tempo
    access: proxy
    url: http://tempo:3200
    editable: false
    jsonData:
      httpMethod: GET
      # Trace-to-logs lookups go to the Loki data source declared above
      tracesToLogs:
        datasourceUid: loki
        tags: ['container', 'service']
      # Service graph metrics come from the Prometheus data source declared above
      serviceMap:
        datasourceUid: prometheus

Mount the provisioning directory in the Grafana container:

volumes:
- grafana_data:/var/lib/grafana
- ./provisioning:/etc/grafana/provisioning:ro

Verification

Check Service Status

# All services running
docker ps | grep -E "(prometheus|loki|tempo|grafana|promtail|blackbox)"

# Prometheus targets (queried inside the container, so no published port is required)
docker exec prometheus wget -q -O- http://localhost:9090/api/v1/targets | jq '.data.activeTargets | length'

# Loki ready
curl -s http://localhost:3100/ready

# Tempo ready
curl -s http://localhost:3200/ready

# Grafana health (port 3000 is not published on the host; it is only reachable
# through Traefik, so query it from inside the container)
docker exec grafana wget -q -O- http://localhost:3000/api/health

Test Log Ingestion

# Generate a test log from a named container that stays up long enough for
# Promtail's Docker discovery (5s refresh) to find it. An unnamed --rm
# container gets a random name and is gone before it is discovered.
docker run -d --name loki-test --network ravenhelm_net alpine \
  sh -c 'echo "Test log message"; sleep 30'

# Give Promtail time to discover the container and ship the line
sleep 15

# Query Loki. Log selectors must use the range endpoint (the instant-query
# endpoint only accepts metric queries); the default window is the last hour.
curl -G -s http://localhost:3100/loki/api/v1/query_range \
  --data-urlencode 'query={container="loki-test"}' | jq

# Clean up the test container
docker rm -f loki-test

Test Metrics

# Query Prometheus
curl -s 'http://localhost:9090/api/v1/query?query=up' | jq '.data.result'

Troubleshooting

Prometheus Not Scraping Targets

# Check target status
curl -s http://localhost:9090/api/v1/targets | jq '.data.activeTargets[] | {job: .labels.job, health: .health, error: .lastError}'

# Reload configuration
curl -X POST http://localhost:9090/-/reload

Loki Not Receiving Logs

# Check Promtail status
docker logs promtail --tail 50

# Verify Promtail can reach Loki
docker exec promtail wget -q -O- http://loki:3100/ready

Grafana OAuth Issues

# Check Grafana logs
docker logs grafana --tail 100 | grep -i oauth

# Verify Zitadel connectivity
docker exec grafana wget -q -O- https://auth.ravenhelm.dev/.well-known/openid-configuration

Tempo Not Receiving Traces

# Check Tempo logs
docker logs tempo --tail 50

# Verify OTLP endpoint
curl -s http://localhost:3200/ready

Next Steps