From 5246c6f0dcb4035517bb66d047b71ebd2962f056 Mon Sep 17 00:00:00 2001 From: Nick Douma Date: Tue, 3 Jun 2025 10:43:29 +0200 Subject: [PATCH 1/4] Pin sequel to latest version that still works Later versions break with this error: .../lib/ruby/gems/3.0.0/gems/sequel-4.42.1/lib/sequel/dataset/misc.rb:336:in `initialize_copy': wrong number of arguments (given 2, expected 1) (ArgumentError) from <internal:kernel>:48:in `clone' from .../lib/ruby/gems/3.0.0/gems/sequel-4.42.1/lib/sequel/dataset/query.rb:89:in `clone' from .../lib/ruby/gems/3.0.0/gems/sequel-4.42.1/lib/sequel/dataset/query.rb:265:in `from' from .../lib/ruby/gems/3.0.0/gems/sequel-4.42.1/lib/sequel/database/dataset.rb:58:in `from' from .../lib/ruby/gems/3.0.0/gems/sequel-4.42.1/lib/sequel/database/query.rb:196:in `table_exists?' from .../lib/ruby/gems/3.0.0/gems/sequel-4.42.1/lib/sequel/database/schema_methods.rb:224:in `create_table?' from .../lib/ruby/gems/3.0.0/gems/ring-sqa-0.4.4/lib/ring/sqa/database.rb:64:in `create_db' from .../lib/ruby/gems/3.0.0/gems/ring-sqa-0.4.4/lib/ring/sqa/database.rb:59:in `initialize' from .../lib/ruby/gems/3.0.0/gems/ring-sqa-0.4.4/lib/ring/sqa/core.rb:23:in `new' from .../lib/ruby/gems/3.0.0/gems/ring-sqa-0.4.4/lib/ring/sqa/core.rb:23:in `initialize' from .../ring-sqa/lib/ring/sqa/cli.rb:14:in `new' from .../ring-sqa/lib/ring/sqa/cli.rb:14:in `run' from .../ring-sqa/lib/ring/sqa/cli.rb:29:in `initialize' from ./bin/ring-sqad:5:in `new' from ./bin/ring-sqad:5:in `<main>
' --- ring-sqa.gemspec | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ring-sqa.gemspec b/ring-sqa.gemspec index e4e025a..d9701aa 100644 --- a/ring-sqa.gemspec +++ b/ring-sqa.gemspec @@ -16,7 +16,7 @@ Gem::Specification.new do |s| s.required_ruby_version = '>= 1.9.3' s.add_runtime_dependency 'slop', '~> 3.5' s.add_runtime_dependency 'rb-inotify', '~> 0.9' - s.add_runtime_dependency 'sequel', '~> 4.12' + s.add_runtime_dependency 'sequel', '~> 4.12', '< 4.42.0' s.add_runtime_dependency 'sqlite3', '~> 1.3' s.add_runtime_dependency 'asetus', '~> 0.3' s.add_runtime_dependency 'graphite-api', '~> 0.1.0' From 01abf47f7c209422b170f272eba38b634ec6898a Mon Sep 17 00:00:00 2001 From: Nick Douma Date: Tue, 3 Jun 2025 10:47:57 +0200 Subject: [PATCH 2/4] Add dependency on prometheus-client --- ring-sqa.gemspec | 2 ++ 1 file changed, 2 insertions(+) diff --git a/ring-sqa.gemspec b/ring-sqa.gemspec index d9701aa..6c5550a 100644 --- a/ring-sqa.gemspec +++ b/ring-sqa.gemspec @@ -21,4 +21,6 @@ Gem::Specification.new do |s| s.add_runtime_dependency 'asetus', '~> 0.3' s.add_runtime_dependency 'graphite-api', '~> 0.1.0' s.add_runtime_dependency 'influxdb-client', '>= 0.1', '<= 3.2.0' + s.add_runtime_dependency 'prometheus-client', '~> 4.2' + s.add_runtime_dependency 'webrick', '~> 1.9' end From aabd63e514c3cae7da14ac644f8a59b7048a7d8d Mon Sep 17 00:00:00 2001 From: Nick Douma Date: Tue, 3 Jun 2025 12:11:50 +0200 Subject: [PATCH 3/4] Add facility for separate web server access logs --- lib/ring/sqa/log.rb | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/lib/ring/sqa/log.rb b/lib/ring/sqa/log.rb index a01ae82..ed53d66 100644 --- a/lib/ring/sqa/log.rb +++ b/lib/ring/sqa/log.rb @@ -4,13 +4,19 @@ class SQA if CFG.debug? require 'logger' Log = Logger.new STDERR + Log.level = Logger::DEBUG + AccessLog = Log else begin require 'syslog/logger' Log = Syslog::Logger.new 'ring-sqad%i' % ( CFG.afi == "ipv6" ? 
6 : 4 ) + Log.level = Logger::INFO + AccessLog = Logger.new STDERR rescue LoadError require 'logger' Log = Logger.new STDERR + Log.level = Logger::INFO + AccessLog = Log end end From 1d5c9645c7d702cc2712e80ae967b35e78304f9e Mon Sep 17 00:00:00 2001 From: Nick Douma Date: Tue, 3 Jun 2025 12:12:22 +0200 Subject: [PATCH 4/4] Implement Prometheus exporter with built-in webserver Available configuration options are: prometheus: bind: [address to bind on, optional, default 127.0.0.1] port: [port to bind on, optional, default 8129] accesslog: [set to true to enable accesslogs to stderr] --- lib/ring/sqa/analyzer.rb | 6 ++ lib/ring/sqa/prometheus.rb | 115 +++++++++++++++++++++++++++++++++++++ 2 files changed, 121 insertions(+) create mode 100644 lib/ring/sqa/prometheus.rb diff --git a/lib/ring/sqa/analyzer.rb b/lib/ring/sqa/analyzer.rb index 3c78ad7..d239a3b 100644 --- a/lib/ring/sqa/analyzer.rb +++ b/lib/ring/sqa/analyzer.rb @@ -17,6 +17,7 @@ def run records = records.all @graphite.add @db.id_range(first_id, @db_id_seen).all if @graphite @influxdb.add @db.id_range(first_id, @db_id_seen).all if @influxdb + @prometheus.add @db.id_range(first_id, @db_id_seen).all if @prometheus @buffer.push records.map { |record| record.peer } @buffer.exceed_median? ? @alarm.set(@buffer) : @alarm.clear(@buffer) delay = INTERVAL-(Time.now-start) @@ -43,6 +44,7 @@ def initialize database, nodes @db_id_seen = 0 @graphite = graphite if CFG.graphite? @influxdb = influxdb if CFG.influxdb? + @prometheus = prometheus if CFG.prometheus? 
end def graphite @@ -55,6 +57,10 @@ def influxdb InfluxDBWriter.new @nodes end + def prometheus + require_relative 'prometheus' + Prometheus.new @nodes + end end
diff --git a/lib/ring/sqa/prometheus.rb b/lib/ring/sqa/prometheus.rb
new file mode 100644
index 0000000..232458c
--- /dev/null
+++ b/lib/ring/sqa/prometheus.rb
@@ -0,0 +1,115 @@
+require 'webrick'
+require 'webrick/accesslog'
+require 'prometheus/client'
+require 'prometheus/client/formats/text'
+
+module Ring
+class SQA
+
+  # Prometheus exporter: records each probed peer's latest result in gauges
+  # registered with Prometheus::Client.registry and serves that registry as
+  # text on /metrics from a WEBrick server started in a separate thread.
+  class Prometheus
+    STATE_METRIC_NAME = :nlnog_ring_sqa_node_state
+    LATENCY_METRIC_NAME = :nlnog_ring_sqa_node_latency_microseconds
+    NODE_LAST_CHECKED_METRIC_NAME = :nlnog_ring_sqa_node_last_checked_timestamp_seconds
+
+    # Set all three gauges for every record; each record must respond to
+    # #peer, #result, #latency and #time.
+    def add records
+      host = @hostname.split(".").first
+      node = @nodes.all
+
+      records.each do |record|
+        nodename = node[record.peer][:name].split(".").first
+        nodecc = node[record.peer][:cc].downcase
+        nodestatus = record.result
+        if nodestatus == "ok"
+          nodeup = 1
+          nodelatency = record.latency
+        else
+          # Any other result counts as down and is exported with latency -1.
+          nodeup = 0
+          nodelatency = -1
+        end
+
+        labels = {
+          address_family: CFG.afi,
+          country_code: nodecc,
+          ring_source: host,
+          ring_target: nodename,
+          target_status: nodestatus
+        }
+
+        @state_metric.set(nodeup, labels: labels)
+        @latency_metric.set(nodelatency, labels: labels)
+        @refreshed_metric.set(record.time, labels: labels)
+      end
+    end
+
+    private
+
+    # Register the gauges, build the WEBrick server from CFG.prometheus
+    # (bind, port, accesslog) and start it. nodes must respond to #all.
+    def initialize nodes
+      bind = CFG.prometheus.bind? || "127.0.0.1"
+      # Stored on the instance so #start can name the port when it fails.
+      @port = CFG.prometheus.port? || 8129
+      @hostname = Ring::SQA::CFG.host.name
+      @nodes = nodes
+
+      common_labels = %i[address_family country_code ring_source ring_target target_status]
+
+      @registry = ::Prometheus::Client.registry
+      @state_metric = ::Prometheus::Client::Gauge.new(STATE_METRIC_NAME, docstring: "State of NLNOG Ring Node: 0 is down, 1 is up.", labels: common_labels)
+      @latency_metric = ::Prometheus::Client::Gauge.new(LATENCY_METRIC_NAME, docstring: "Latency of NLNOG Ring Node in microseconds.", labels: common_labels)
+      @refreshed_metric = ::Prometheus::Client::Gauge.new(NODE_LAST_CHECKED_METRIC_NAME, docstring: "Last timestamp this NLNOG Ring Node was checked.", labels: common_labels)
+
+      @registry.register(@state_metric)
+      @registry.register(@latency_metric)
+      @registry.register(@refreshed_metric)
+
+      # Access logging is opt-in; otherwise WEBrick gets an empty AccessLog list.
+      accesslog = []
+      if CFG.prometheus.accesslog?
+        accesslog = [[AccessLog, WEBrick::AccessLog::COMMON_LOG_FORMAT]]
+      end
+      @server = WEBrick::HTTPServer.new(
+        Port: @port,
+        BindAddress: bind,
+        Logger: Log,
+        AccessLog: accesslog
+      )
+      start
+    end
+
+    # Mount the request handler and run the server in a background thread.
+    def start
+      @server.mount_proc "/" do |req, res|
+        res["Content-Type"] = "text/plain; charset=utf-8"
+        if req.path == "/metrics"
+          res.status = 200
+          res.body = export_metrics
+        else
+          res.status = 404
+          res.body = "Not Found"
+        end
+      end
+
+      @runner ||= Thread.start do
+        begin
+          @server.start
+        rescue => e
+          Log.error "Failed to start prometheus metrics web on port #{@port}: #{e}"
+        end
+      end
+    end
+
+    # Render every metric in the registry in the Prometheus text format.
+    def export_metrics
+      ::Prometheus::Client::Formats::Text.marshal(@registry)
+    end
+  end
+
+end
+end