Add an optional Prometheus exporter to ring-sqa: the analyzer feeds new
records to it when CFG.prometheus? is set, log.rb gains explicit log levels
and an AccessLog constant, and the gemspec picks up prometheus-client and
webrick.

NOTE(review): this patch was re-flowed from a whitespace-collapsed copy.
Hunk line counts were re-checked, but the leading indentation of context
lines, the position of blank context lines in the last analyzer.rb hunk and
the post-image blob id of prometheus.rb are reconstructed -- apply with
`git apply --ignore-whitespace` or regenerate the patch before merging.

diff --git a/lib/ring/sqa/analyzer.rb b/lib/ring/sqa/analyzer.rb
index 3c78ad7..d239a3b 100644
--- a/lib/ring/sqa/analyzer.rb
+++ b/lib/ring/sqa/analyzer.rb
@@ -17,6 +17,7 @@ def run
         records = records.all
         @graphite.add @db.id_range(first_id, @db_id_seen).all if @graphite
         @influxdb.add @db.id_range(first_id, @db_id_seen).all if @influxdb
+        @prometheus.add @db.id_range(first_id, @db_id_seen).all if @prometheus
         @buffer.push records.map { |record| record.peer }
         @buffer.exceed_median? ? @alarm.set(@buffer) : @alarm.clear(@buffer)
         delay = INTERVAL-(Time.now-start)
@@ -43,6 +44,7 @@ def initialize database, nodes
       @db_id_seen = 0
       @graphite = graphite if CFG.graphite?
       @influxdb = influxdb if CFG.influxdb?
+      @prometheus = prometheus if CFG.prometheus?
     end
 
     def graphite
@@ -55,6 +57,10 @@ def influxdb
       InfluxDBWriter.new @nodes
     end
 
+    def prometheus
+      require_relative 'prometheus'
+      Prometheus.new @nodes
+    end
   end
 
 
diff --git a/lib/ring/sqa/log.rb b/lib/ring/sqa/log.rb
index a01ae82..ed53d66 100644
--- a/lib/ring/sqa/log.rb
+++ b/lib/ring/sqa/log.rb
@@ -4,13 +4,19 @@ class SQA
   if CFG.debug?
     require 'logger'
     Log = Logger.new STDERR
+    Log.level = Logger::DEBUG
+    AccessLog = Log
   else
     begin
       require 'syslog/logger'
       Log = Syslog::Logger.new 'ring-sqad%i' % ( CFG.afi == "ipv6" ? 6 : 4 )
+      Log.level = Logger::INFO
+      AccessLog = Logger.new STDERR
     rescue LoadError
       require 'logger'
       Log = Logger.new STDERR
+      Log.level = Logger::INFO
+      AccessLog = Log
     end
   end
 
diff --git a/lib/ring/sqa/prometheus.rb b/lib/ring/sqa/prometheus.rb
new file mode 100644
index 0000000..232458c
--- /dev/null
+++ b/lib/ring/sqa/prometheus.rb
@@ -0,0 +1,126 @@
+require 'webrick'
+require 'prometheus/client'
+require 'prometheus/client/formats/text'
+require "webrick/accesslog"
+
+module Ring
+class SQA
+
+  # Publishes per-node measurement results as Prometheus gauges and serves
+  # them in the Prometheus text format at /metrics from an embedded WEBrick
+  # HTTP server.
+  class Prometheus
+    STATE_METRIC_NAME = :nlnog_ring_sqa_node_state
+    LATENCY_METRIC_NAME = :nlnog_ring_sqa_node_latency_microseconds
+    NODE_LAST_CHECKED_METRIC_NAME = :nlnog_ring_sqa_node_last_checked_timestamp_seconds
+    DEFAULT_BIND = "127.0.0.1"
+    DEFAULT_PORT = 8129
+    LABEL_NAMES = [
+      :address_family,
+      :country_code,
+      :ring_source,
+      :ring_target,
+      :target_status,
+    ].freeze
+
+    # Reads listener settings from CFG.prometheus, registers the gauges and
+    # starts serving in a background thread.
+    #
+    # @param nodes [#all] node directory; #all is indexed by record.peer in #add
+    def initialize nodes
+      @bind = CFG.prometheus.bind? || DEFAULT_BIND
+      # Kept in an ivar (not a local) because the server thread logs it on failure.
+      @port = CFG.prometheus.port? || DEFAULT_PORT
+      @hostname = Ring::SQA::CFG.host.name
+      @nodes = nodes
+
+      @registry = ::Prometheus::Client.registry
+      @state_metric = gauge(STATE_METRIC_NAME, "State of NLNOG Ring Node: 0 is down, 1 is up.")
+      @latency_metric = gauge(LATENCY_METRIC_NAME, "Latency of NLNOG Ring Node in microseconds.")
+      @refreshed_metric = gauge(NODE_LAST_CHECKED_METRIC_NAME, "Last timestamp this NLNOG Ring Node was checked.")
+
+      @server = WEBrick::HTTPServer.new(
+        Port: @port,
+        BindAddress: @bind,
+        Logger: Log,
+        AccessLog: accesslog
+      )
+      start
+    end
+
+    # Updates all three gauges from a batch of measurement records.
+    #
+    # @param records [Enumerable] rows responding to #peer, #result, #latency and #time
+    def add records
+      host = @hostname.split(".").first
+      node = @nodes.all
+
+      records.each do |record|
+        peer = node[record.peer]
+        status = record.result
+        up = status == "ok"
+
+        # NOTE(review): target_status is part of the label set, so a status
+        # change presumably creates a new series and leaves the old one in the
+        # registry -- confirm this is intended.
+        labels = {
+          address_family: CFG.afi,
+          country_code: peer[:cc].downcase,
+          ring_source: host,
+          ring_target: peer[:name].split(".").first,
+          target_status: status
+        }
+
+        @state_metric.set(up ? 1 : 0, labels: labels)
+        # -1 is reported as latency for any result other than "ok".
+        @latency_metric.set(up ? record.latency : -1, labels: labels)
+        @refreshed_metric.set(record.time, labels: labels)
+      end
+    end
+
+    private
+
+    # Creates a gauge labelled with LABEL_NAMES and registers it in @registry.
+    def gauge name, docstring
+      metric = ::Prometheus::Client::Gauge.new(name, docstring: docstring, labels: LABEL_NAMES)
+      @registry.register(metric)
+      metric
+    end
+
+    # WEBrick access-log targets; empty unless CFG.prometheus.accesslog? is set.
+    def accesslog
+      return [] unless CFG.prometheus.accesslog?
+      [[AccessLog, WEBrick::AccessLog::COMMON_LOG_FORMAT]]
+    end
+
+    # Mounts the request handler and runs the server in a background thread.
+    def start
+      @server.mount_proc "/" do |req, res|
+        res["Content-Type"] = "text/plain; charset=utf-8"
+        if req.path == "/metrics"
+          res.status = 200
+          res.body = export_metrics
+        else
+          res.status = 404
+          res.body = "Not Found"
+        end
+      end
+
+      @runner ||=
+        Thread.start do
+          begin
+            @server.start
+          rescue => e
+            Log.error "Failed to start prometheus metrics web on port #{@port}: #{e}"
+          end
+        end
+    end
+
+    # @return [String] all registered metrics in the Prometheus text format
+    def export_metrics
+      ::Prometheus::Client::Formats::Text.marshal(@registry)
+    end
+  end
+
+end
+end
diff --git a/ring-sqa.gemspec b/ring-sqa.gemspec
index e4e025a..6c5550a 100644
--- a/ring-sqa.gemspec
+++ b/ring-sqa.gemspec
@@ -16,9 +16,11 @@ Gem::Specification.new do |s|
   s.required_ruby_version = '>= 1.9.3'
   s.add_runtime_dependency 'slop', '~> 3.5'
   s.add_runtime_dependency 'rb-inotify', '~> 0.9'
-  s.add_runtime_dependency 'sequel', '~> 4.12'
+  s.add_runtime_dependency 'sequel', '~> 4.12', '< 4.42.0'
   s.add_runtime_dependency 'sqlite3', '~> 1.3'
   s.add_runtime_dependency 'asetus', '~> 0.3'
   s.add_runtime_dependency 'graphite-api', '~> 0.1.0'
   s.add_runtime_dependency 'influxdb-client', '>= 0.1', '<= 3.2.0'
+  s.add_runtime_dependency 'prometheus-client', '~> 4.2'
+  s.add_runtime_dependency 'webrick', '~> 1.9'
 end