Skip to content

Commit 0b66264

Browse files
authored
Merge pull request #176 from m0dular/SUP-3535-sar_metrics
(SUP-3535) Add more sar metrics to collect
2 parents e9ed0c3 + ce3f680 commit 0b66264

File tree

13 files changed

+351
-132
lines changed

13 files changed

+351
-132
lines changed

files/metrics_tidy

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -45,7 +45,7 @@ done
4545

4646

4747
# Guard against deleting or archiving files outside of a Puppet service metrics directory.
48-
valid_paths=(puppetserver puppetdb orchestrator console ace bolt activemq postgres system_processes system_memory system_cpu vmware)
48+
valid_paths=(puppetserver puppetdb orchestrator console ace bolt activemq postgres system_processes system_memory system_cpu vmware sar)
4949

5050
# Arguments and defaults.
5151
metrics_directory="${metrics_directory:-/opt/puppetlabs/puppet-metrics-collector/puppetserver}"

files/system_metrics

Lines changed: 23 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ module SystemMetrics
3232
#
3333
# @attr [integer] polling_interval Time in seconds between calls to poll the system for data.
3434
# @attr [integer] file_interval Time in seconds between the creation of each output file.
35-
# @attr [string] metric_type system_cpu|system_memory|system_processes
35+
# @attr [string] metric_type system_cpu|system_processes
3636
# @attr [string] metrics_dir The puppet_metrics_collector output directory.
3737
# @attr [string] process_expression Expression to pass to egrep that matches processes to track
3838
# @attr [boolean] verbose Verbose output
@@ -47,7 +47,7 @@ module SystemMetrics
4747
#
4848
# @param [integer] polling_interval Time in seconds between calls to poll the system for data.
4949
# @param [integer] file_interval Time in seconds between the creation of each output file.
50-
# @param [string] metric_type system_cpu|system_memory|system_processes
50+
# @param [string] metric_type system_cpu|system_processes
5151
# @param [string] process_expression Expression to pass to egrep that matches processes to track
5252
# @param [string] metrics_dir The puppet_metrics_collector output directory.
5353
# @param [boolean] verbose Verbose output
@@ -78,7 +78,7 @@ module SystemMetrics
7878
def run_sar
7979
times_to_poll = (@file_interval / @polling_interval).round
8080
# sar inputs are polling interval and how many times to poll
81-
comm_flags = ' -r' if %r{system_memory}.match?(@metric_type)
81+
comm_flags = ' -u -r -w -W -B'
8282
comm = "sar #{comm_flags} #{@polling_interval} #{times_to_poll}"
8383
puts "sar command is: #{comm}" if @verbose
8484
begin
@@ -97,41 +97,25 @@ module SystemMetrics
9797
#
9898
# @return [hash] The metrics data
9999
def parse_sar_output(sar_output)
100-
sar_output_arr = sar_output.split(%r{\n+|\r+}).reject(&:empty?).map { |line| line.split }
101-
102-
unique_header_str = if @metric_type == 'system_memory'
103-
'%memused'
104-
else
105-
'%user'
106-
end
107-
headers_line = sar_output_arr.find { |e| e.include? unique_header_str }
108-
sar_error_missing_headers = <<-EOF
109-
sar output invalid or missing headers. Failed to find line with #{unique_header_str}.
110-
Full output:
111-
#{sar_output}
112-
EOF
113-
send_error_to_output_file_and_exit(sar_error_missing_headers) if headers_line.nil?
114-
115-
averages_line = sar_output_arr.find { |e| e.include? 'Average:' }
116-
sar_error_missing_averages = <<-EOF
117-
sar output missing "Average:"
118-
Full output:
119-
#{sar_output}"
120-
EOF
121-
send_error_to_output_file_and_exit(sar_error_missing_averages) if averages_line.nil?
122-
123-
Hash[headers_line.reverse.zip(averages_line.reverse).reverse]
124-
125-
puts "sar headers and averages:\n#{headers_line.join(',')}\n#{averages_line.join(',')}" if @verbose
126-
127-
# example of array data
128-
# 04:59:13,PM,CPU,%user,%nice,%system,%iowait,%steal,%idle
129-
# Average:,all,0.58,0.00,0.08,0.00,0.00,99.33
130-
# combine the arrays into a hash starting from the deal with the unmatched columns in the front
131-
data_hash = Hash[headers_line.reverse.zip(averages_line.reverse).reverse]
132-
# remove anything that doesn't have a number for an average like "Average:" or "all"
133-
data_hash.select! { |_k, v| v =~ %r{\A[-+]?[0-9]*\.?[0-9]+\Z} }
134-
data_hash.transform_values!(&:to_f)
100+
results = []
101+
# Split the output on newlines and select the "Average" metrics that sar calculates over the interval
102+
sar_averages = sar_output.split(%r{\n+|\r+}).select { |line| line.include? 'Average' }
103+
104+
# Each average will consist of a header line and a values line, e.g.
105+
# Average: CPU %user %nice %system %iowait %steal %idle
106+
# Average: all 0.18 0.00 0.10 0.00 0.03 99.70
107+
# So, step over the array in increments of two to process each average
108+
0.step(sar_averages.size - 1, 2) do |i|
109+
# Split each line on spaces and join them together, e.g.
110+
# [["Average:", "Average:"], ["CPU", "all"], ["%user", "0.38"], ["%nice", "0.00"], ["%system", "0.08"], ["%iowait", "0.00"], ["%steal", "0.03"], ["%idle", "99.52"]]
111+
merged_sar = sar_averages[i].split.zip(sar_averages[i + 1].split)
112+
# Filter non-numeric values, then turn them into key/value pairs, e.g.
113+
# {"name": "%user", "value": 0.45}
114+
merged_sar.select! { |_k, v| v =~ %r{\A[-+]?[0-9]*\.?[0-9]+\Z} }.each do |header, val|
115+
results.append({ name: header, value: val.to_f })
116+
end
117+
end
118+
results
135119
end
136120

137121
# Run pidstat to collect process specific data
@@ -407,7 +391,7 @@ end
407391

408392
if $PROGRAM_NAME == __FILE__
409393

410-
VALID_METRIC_TYPES = ['system_cpu', 'system_memory', 'system_processes'].freeze
394+
VALID_METRIC_TYPES = ['system_cpu', 'system_processes'].freeze
411395
FILE_INTERVAL_DEFAULT = 60 * 5
412396
POLLING_INTERVAL_DEFAULT = 1
413397
METRIC_TYPE_DEFAULT = 'system_cpu'

manifests/collect.pp

Lines changed: 70 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -3,60 +3,87 @@
33
# Creates systemd units for collecting a given metric
44
#
55
define puppet_metrics_collector::collect (
6+
Enum['present', 'absent'] $ensure = 'present',
67
String $metrics_type = $title,
78
String $metrics_command = undef,
89
String $tidy_command = undef,
10+
Optional[Hash] $env_vars = undef,
911
String $metric_ensure = 'present',
1012
String $minute = '5',
1113
) {
12-
$service_ensure = $metric_ensure ? {
13-
'present' => running,
14-
'absent' => stopped,
15-
}
14+
if $ensure == 'absent' {
15+
$services = [
16+
"puppet_${metrics_type}-metrics.service",
17+
"puppet_${metrics_type}-metrics.timer",
18+
"puppet_${metrics_type}-tidy.service",
19+
"puppet_${metrics_type}-tidy.timer",
20+
]
21+
service { $services:
22+
ensure => stopped,
23+
enable => false,
24+
}
1625

17-
$service_enable = $metric_ensure ? {
18-
'present' => true,
19-
'absent' => false,
20-
}
26+
$files = [
27+
"/etc/systemd/system/puppet_${metrics_type}-metrics.service",
28+
"/etc/systemd/system/puppet_${metrics_type}-metrics.timer",
29+
"/etc/systemd/system/puppet_${metrics_type}-tidy.service",
30+
"/etc/systemd/system/puppet_${metrics_type}-tidy.timer",
31+
]
2132

22-
file { "/etc/systemd/system/puppet_${metrics_type}-metrics.service":
23-
ensure => $metric_ensure,
24-
content => epp('puppet_metrics_collector/service.epp',
25-
{ 'service' => "puppet_${metrics_type}", 'metrics_command' => $metrics_command }
26-
),
27-
}
28-
file { "/etc/systemd/system/puppet_${metrics_type}-metrics.timer":
29-
ensure => $metric_ensure,
30-
content => epp('puppet_metrics_collector/timer.epp',
31-
{ 'service' => "puppet_${metrics_type}", 'minute' => $minute },
32-
),
33+
file { $files:
34+
ensure => absent,
35+
}
3336
}
37+
else {
38+
$service_ensure = $metric_ensure ? {
39+
'present' => running,
40+
'absent' => stopped,
41+
}
3442

35-
file { "/etc/systemd/system/puppet_${metrics_type}-tidy.service":
36-
ensure => $metric_ensure,
37-
content => epp('puppet_metrics_collector/tidy.epp',
38-
{ 'service' => "puppet_${metrics_type}", 'tidy_command' => $tidy_command }
39-
),
40-
}
41-
file { "/etc/systemd/system/puppet_${metrics_type}-tidy.timer":
42-
ensure => $metric_ensure,
43-
content => epp('puppet_metrics_collector/tidy_timer.epp',
44-
{ 'service' => "puppet_${metrics_type}" }
45-
),
46-
}
43+
$service_enable = $metric_ensure ? {
44+
'present' => true,
45+
'absent' => false,
46+
}
4747

48-
service { "puppet_${metrics_type}-metrics.service":
49-
}
50-
service { "puppet_${metrics_type}-metrics.timer":
51-
ensure => $service_ensure,
52-
enable => $service_enable,
53-
subscribe => File["/etc/systemd/system/puppet_${metrics_type}-metrics.timer"],
54-
}
48+
file { "/etc/systemd/system/puppet_${metrics_type}-metrics.service":
49+
ensure => $metric_ensure,
50+
content => epp('puppet_metrics_collector/service.epp',
51+
{ 'service' => "puppet_${metrics_type}", 'metrics_command' => $metrics_command, 'env_vars' => $env_vars }
52+
),
53+
}
54+
file { "/etc/systemd/system/puppet_${metrics_type}-metrics.timer":
55+
ensure => $metric_ensure,
56+
content => epp('puppet_metrics_collector/timer.epp',
57+
{ 'service' => "puppet_${metrics_type}", 'minute' => $minute },
58+
),
59+
}
60+
61+
file { "/etc/systemd/system/puppet_${metrics_type}-tidy.service":
62+
ensure => $metric_ensure,
63+
content => epp('puppet_metrics_collector/tidy.epp',
64+
{ 'service' => "puppet_${metrics_type}", 'tidy_command' => $tidy_command }
65+
),
66+
}
67+
file { "/etc/systemd/system/puppet_${metrics_type}-tidy.timer":
68+
ensure => $metric_ensure,
69+
content => epp('puppet_metrics_collector/tidy_timer.epp',
70+
{ 'service' => "puppet_${metrics_type}" }
71+
),
72+
}
73+
74+
service { "puppet_${metrics_type}-metrics.service":
75+
}
76+
service { "puppet_${metrics_type}-metrics.timer":
77+
ensure => $service_ensure,
78+
enable => $service_enable,
79+
subscribe => File["/etc/systemd/system/puppet_${metrics_type}-metrics.timer"],
80+
}
5581

56-
service { "puppet_${metrics_type}-tidy.service": }
57-
service { "puppet_${metrics_type}-tidy.timer":
58-
ensure => $service_ensure,
59-
enable => $service_enable,
60-
subscribe => File["/etc/systemd/system/puppet_${metrics_type}-tidy.timer"],
82+
service { "puppet_${metrics_type}-tidy.service": }
83+
service { "puppet_${metrics_type}-tidy.timer":
84+
ensure => $service_ensure,
85+
enable => $service_enable,
86+
subscribe => File["/etc/systemd/system/puppet_${metrics_type}-tidy.timer"],
87+
}
6188
}
6289
}

manifests/pe_metric.pp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
Optional[String] $metrics_server_hostname = undef,
1717
Optional[Integer] $metrics_server_port = undef,
1818
Optional[String] $metrics_server_db_name = undef,
19+
Optional[Hash] $env_vars = undef,
1920
) {
2021
$metrics_output_dir = "${puppet_metrics_collector::output_dir}/${metrics_type}"
2122

@@ -84,10 +85,12 @@
8485
$tidy_command = "${puppet_metrics_collector::scripts_dir}/metrics_tidy -d ${metrics_output_dir} -r ${retention_days}"
8586

8687
puppet_metrics_collector::collect { $metrics_type:
88+
ensure => $metric_ensure,
8789
metrics_command => $metrics_command,
8890
tidy_command => $tidy_command,
8991
metric_ensure => $metric_ensure,
9092
minute => $cron_minute,
93+
env_vars => $env_vars,
9194
notify => Exec['puppet_metrics_collector_daemon_reload'],
9295
}
9396

manifests/sar_metric.pp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333
Integer $polling_frequency_seconds = 1,
3434
String $metric_script_file = 'system_metrics',
3535
String $metrics_shipping_command = $puppet_metrics_collector::system::metrics_shipping_command,
36+
Optional[Hash] $env_vars = undef,
3637
) {
3738
$metrics_output_dir = "${puppet_metrics_collector::system::output_dir}/${metrics_type}"
3839

@@ -66,10 +67,12 @@
6667
$tidy_command = "${puppet_metrics_collector::system::scripts_dir}/metrics_tidy -d ${metrics_output_dir} -r ${retention_days}"
6768

6869
puppet_metrics_collector::collect { $metrics_type:
70+
ensure => $metric_ensure,
6971
metrics_command => $metrics_command,
7072
tidy_command => $tidy_command,
7173
metric_ensure => $metric_ensure,
7274
minute => $cron_minute,
75+
env_vars => $env_vars,
7376
notify => Exec['puppet_metrics_collector_system_daemon_reload'],
7477
}
7578

manifests/system.pp

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -93,8 +93,7 @@
9393
source => 'puppet:///modules/puppet_metrics_collector/system_metrics',
9494
}
9595

96-
contain puppet_metrics_collector::system::cpu
97-
contain puppet_metrics_collector::system::memory
96+
contain puppet_metrics_collector::system::sar
9897
contain puppet_metrics_collector::system::processes
9998
}
10099

@@ -133,4 +132,14 @@
133132
file { $metric_legacy_files :
134133
ensure => absent,
135134
}
135+
136+
# Legacy sar cleanup
137+
puppet_metrics_collector::sar_metric { 'system_memory' :
138+
metric_ensure => 'absent',
139+
cron_minute => '0',
140+
retention_days => 0,
141+
collection_frequency => 0,
142+
polling_frequency_seconds => 0,
143+
metrics_shipping_command => 'foo',
144+
}
136145
}

manifests/system/memory.pp

Lines changed: 0 additions & 20 deletions
This file was deleted.

manifests/system/cpu.pp renamed to manifests/system/sar.pp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,19 +2,22 @@
22
#
33
# @api private
44
#
5-
class puppet_metrics_collector::system::cpu (
5+
class puppet_metrics_collector::system::sar (
66
String $metrics_ensure = $puppet_metrics_collector::system::system_metrics_ensure,
77
Integer $collection_frequency = $puppet_metrics_collector::system::collection_frequency,
88
Integer $retention_days = $puppet_metrics_collector::system::retention_days,
99
Integer $polling_frequency_seconds = $puppet_metrics_collector::system::polling_frequency_seconds,
1010
Optional[String] $metrics_shipping_command = undef,
1111
) {
12+
# This is to ensure that files are written to a directory that the sup script will pick up
1213
puppet_metrics_collector::sar_metric { 'system_cpu' :
1314
metric_ensure => $metrics_ensure,
1415
cron_minute => "0/${collection_frequency}",
1516
retention_days => $retention_days,
1617
collection_frequency => $collection_frequency,
1718
polling_frequency_seconds => $polling_frequency_seconds,
1819
metrics_shipping_command => $metrics_shipping_command,
20+
# This ensures that sar reports the time field as one 24 hour field, instead of a 12 hour format with spaces
21+
env_vars => { 'LC_TIME' => 'POSIX' },
1922
}
2023
}

spec/acceptance/pe_system_spec.rb

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -49,15 +49,13 @@
4949
context 'system timers are running' do
5050
it { expect(service('puppet_system_cpu-metrics.timer')).to be_running }
5151
it { expect(service('puppet_system_cpu-tidy.timer')).to be_running }
52-
it { expect(service('puppet_system_memory-metrics.timer')).to be_running }
53-
it { expect(service('puppet_system_memory-tidy.timer')).to be_running }
5452
it { expect(service('puppet_system_processes-metrics.timer')).to be_running }
5553
it { expect(service('puppet_system_processes-tidy.timer')).to be_running }
5654
end
5755

5856
it 'creates system tidy services files' do
59-
files = run_shell('ls /etc/systemd/system/puppet_system*-tidy.service').stdout
60-
expect(files.split("\n").count).to eq(3)
57+
expect(run_shell('ls /etc/systemd/system/puppet_system_cpu-tidy.service').exit_code).to eq(0)
58+
expect(run_shell('ls /etc/systemd/system/puppet_system_processes-tidy.service').exit_code).to eq(0)
6159
end
6260
end
6361

@@ -79,15 +77,13 @@ class { 'puppet_metrics_collector::system':
7977
context 'system timers are running' do
8078
it { expect(service('puppet_system_cpu-metrics.timer')).to be_running }
8179
it { expect(service('puppet_system_cpu-tidy.timer')).to be_running }
82-
it { expect(service('puppet_system_memory-metrics.timer')).to be_running }
83-
it { expect(service('puppet_system_memory-tidy.timer')).to be_running }
8480
it { expect(service('puppet_system_processes-metrics.timer')).to be_running }
8581
it { expect(service('puppet_system_processes-tidy.timer')).to be_running }
8682
end
8783

8884
it 'creates system tidy services files' do
89-
files = run_shell('ls /etc/systemd/system/puppet_system*-tidy.service').stdout
90-
expect(files.split("\n").count).to eq(3)
85+
expect(run_shell('ls /etc/systemd/system/puppet_system_cpu-tidy.service').exit_code).to eq(0)
86+
expect(run_shell('ls /etc/systemd/system/puppet_system_processes-tidy.service').exit_code).to eq(0)
9187
end
9288
end
9389
end

0 commit comments

Comments
 (0)