Skip to content

Commit a7e3a36

Browse files
committed
BP: add option to configure handling offline backends (offline_handling)
1 parent 60bdaae commit a7e3a36

File tree

4 files changed

+63
-20
lines changed

4 files changed

+63
-20
lines changed

Changes

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ next:
66
- add more output functions: age, utc, date, duration, fmt, hoststate and servicestate
77
- Business Process:
88
- add option to configure offline backends grace period (offline_grace_time)
9+
- add option to configure handling of offline backends (offline_handling)
910

1011
3.24 Thu Oct 2 09:20:56 CEST 2025
1112
- teams: add role based permissions based on oauth group memberships

docs/documentation/configuration.asciidoc

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3329,6 +3329,13 @@ will keep its last known state.
33293329

33303330
Default is 180 seconds.
33313331

3332+
=== offline_handling
3333+
Decide how to handle offline backends in business processes. Valid values are:
3334+
3335+
* `0` - do nothing if backends are offline, simply skip the calculation for the affected business process
3336+
* `1` - continue calculation after grace period (default)
3337+
* `2` - set the business process to unknown state if any backend is still offline after the grace period
3338+
33323339
=== default_filter
33333340
Add global filter to all business processes, can be set multiple times.
33343341

plugins/plugins-available/business_process/lib/Thruk/BP/Components/BP.pm

Lines changed: 52 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -212,43 +212,75 @@ sub update_status {
212212
my $last_state = $self->{'status'};
213213

214214
my $results = [];
215-
my($livedata);
215+
my($livedata, $skip_update);
216216
if($type == 0) {
217217
$livedata = $self->bulk_fetch_live_data($c);
218218
my $previous_affected = $self->{'affected_peers'};
219219
$self->{'affected_peers'} = $self->_extract_affected_backends($livedata);
220220
my $failed = $self->_list_failed_backends($c, $previous_affected, $c->stash->{'failed_backends'});
221221
my $offline_grace_time = $c->config->{'Thruk::Plugin::BP'}->{'offline_grace_time'} // 180;
222-
if(scalar @{$failed} > 0 && ($self->{'last_check'} > (time() - $offline_grace_time))) {
223-
_warn(sprintf("not updating business process '%s' because the backends %s are unavailable. Waiting %s to recover, last successful update: %s",
224-
$self->{'name'},
225-
join(",", @{$failed}),
226-
Thruk::Utils::Filter::duration($offline_grace_time, 5),
227-
(scalar localtime $self->{'last_check'}),
228-
));
229-
return;
222+
my $offline_handling = $c->config->{'Thruk::Plugin::BP'}->{'offline_handling'} // 1;
223+
if(scalar @{$failed} > 0) {
224+
if($offline_handling == 0) {
225+
_warn(sprintf("not updating business process '%s' because the backends %s are unavailable. Waiting to recover, last successful update: %s",
226+
$self->{'name'},
227+
join(",", @{$failed}),
228+
(scalar localtime $self->{'last_check'}),
229+
));
230+
return;
231+
}
232+
if($self->{'last_check'} > (time() - $offline_grace_time)) {
233+
_warn(sprintf("not updating business process '%s' because the backends %s are unavailable. Waiting %s to recover, last successful update: %s",
234+
$self->{'name'},
235+
join(",", @{$failed}),
236+
Thruk::Utils::Filter::duration($offline_grace_time, 5),
237+
(scalar localtime $self->{'last_check'}),
238+
));
239+
return;
240+
}
241+
242+
# backends offline and grace period expired
243+
if($offline_handling == 1) {
244+
# continue normally
245+
} elsif($offline_handling == 2) {
246+
# set BP to unknown
247+
$self->{'affected_peers'} = $previous_affected;
248+
$skip_update = 1;
249+
for my $n (@{$self->{'nodes'}}) {
250+
next unless $n->{'create_obj'};
251+
$n->set_status(3, 'UNKNOWN - backend(s) offline: '.join(", ", @{$failed}));
252+
push @{$results}, $n->{'id'};
253+
}
254+
$self->set_status(3, 'UNKNOWN - backend(s) offline: '.join(", ", @{$failed}));
255+
} else {
256+
die("invalid offline_handling value (valid from 0-2): ".$offline_handling);
257+
}
230258
}
231-
for my $n (@{$self->{'nodes'}}) {
232-
my $r = $n->update_status($c, $self, $livedata);
233-
push @{$results}, $n->{'id'} if $r;
259+
if(!$skip_update) {
260+
for my $n (@{$self->{'nodes'}}) {
261+
my $r = $n->update_status($c, $self, $livedata);
262+
push @{$results}, $n->{'id'} if $r;
263+
}
234264
}
235265
}
236266

237-
my $iterations = 0;
238-
while(scalar keys %{$self->{'need_update'}} > 0) {
239-
$iterations++;
240-
for my $id (keys %{$self->{'need_update'}}) {
241-
my $r = $self->{'nodes_by_id'}->{$id}->update_status($c, $self, $livedata, $type);
242-
push @{$results}, $id if $r;
267+
if(!$skip_update) {
268+
my $iterations = 0;
269+
while(scalar keys %{$self->{'need_update'}} > 0) {
270+
$iterations++;
271+
for my $id (keys %{$self->{'need_update'}}) {
272+
my $r = $self->{'nodes_by_id'}->{$id}->update_status($c, $self, $livedata, $type);
273+
push @{$results}, $id if $r;
274+
}
275+
die("circular dependencies? Still have these on the update list: ".Dumper($self->{'need_update'})) if $iterations > 10;
243276
}
244-
die("circular dependenies? Still have these on the update list: ".Dumper($self->{'need_update'})) if $iterations > 10;
245277
}
246278

247279
$results = Thruk::Base::array_uniq($results);
248280

249281
# update last check time
250282
my $now = time();
251-
$self->{'last_check'} = $now;
283+
$self->{'last_check'} = $now unless $skip_update;
252284
if($last_state != $self->{'status'}) {
253285
$self->{'last_state_change'} = $now;
254286
}

thruk.conf

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1162,6 +1162,9 @@ locked_message = account is locked, please contact an adminis
11621162
# to wait until a backend is considered offline.
11631163
#offline_grace_time = 180
11641164

1165+
# How to handle offline backends (0 = do nothing, 1 = calculate normally after grace time, 2 = set affected bps to unknown after grace time)
1166+
#offline_handling=1
1167+
11651168
# Default filter
11661169
# add filter to all business processes, can be set multiple times
11671170
#default_filter = add_recursive_output_filter

0 commit comments

Comments
 (0)