Skip to content

Commit 0955fce

Browse files
ROX-26637: Reread configmap when it is updated (#1889)
1 parent 466a38f commit 0955fce

File tree

5 files changed

+153
-16
lines changed

5 files changed

+153
-16
lines changed

collector/lib/CollectorConfig.cpp

Lines changed: 132 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,9 @@
44
#include <optional>
55
#include <sstream>
66

7+
#include <sys/inotify.h>
8+
#include <sys/select.h>
9+
710
#include <libsinsp/sinsp.h>
811

912
#include "CollectionMethod.h"
@@ -279,7 +282,6 @@ void CollectorConfig::InitCollectorConfig(CollectorArgs* args) {
279282
HandleAfterglowEnvVars();
280283
HandleConnectionStatsEnvVars();
281284
HandleSinspEnvVars();
282-
HandleConfig(config_file.value());
283285

284286
host_config_ = ProcessHostHeuristics(*this);
285287
}
@@ -398,9 +400,11 @@ void CollectorConfig::HandleSinspEnvVars() {
398400
}
399401

400402
void CollectorConfig::YamlConfigToConfig(YAML::Node& yamlConfig) {
403+
// Don't read the file during a scrape
404+
std::unique_lock<std::shared_mutex> lock(mutex_);
405+
401406
if (yamlConfig.IsNull() || !yamlConfig.IsDefined()) {
402-
CLOG(FATAL) << "Unable to read config from config file";
403-
return;
407+
throw std::runtime_error("Unable to read config from config file");
404408
}
405409
YAML::Node networking = yamlConfig["networking"];
406410
if (!networking) {
@@ -422,33 +426,147 @@ void CollectorConfig::YamlConfigToConfig(YAML::Node& yamlConfig) {
422426
externalIpsConfig->set_enable(enableExternalIps);
423427

424428
SetRuntimeConfig(runtime_config);
425-
CLOG(INFO) << "Runtime configuration:";
426-
CLOG(INFO) << GetRuntimeConfigStr();
429+
CLOG(INFO) << "Runtime configuration:\n"
430+
<< GetRuntimeConfigStr();
427431

428432
return;
429433
}
430434

431-
void CollectorConfig::HandleConfig(const std::filesystem::path& filePath) {
435+
bool CollectorConfig::HandleConfig(const std::filesystem::path& filePath) {
432436
if (!std::filesystem::exists(filePath)) {
433437
CLOG(DEBUG) << "No configuration file found. " << filePath;
434-
return;
438+
return true;
435439
}
436440

437441
try {
438442
YAML::Node yamlConfig = YAML::LoadFile(filePath);
439443
YamlConfigToConfig(yamlConfig);
440444
} catch (const YAML::BadFile& e) {
441-
CLOG(FATAL) << "Failed to open the configuration file: " << filePath
442-
<< ". Error: " << e.what();
445+
CLOG(ERROR) << "Failed to open the configuration file: " << filePath << ". Error: " << e.what();
446+
return false;
443447
} catch (const YAML::ParserException& e) {
444-
CLOG(FATAL) << "Failed to parse the configuration file: " << filePath
445-
<< ". Error: " << e.what();
448+
CLOG(ERROR) << "Failed to parse the configuration file: " << filePath << ". Error: " << e.what();
449+
return false;
446450
} catch (const YAML::Exception& e) {
447-
CLOG(FATAL) << "An error occurred while loading the configuration file: " << filePath
448-
<< ". Error: " << e.what();
451+
CLOG(ERROR) << "An error occurred while loading the configuration file: " << filePath << ". Error: " << e.what();
452+
return false;
449453
} catch (const std::exception& e) {
450-
CLOG(FATAL) << "An unexpected error occurred while trying to read: " << filePath << e.what();
454+
CLOG(ERROR) << "An unexpected error occurred while trying to read: " << filePath << e.what();
455+
return false;
456+
}
457+
458+
return true;
459+
}
460+
461+
void CollectorConfig::WaitForFileToExist(const std::filesystem::path& filePath) {
462+
int count = 0;
463+
while (!std::filesystem::exists(filePath) && !thread_.should_stop()) {
464+
sleep(1);
465+
count++;
466+
if (count == 45) {
467+
std::unique_lock<std::shared_mutex> lock(mutex_);
468+
runtime_config_.reset();
469+
}
470+
}
471+
}
472+
473+
int CollectorConfig::WaitForInotifyAddWatch(int fd, const std::filesystem::path& filePath) {
474+
while (!thread_.should_stop()) {
475+
int wd = inotify_add_watch(fd, filePath.c_str(), IN_MODIFY | IN_MOVE_SELF | IN_DELETE_SELF);
476+
if (wd < 0) {
477+
CLOG_THROTTLED(ERROR, std::chrono::seconds(30)) << "Failed to add inotify watch for " << filePath << ": (" << errno << ") " << StrError();
478+
} else {
479+
return wd;
480+
}
481+
sleep(1);
482+
}
483+
484+
return -1;
485+
}
486+
487+
void CollectorConfig::WatchConfigFile(const std::filesystem::path& filePath) {
488+
fd_set rfds;
489+
struct timeval tv = {};
490+
int fd = inotify_init();
491+
if (fd < 0) {
492+
CLOG(ERROR) << "inotify_init() failed: " << StrError();
493+
CLOG(ERROR) << "Runtime configuration will not be used.";
494+
return;
495+
}
496+
497+
WaitForFileToExist(filePath);
498+
int wd = WaitForInotifyAddWatch(fd, filePath);
499+
if (wd == -1) {
500+
CLOG(ERROR) << "Failed to add inotify watch, runtime configuration will not be used.";
501+
close(fd);
502+
return;
451503
}
504+
505+
bool success = HandleConfig(filePath);
506+
if (!success) {
507+
CLOG(FATAL) << "Unable to parse configuration file: " << filePath;
508+
}
509+
510+
char buffer[1024];
511+
512+
while (!thread_.should_stop()) {
513+
tv.tv_sec = 2;
514+
tv.tv_usec = 0;
515+
FD_ZERO(&rfds);
516+
FD_SET(fd, &rfds);
517+
518+
int retval = select(FD_SETSIZE, &rfds, nullptr, nullptr, &tv);
519+
if (retval < 0) {
520+
CLOG(WARNING) << "'select' call failed: (" << errno << ") " << StrError(errno);
521+
continue;
522+
}
523+
524+
if (retval == 0) {
525+
// select timed out, check if we should be stopping.
526+
continue;
527+
}
528+
529+
// Received event from inotify.
530+
int length = read(fd, buffer, sizeof(buffer));
531+
if (length < 0) {
532+
CLOG_THROTTLED(ERROR, std::chrono::seconds(30)) << "Unable to read event for " << filePath;
533+
continue;
534+
}
535+
536+
struct inotify_event* event;
537+
for (int i = 0; i < length; i += sizeof(struct inotify_event) + event->len) {
538+
event = (struct inotify_event*)&buffer[i];
539+
if (event->mask & IN_MODIFY) {
540+
HandleConfig(filePath);
541+
} else if ((event->mask & IN_MOVE_SELF) || (event->mask & IN_DELETE_SELF)) {
542+
inotify_rm_watch(fd, wd);
543+
544+
WaitForFileToExist(filePath);
545+
wd = WaitForInotifyAddWatch(fd, filePath);
546+
if (wd == -1) {
547+
// Only situation that could get us here is if collector is
548+
// stopping, so we break out and let the thread finish.
549+
break;
550+
}
551+
552+
HandleConfig(filePath);
553+
}
554+
}
555+
}
556+
557+
CLOG(INFO) << "No longer using inotify on " << filePath;
558+
inotify_rm_watch(fd, wd);
559+
close(fd);
560+
}
561+
562+
void CollectorConfig::Start() {
563+
thread_.Start([this] { WatchConfigFile(config_file.value()); });
564+
CLOG(INFO) << "Watching config file: " << config_file.value();
565+
}
566+
567+
void CollectorConfig::Stop() {
568+
thread_.Stop();
569+
CLOG(INFO) << "No longer watching config file" << config_file.value();
452570
}
453571

454572
bool CollectorConfig::TurnOffScrape() const {

collector/lib/CollectorConfig.h

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33

44
#include <optional>
55
#include <ostream>
6+
#include <shared_mutex>
67
#include <vector>
78

89
#include <json/json.h>
@@ -15,6 +16,7 @@
1516
#include "CollectionMethod.h"
1617
#include "HostConfig.h"
1718
#include "NetworkConnection.h"
19+
#include "StoppableThread.h"
1820
#include "TlsConfig.h"
1921
#include "json/value.h"
2022
#include "optionparser.h"
@@ -95,6 +97,7 @@ class CollectorConfig {
9597
// of a runtime configuration, and defer to that value
9698
// otherwise, we rely on the feature flag (env var)
9799
bool EnableExternalIPs() const {
100+
std::shared_lock<std::shared_mutex> lock(mutex_);
98101
if (runtime_config_.has_value()) {
99102
return runtime_config_.value()
100103
.networking()
@@ -127,6 +130,8 @@ class CollectorConfig {
127130
unsigned int GetSinspBufferSize() const;
128131
unsigned int GetSinspTotalBufferSize() const { return sinsp_total_buffer_size_; }
129132
unsigned int GetSinspThreadCacheSize() const { return sinsp_thread_cache_size_; }
133+
void Start();
134+
void Stop();
130135

131136
static std::pair<option::ArgStatus, std::string> CheckConfiguration(const char* config, Json::Value* root);
132137

@@ -197,12 +202,17 @@ class CollectorConfig {
197202
std::optional<TlsConfig> tls_config_;
198203

199204
std::optional<sensor::CollectorConfig> runtime_config_;
205+
StoppableThread thread_;
206+
mutable std::shared_mutex mutex_{};
200207

201208
void HandleAfterglowEnvVars();
202209
void HandleConnectionStatsEnvVars();
203210
void HandleSinspEnvVars();
204211
void YamlConfigToConfig(YAML::Node& yamlConfig);
205-
void HandleConfig(const std::filesystem::path& filePath);
212+
bool HandleConfig(const std::filesystem::path& filePath);
213+
void WaitForFileToExist(const std::filesystem::path& filePath);
214+
int WaitForInotifyAddWatch(int fd, const std::filesystem::path& filePath);
215+
void WatchConfigFile(const std::filesystem::path& filePath);
206216

207217
// Protected, used for testing purposes
208218
void SetSinspBufferSize(unsigned int buffer_size);

collector/lib/CollectorService.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,7 @@ void CollectorService::RunForever() {
5959
exposer.RegisterCollectable(registry);
6060

6161
CollectorStatsExporter exporter(registry, &config_, &system_inspector_);
62+
config_.Start();
6263

6364
std::unique_ptr<NetworkStatusNotifier> net_status_notifier;
6465

@@ -136,6 +137,7 @@ void CollectorService::RunForever() {
136137
if (net_status_notifier) {
137138
net_status_notifier->Stop();
138139
}
140+
config_.Stop();
139141
// Shut down these first since they access the system inspector object.
140142
exporter.stop();
141143
server.close();

collector/test/CollectorConfigTest.cpp

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -223,7 +223,11 @@ TEST(CollectorConfigTest, TestYamlConfigToConfigEmpty) {
223223

224224
MockCollectorConfig config;
225225

226-
EXPECT_DEATH({ config.MockYamlConfigToConfig(yamlNode); }, ".*");
226+
EXPECT_THROW({ config.MockYamlConfigToConfig(yamlNode); }, std::runtime_error);
227+
228+
auto runtime_config = config.GetRuntimeConfig();
229+
230+
EXPECT_FALSE(runtime_config.has_value());
227231
}
228232

229233
} // namespace collector

docs/references.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -141,6 +141,9 @@ data:
141141
```
142142

143143
The file path can be set using the `ROX_COLLECTOR_CONFIG_PATH` environment variable.
144+
Whenever the configuration file is updated or created, collector will update
145+
the configuration. If the configuration file is deleted the configuration will revert
146+
to the default. The configuration file can be created after collector start up.
144147

145148
### Other arguments
146149

0 commit comments

Comments
 (0)