Skip to content

Commit 1491bba

Browse files
ovalentiMolter73Stringy
authored
ROX-26025: Backport UTF-8 sanitizing fix for 4.5 (#1863)
* ROX-26025: Sanitize UTF-8 strings in process signals (#1857) Replace every byte of an invalid UTF-8 sequence with '?' * Pin ansible version to <2.17 to avoid loop bug (#1837) --------- Co-authored-by: Mauro Ezequiel Moltrasio <mmoltras@redhat.com> Co-authored-by: Giles Hutton <ghutton@redhat.com>
1 parent 26b423f commit 1491bba

File tree

6 files changed

+77
-12
lines changed

6 files changed

+77
-12
lines changed

ansible/requirements.txt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
# TODO: unpin if RHEL 8 VMs on GCP update their python interpreter to 3.7+
22
# https://github.com/ansible/ansible/blob/v2.17.0/changelogs/CHANGELOG-v2.17.rst#removed-features-previously-deprecated
3-
ansible<10
3+
ansible-core==2.16.10
4+
ansible==9.7.0
45
# TODO: unpin after https://github.com/docker/docker-py gets a release with
56
# https://github.com/docker/docker-py/commit/7785ad913ddf2d86478f08278bb2c488d05a29ff
67
requests==2.31.0

ansible/requirements.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
---
22
collections:
3+
- google.cloud
34
- community.general
45
- community.docker
56
- containers.podman

collector/lib/ProcessSignalFormatter.cpp

Lines changed: 31 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,11 @@ std::string extract_proc_args(sinsp_threadinfo* tinfo) {
4040
if (tinfo->m_args.empty()) return "";
4141
std::ostringstream args;
4242
for (auto it = tinfo->m_args.begin(); it != tinfo->m_args.end();) {
43-
args << *it++;
43+
auto arg = *it++;
44+
auto arg_sanitized = SanitizedUTF8(arg);
45+
46+
args << ((arg_sanitized ? *arg_sanitized : arg));
47+
4448
if (it != tinfo->m_args.end()) args << " ";
4549
}
4650
return args.str();
@@ -106,22 +110,37 @@ ProcessSignal* ProcessSignalFormatter::CreateProcessSignal(sinsp_evt* event) {
106110
const std::string* name = event_extractor_->get_comm(event);
107111
const std::string* exepath = event_extractor_->get_exepath(event);
108112

113+
std::optional<std::string> name_sanitized;
114+
std::optional<std::string> exepath_sanitized;
115+
116+
if (name) {
117+
name_sanitized = SanitizedUTF8(*name);
118+
}
119+
120+
if (exepath) {
121+
exepath_sanitized = SanitizedUTF8(*exepath);
122+
}
123+
109124
// set name (if name is missing or empty, try to use exec_file_path)
110125
if (name && !name->empty() && *name != "<NA>") {
111-
signal->set_name(*name);
126+
signal->set_name(name_sanitized ? *name_sanitized : *name);
112127
} else if (exepath && !exepath->empty() && *exepath != "<NA>") {
113-
signal->set_name(*exepath);
128+
signal->set_name(exepath_sanitized ? *exepath_sanitized : *exepath);
114129
}
115130

116131
// set exec_file_path (if exec_file_path is missing or empty, try to use name)
117132
if (exepath && !exepath->empty() && *exepath != "<NA>") {
118-
signal->set_exec_file_path(*exepath);
133+
signal->set_exec_file_path(exepath_sanitized ? *exepath_sanitized : *exepath);
119134
} else if (name && !name->empty() && *name != "<NA>") {
120-
signal->set_exec_file_path(*name);
135+
signal->set_exec_file_path(name_sanitized ? *name_sanitized : *name);
121136
}
122137

123138
// set process arguments
124-
if (const char* args = event_extractor_->get_proc_args(event)) signal->set_args(args);
139+
if (const char* args = event_extractor_->get_proc_args(event)) {
140+
std::string args_str = args;
141+
auto args_sanitized = SanitizedUTF8(args_str);
142+
signal->set_args(args_sanitized ? *args_sanitized : args_str);
143+
}
125144

126145
// set pid
127146
if (const int64_t* pid = event_extractor_->get_pid(event)) signal->set_pid(*pid);
@@ -164,20 +183,22 @@ ProcessSignal* ProcessSignalFormatter::CreateProcessSignal(sinsp_threadinfo* tin
164183
signal->set_id(UUIDStr());
165184

166185
const auto& name = tinfo->m_comm;
186+
auto name_sanitized = SanitizedUTF8(name);
167187
const auto& exepath = tinfo->m_exepath;
188+
auto exepath_sanitized = SanitizedUTF8(exepath);
168189

169190
// set name (if name is missing or empty, try to use exec_file_path)
170191
if (!name.empty() && name != "<NA>") {
171-
signal->set_name(name);
192+
signal->set_name(name_sanitized ? *name_sanitized : name);
172193
} else if (!exepath.empty() && exepath != "<NA>") {
173-
signal->set_name(exepath);
194+
signal->set_name(exepath_sanitized ? *exepath_sanitized : exepath);
174195
}
175196

176197
// set exec_file_path (if exec_file_path is missing or empty, try to use name)
177198
if (!exepath.empty() && exepath != "<NA>") {
178-
signal->set_exec_file_path(exepath);
199+
signal->set_exec_file_path(exepath_sanitized ? *exepath_sanitized : exepath);
179200
} else if (!name.empty() && name != "<NA>") {
180-
signal->set_exec_file_path(name);
201+
signal->set_exec_file_path(name_sanitized ? *name_sanitized : name);
181202
}
182203

183204
// set the process as coming from a scrape as opposed to an exec

collector/lib/Utility.cpp

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,8 @@ extern "C" {
2020

2121
#include <libsinsp/sinsp.h>
2222

23+
#include <google/protobuf/stubs/common.h>
24+
2325
#include "HostInfo.h"
2426
#include "Logging.h"
2527
#include "Utility.h"
@@ -247,4 +249,17 @@ std::optional<std::string_view> ExtractContainerIDFromCgroup(std::string_view cg
247249
}
248250
return std::make_optional(container_id_part.substr(0, SHORT_CONTAINER_ID_LENGTH));
249251
}
252+
253+
std::optional<std::string> SanitizedUTF8(const std::string& str) {
254+
std::unique_ptr<char[]> work_buffer(new char[str.size()]);
255+
char* sanitized;
256+
257+
sanitized = google::protobuf::internal::UTF8CoerceToStructurallyValid(str, work_buffer.get(), '?');
258+
259+
if (sanitized != work_buffer.get()) {
260+
return std::nullopt;
261+
}
262+
return std::string(sanitized, str.size());
263+
}
264+
250265
} // namespace collector

collector/lib/Utility.h

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -115,6 +115,12 @@ ScopedLock<Mutex> Lock(Mutex& mutex) {
115115
extern const std::string kKernelModulesDir;
116116

117117
std::optional<std::string_view> ExtractContainerIDFromCgroup(std::string_view cgroup);
118-
} // namespace collector
119118

119+
// Replace any occurrence of an invalid UTF-8 sequence with the '?' character
120+
// Returns :
121+
// - a new string with invalid characters replaced.
122+
// - nullopt if there is no invalid character (the input string is valid).
123+
std::optional<std::string> SanitizedUTF8(const std::string& str);
124+
125+
} // namespace collector
120126
#endif // _UTILITY_H_

collector/test/UtilityTest.cpp

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -115,4 +115,25 @@ TEST(ExtractContainerIDFromCgroupTest, TestExtractContainerIDFromCgroup) {
115115
EXPECT_EQ(short_container_id, c.expected_output);
116116
}
117117
}
118+
119+
TEST(SanitizeUTF8Test, TestSanitizeUTF8_Invalid) {
120+
std::string input(
121+
"ab\x80"
122+
"cd");
123+
std::string expected_output("ab?cd");
124+
125+
auto output = SanitizedUTF8(input);
126+
127+
EXPECT_TRUE(output.has_value());
128+
EXPECT_EQ(*output, expected_output);
129+
}
130+
131+
TEST(SanitizeUTF8Test, TestSanitizeUTF8_Valid) {
132+
std::string input("abcd");
133+
134+
auto output = SanitizedUTF8(input);
135+
136+
EXPECT_FALSE(output);
137+
}
138+
118139
} // namespace collector

0 commit comments

Comments
 (0)