-
Notifications
You must be signed in to change notification settings - Fork 11
For Kubernetes watch, add tests of backoff and reconnection on failures. Also, when watch response is >= 300, count that as a failure. #205
base: master
Are you sure you want to change the base?
Changes from 2 commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -78,8 +78,14 @@ class KubernetesReader::NonRetriableError | |
|
||
KubernetesReader::KubernetesReader(const Configuration& config, | ||
HealthChecker* health_checker) | ||
: KubernetesReader(config, health_checker, SleeperImpl::New()) {} | ||
|
||
KubernetesReader::KubernetesReader(const Configuration& config, | ||
HealthChecker* health_checker, | ||
std::unique_ptr<Sleeper> sleeper) | ||
: config_(config), environment_(config), health_checker_(health_checker), | ||
service_account_directory_(kServiceAccountDirectory) {} | ||
service_account_directory_(kServiceAccountDirectory), | ||
sleeper_(std::move(sleeper)) {} | ||
|
||
std::string KubernetesReader::SecretPath(const std::string& secret) const { | ||
return service_account_directory_ + "/" + secret; | ||
|
@@ -837,21 +843,29 @@ void KubernetesReader::WatchMaster( | |
if (verbose) { | ||
LOG(INFO) << "WatchMaster(" << name << ") completed " << body(response); | ||
} | ||
if (status(response) >= 300) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is an unrelated fix not mentioned in the PR description. Mitigations, in order of preference, would be to pull it (and the associated test changes) out into a separate PR, or at least mention it in the description. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Updated the PR description. The fix is intertwined with the new tests, so it's not really possible to pull out into a separate PR. |
||
throw boost::system::system_error( | ||
boost::system::errc::make_error_code( | ||
boost::system::errc::not_connected), | ||
format::Substitute("Server responded with '{{message}}' ({{code}})", | ||
{{"message", status_message(response)}, | ||
{"code", format::str(status(response))}})); | ||
} | ||
// Connection closed without an error; reset failure count. | ||
failures = 0; | ||
} catch (const boost::system::system_error& e) { | ||
LOG(ERROR) << "Failed to query " << endpoint << ": " << e.what(); | ||
++failures; | ||
if (failures > config_.KubernetesUpdaterWatchMaxConnectionFailures()) { | ||
if (failures >= config_.KubernetesUpdaterWatchMaxConnectionFailures()) { | ||
LOG(ERROR) << "WatchMaster(" << name << "): Exiting after " | ||
<< failures << " failures"; | ||
throw QueryException(endpoint + " -> " + e.what()); | ||
} | ||
++failures; | ||
double backoff = fmin(pow(1.5, failures), 30); | ||
if (verbose) { | ||
LOG(INFO) << "Backing off for " << backoff << " seconds"; | ||
} | ||
std::this_thread::sleep_for(time::seconds(backoff)); | ||
sleeper_->SleepFor(backoff); | ||
} | ||
} | ||
} | ||
|
@@ -1310,10 +1324,17 @@ void KubernetesReader::WatchEndpoints( | |
KubernetesUpdater::KubernetesUpdater(const Configuration& config, | ||
HealthChecker* health_checker, | ||
MetadataStore* store) | ||
: reader_(config, health_checker), PollingMetadataUpdater( | ||
config, store, "KubernetesUpdater", | ||
config.KubernetesUpdaterIntervalSeconds(), | ||
[=]() { return reader_.MetadataQuery(); }) { } | ||
: KubernetesUpdater(config, health_checker, store, SleeperImpl::New()) {} | ||
|
||
KubernetesUpdater::KubernetesUpdater(const Configuration& config, | ||
HealthChecker* health_checker, | ||
MetadataStore* store, | ||
std::unique_ptr<Sleeper> sleeper) | ||
: reader_(config, health_checker, std::move(sleeper)), | ||
PollingMetadataUpdater( | ||
config, store, "KubernetesUpdater", | ||
config.KubernetesUpdaterIntervalSeconds(), | ||
[=]() { return reader_.MetadataQuery(); }) { } | ||
|
||
void KubernetesUpdater::ValidateDynamicConfiguration() const | ||
throw(ConfigurationValidationError) { | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -20,6 +20,10 @@ GTEST_HEADERS=$(GTEST_DIR)/include/gtest/*.h \ | |
$(GTEST_DIR)/include/gtest/internal/*.h | ||
GTEST_SRCS_=$(GTEST_SOURCEDIR)/*.cc $(GTEST_SOURCEDIR)/*.h $(GTEST_HEADERS) | ||
GMOCK_DIR=$(LIBDIR)/googletest/googlemock | ||
GMOCK_SOURCEDIR=$(GMOCK_DIR)/src | ||
GMOCK_HEADERS=$(GMOCK_DIR)/include/gtest/*.h \ | ||
$(GMOCK_DIR)/include/gtest/internal/*.h | ||
GMOCK_SRCS_=$(GMOCK_SOURCEDIR)/*.cc $(GMOCK_SOURCEDIR)/*.h $(GMOCK_HEADERS) | ||
|
||
# TODO: Factor out the common variables. | ||
CPP_NETLIB_DIR=$(LIBDIR)/cpp-netlib | ||
|
@@ -85,6 +89,7 @@ UTIL_SOURCES=$(TEST_DIR)/fake_clock.cc $(TEST_DIR)/fake_http_server.cc | |
UTIL_OBJS=$(UTIL_SOURCES:$(TEST_DIR)/%.cc=%.o) | ||
|
||
GTEST_LIB=gtest_lib.a | ||
GMOCK_LIB=gmock_lib.a | ||
|
||
all: $(SRC_DIR)/build-cpp-netlib $(SRC_DIR)/build-yaml-cpp $(TESTS) | ||
|
||
|
@@ -97,14 +102,14 @@ test: $(TESTS) | |
done | ||
|
||
clean: | ||
$(RM) $(TESTS) $(GTEST_LIB) $(TEST_OBJS) $(UTIL_OBJS) | ||
$(RM) $(TESTS) $(GTEST_LIB) $(GMOCK_LIB) $(TEST_OBJS) $(UTIL_OBJS) | ||
|
||
purge: clean | ||
$(RM) init-submodules | ||
(cd .. && git submodule deinit -f $(GTEST_MODULE:../%=%)) | ||
(cd .. && git submodule deinit -f $(GTEST_MODULE:../%=%) && git submodule deinit -f $(GMOCK_MODULE:../%=%)) | ||
|
||
|
||
init-submodules: | ||
(cd .. && git submodule update --init $(GTEST_MODULE:../%=%)) | ||
(cd .. && git submodule update --init $(GTEST_MODULE:../%=%) && git submodule update --init $(GMOCK_MODULE:../%=%)) | ||
|
||
touch init-submodules | ||
|
||
$(SRC_DIR)/init-submodules: | ||
|
@@ -121,21 +126,28 @@ $(SRC_DIR)/%.o: $(SRC_DIR)/build-cpp-netlib $(SRC_DIR)/build-yaml-cpp $(SRC_DIR) | |
cd $(SRC_DIR) && $(MAKE) $(@:$(SRC_DIR)/%=%) | ||
|
||
$(GTEST_SOURCEDIR)/gtest-all.cc $(GTEST_SOURCEDIR)/gtest_main.cc: init-submodules | ||
$(GMOCK_SOURCEDIR)/gmock-all.cc $(GMOCK_SOURCEDIR)/gmock_main.cc: init-submodules | ||
|
||
gtest-all.o: $(GTEST_SOURCEDIR)/gtest-all.cc | ||
$(CXX) -c $(CPPFLAGS) -I$(GTEST_DIR) $(CXXFLAGS) -o $@ $^ | ||
gmock-all.o: $(GMOCK_SOURCEDIR)/gmock-all.cc | ||
$(CXX) -c $(CPPFLAGS) -I$(GMOCK_DIR) $(CXXFLAGS) -o $@ $^ | ||
|
||
gtest_main.o: $(GTEST_SOURCEDIR)/gtest_main.cc | ||
$(CXX) $(CPPFLAGS) $(CXXFLAGS) -c -o $@ $^ | ||
gmock_main.o: $(GMOCK_SOURCEDIR)/gmock_main.cc | ||
$(CXX) $(CPPFLAGS) $(CXXFLAGS) -c -o $@ $^ | ||
|
||
$(GTEST_LIB): gtest-all.o gtest_main.o | ||
$(AR) $(ARFLAGS) $@ $^ | ||
$(GMOCK_LIB): gmock-all.o gmock_main.o | ||
$(AR) $(ARFLAGS) $@ $^ | ||
|
||
$(TESTS): $(GTEST_LIB) $(CPP_NETLIB_LIBS) $(YAML_CPP_LIBS) | ||
$(TESTS): $(GTEST_LIB) $(GMOCK_LIB) $(CPP_NETLIB_LIBS) $(YAML_CPP_LIBS) | ||
|
||
# All unittest objects depend on GTEST_LIB. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. And There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. No longer needed. |
||
# Some headers need CPP_NETLIB_LIBS and YAML_CPP_LIBS. | ||
$(TESTS:%=%.o): $(GTEST_LIB) $(CPP_NETLIB_LIBS) $(YAML_CPP_LIBS) | ||
$(TESTS:%=%.o): $(GTEST_LIB) $(GMOCK_LIB) $(CPP_NETLIB_LIBS) $(YAML_CPP_LIBS) | ||
|
||
api_server_unittest: api_server_unittest.o $(SRC_DIR)/api_server.o $(SRC_DIR)/configuration.o $(SRC_DIR)/store.o $(SRC_DIR)/json.o $(SRC_DIR)/resource.o $(SRC_DIR)/logging.o $(SRC_DIR)/time.o $(SRC_DIR)/health_checker.o | ||
$(CXX) $(LDFLAGS) $^ $(LDLIBS) -o $@ | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can we rename
sleeper
to be a bit more descriptive? This only seems to be used during retry backoffs.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
+1. The sleep functionality is to induce the passage of time, so time-related tool names like "Timer", "Stopwatch", "AlarmClock", etc are usually appropriate. Would it make sense to reuse the existing
Timer
class and broaden its interface?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Happy to rename. What would you like?