From 02ee88a587dbfe0dcf4d4370f87ebcbcc6fb8f65 Mon Sep 17 00:00:00 2001 From: Dave Dykstra <2129743+DrDaveD@users.noreply.github.com> Date: Fri, 18 Oct 2024 13:28:29 -0500 Subject: [PATCH 1/4] fix bogus changelog entry --- packaging/rpm/cvmfs-universal.spec | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packaging/rpm/cvmfs-universal.spec b/packaging/rpm/cvmfs-universal.spec index 5b8dd2e9a3..f730ceedad 100644 --- a/packaging/rpm/cvmfs-universal.spec +++ b/packaging/rpm/cvmfs-universal.spec @@ -708,7 +708,7 @@ systemctl daemon-reload %endif %changelog -* Wed Nov 7 2023 Valentin Volkl - 2.11.2 +* Tue Nov 7 2023 Valentin Volkl - 2.11.2 - Rename registry-webhook.py to registry_webhook.py to allow imports * Wed Nov 16 2022 Jakob Blomer - 2.11.0 - Make cvmfs-libs a dependency of the cvmfs package From a52d91bece3f8d060dbf129417f97cbf523f2b16 Mon Sep 17 00:00:00 2001 From: Dave Dykstra <2129743+DrDaveD@users.noreply.github.com> Date: Fri, 18 Oct 2024 13:28:34 -0500 Subject: [PATCH 2/4] add metalink support infrastructure, based on host support --- cvmfs/mountpoint.cc | 13 ++- cvmfs/network/download.cc | 216 +++++++++++++++++++++++++------------- cvmfs/network/download.h | 55 +++++++--- cvmfs/network/jobinfo.h | 3 + cvmfs/talk.cc | 33 ++++++ cvmfs/talk.h | 1 + 6 files changed, 231 insertions(+), 90 deletions(-) diff --git a/cvmfs/mountpoint.cc b/cvmfs/mountpoint.cc index 5fc5071c5a..a188b2c58e 100644 --- a/cvmfs/mountpoint.cc +++ b/cvmfs/mountpoint.cc @@ -2138,7 +2138,11 @@ bool MountPoint::SetupExternalDownloadMgr(bool dogeosort) { } external_download_mgr_->SetTimeout(timeout, timeout_direct); - if (options_mgr_->GetValue("CVMFS_EXTERNAL_URL", &optarg)) { + if (options_mgr_->GetValue("CVMFS_EXTERNAL_METALINK", &optarg)) { + external_download_mgr_->SetMetalinkChain(optarg); + // host chain will be set later when the metalink server is contacted + external_download_mgr_->SetHostChain(""); + } else if (options_mgr_->GetValue("CVMFS_EXTERNAL_URL", &optarg)) { external_download_mgr_->SetHostChain(optarg); if (dogeosort) { std::vector host_chain; @@ -2208,8 +2212,13 @@ void MountPoint::SetupHttpTuning() { if (options_mgr_->GetValue("CVMFS_LOW_SPEED_LIMIT", &optarg)) download_mgr_->SetLowSpeedLimit(String2Uint64(optarg)); - if (options_mgr_->GetValue("CVMFS_PROXY_RESET_AFTER", &optarg)) + if (options_mgr_->GetValue("CVMFS_PROXY_RESET_AFTER", &optarg)) { download_mgr_->SetProxyGroupResetDelay(String2Uint64(optarg)); + // Use the proxy reset delay as the default for the metalink reset delay + download_mgr_->SetMetalinkResetDelay(String2Uint64(optarg)); + } + if (options_mgr_->GetValue("CVMFS_METALINK_RESET_AFTER", &optarg)) + download_mgr_->SetMetalinkResetDelay(String2Uint64(optarg)); if (options_mgr_->GetValue("CVMFS_HOST_RESET_AFTER", &optarg)) download_mgr_->SetHostResetDelay(String2Uint64(optarg)); diff --git a/cvmfs/network/download.cc b/cvmfs/network/download.cc index bf1923eb38..155ddf8ba4 100644 --- a/cvmfs/network/download.cc +++ b/cvmfs/network/download.cc @@ -222,6 +222,11 @@ static size_t CallbackCurlHeader(void *ptr, size_t size, size_t nmemb, // This comes along with redirects LogCvmfs(kLogDownload, kLogDebug, "(id %" PRId64 ") %s", info->id(), header_line.c_str()); + } else if (HasPrefix(header_line, "LINK:", true)) { + // This is metalink info + LogCvmfs(kLogDownload, kLogDebug, "(id %" PRId64 ") %s", + info->id(), header_line.c_str()); + info->SetLink(header_line.substr(5)); } else if (HasPrefix(header_line, "X-SQUID-ERROR:", true)) { // Reinterpret host error as proxy error if (info->error_code() == kFailHostHttp) { @@ -1002,6 +1007,28 @@ void DownloadManager::InitializeRequest(JobInfo *info, CURL *handle) { #endif } +void DownloadManager::CheckHostInfoReset( + std::string typ, + HostInfo &info, + JobInfo *jobinfo) +{ + if (info.timestamp_backup > 0) { + const time_t now = time(NULL); + if (static_cast(now) > + static_cast(info.timestamp_backup + + info.reset_after)) + { + LogCvmfs(kLogDownload, kLogDebug | kLogSyslogWarn, + "(manager %s - id %" PRId64 ") " + "switching %s from %s to %s (reset %s)", name_.c_str(), + jobinfo->id(), typ, (*info.chain)[info.current].c_str(), + (*info.chain)[0].c_str(), typ); + info.current = 0; + info.timestamp_backup = 0; + } + } +} + /** * Sets the URL specific options such as host to use and timeout. It might also @@ -1047,22 +1074,6 @@ void DownloadManager::SetUrlOptions(JobInfo *info) { "Reset load-balanced proxies within the active group"); } } - // Check if host needs to be reset - if (opt_timestamp_backup_host_ > 0) { - const time_t now = time(NULL); - if (static_cast(now) > - static_cast(opt_timestamp_backup_host_ + - opt_host_reset_after_)) - { - LogCvmfs(kLogDownload, kLogDebug | kLogSyslogWarn, - "(manager %s - id %" PRId64 ") " - "switching host from %s to %s (reset host)", name_.c_str(), - info->id(), (*opt_host_chain_)[opt_host_chain_current_].c_str(), - (*opt_host_chain_)[0].c_str()); - opt_host_chain_current_ = 0; - opt_timestamp_backup_host_ = 0; - } - } ProxyInfo *proxy = ChooseProxyUnlocked(info->expected_hash()); if (!proxy || (proxy->url == "DIRECT")) { @@ -1090,6 +1101,10 @@ void DownloadManager::SetUrlOptions(JobInfo *info) { } } // end !sharding + // Check if metalink and host chains need to be reset + CheckHostInfoReset("metalink", opt_metalink_, info); + CheckHostInfoReset("host", opt_metalink_, info); + curl_easy_setopt(curl_handle, CURLOPT_LOW_SPEED_LIMIT, opt_low_speed_limit_); if (info->proxy() != "DIRECT") { curl_easy_setopt(curl_handle, CURLOPT_CONNECTTIMEOUT, opt_timeout_proxy_); @@ -1101,9 +1116,9 @@ void DownloadManager::SetUrlOptions(JobInfo *info) { if (!opt_dns_server_.empty()) curl_easy_setopt(curl_handle, CURLOPT_DNS_SERVERS, opt_dns_server_.c_str()); - if (info->probe_hosts() && opt_host_chain_) { - url_prefix = (*opt_host_chain_)[opt_host_chain_current_]; - info->SetCurrentHostChainIndex(opt_host_chain_current_); + if (info->probe_hosts() && opt_host_.chain) { + url_prefix = (*opt_host_.chain)[opt_host_.current]; + info->SetCurrentHostChainIndex(opt_host_.current); } string url = url_prefix + *(info->url()); @@ -1478,7 +1493,7 @@ bool DownloadManager::VerifyAndFinalize(const int curl_error, JobInfo *info) { break; } - std::vector *host_chain = opt_host_chain_; + std::vector *host_chain = opt_host_.chain; // Determination if download should be repeated bool try_again = false; @@ -1714,7 +1729,7 @@ DownloadManager::~DownloadManager() { free(user_agent_); delete counters_; - delete opt_host_chain_; + delete opt_host_.chain; delete opt_host_chain_rtt_; delete opt_proxy_groups_; @@ -1774,9 +1789,9 @@ DownloadManager::DownloadManager(const unsigned max_pool_handles, follow_redirects_(false), ignore_signature_failures_(false), enable_http_tracing_(false), - opt_host_chain_(NULL), + opt_metalink_(NULL, 0, 0, 0), + opt_host_(NULL, 0, 0, 0), opt_host_chain_rtt_(NULL), - opt_host_chain_current_(0), opt_proxy_groups_(NULL), opt_proxy_groups_current_(0), opt_proxy_groups_current_burned_(0), @@ -1789,8 +1804,6 @@ DownloadManager::DownloadManager(const unsigned max_pool_handles, opt_timestamp_backup_proxies_(0), opt_timestamp_failover_proxies_(0), opt_proxy_groups_reset_after_(0), - opt_timestamp_backup_host_(0), - opt_host_reset_after_(0), credentials_attachment_(NULL), counters_(new Counters(statistics)) { @@ -2072,6 +2085,31 @@ void DownloadManager::GetTimeout(unsigned *seconds_proxy, } +/** + * Parses a list of ';'-separated hosts for the metalink chain. The empty + * string removes the metalink list. + */ +void DownloadManager::SetMetalinkChain(const string &metalink_list) { + SetMetalinkChain(SplitString(metalink_list, ';')); +} + + +void DownloadManager::SetMetalinkChain( + const std::vector &metalink_list) { + MutexLockGuard m(lock_options_); + opt_metalink_.timestamp_backup = 0; + delete opt_metalink_.chain; + opt_metalink_.current = 0; + + if (metalink_list.empty()) { + opt_metalink_.chain = NULL; + return; + } + + opt_metalink_.chain = new vector(metalink_list); +} + + /** * Parses a list of ';'-separated hosts for the host chain. The empty string * removes the host list. @@ -2083,26 +2121,25 @@ void DownloadManager::SetHostChain(const string &host_list) { void DownloadManager::SetHostChain(const std::vector &host_list) { MutexLockGuard m(lock_options_); - opt_timestamp_backup_host_ = 0; - delete opt_host_chain_; + opt_host_.timestamp_backup = 0; + delete opt_host_.chain; delete opt_host_chain_rtt_; - opt_host_chain_current_ = 0; + opt_host_.current = 0; if (host_list.empty()) { - opt_host_chain_ = NULL; + opt_host_.chain = NULL; opt_host_chain_rtt_ = NULL; return; } - opt_host_chain_ = new vector(host_list); + opt_host_.chain = new vector(host_list); opt_host_chain_rtt_ = - new vector(opt_host_chain_->size(), kProbeUnprobed); + new vector(opt_host_.chain->size(), kProbeUnprobed); // LogCvmfs(kLogDownload, kLogSyslog, "using host %s", - // (*opt_host_chain_)[0].c_str()); + // (*opt_host_.chain)[0].c_str()); } - /** * Retrieves the currently set chain of hosts, their round trip times, and the * currently used host. @@ -2111,9 +2148,9 @@ void DownloadManager::GetHostInfo(vector *host_chain, vector *rtt, unsigned *current_host) { MutexLockGuard m(lock_options_); - if (opt_host_chain_) { - if (current_host) {*current_host = opt_host_chain_current_;} - if (host_chain) {*host_chain = *opt_host_chain_;} + if (opt_host_.chain) { + if (current_host) {*current_host = opt_host_.current;} + if (host_chain) {*host_chain = *opt_host_.chain;} if (rtt) {*rtt = *opt_host_chain_rtt_;} } } @@ -2191,60 +2228,82 @@ void DownloadManager::SwitchProxy(JobInfo *info) { /** - * Switches to the next host in the chain. If info is set, switch only if the - * current host is identical to the one used by info, otherwise another transfer - * has already done the switch. + * Switches to the next host in the chain. If jobinfo is set, switch only if + * the current host is identical to the one used by jobinfo, otherwise another + * transfer has already done the switch. */ -void DownloadManager::SwitchHost(JobInfo *info) { +void DownloadManager::SwitchHostInfo(const std::string typ, + HostInfo &info, + JobInfo *jobinfo) { MutexLockGuard m(lock_options_); - if (!opt_host_chain_ || (opt_host_chain_->size() == 1)) { + if (!info.chain || (info.chain->size() == 1)) { return; } - if (info && (info->current_host_chain_index() != opt_host_chain_current_)) { + if (jobinfo && (typ == "host") && + (jobinfo->current_host_chain_index() != info.current)) { + // The current_host_chain_index is only used by the probe_hosts + // feature which is not relevant to metalink LogCvmfs(kLogDownload, kLogDebug, "(manager '%s' - id %" PRId64 ")" "don't switch host, " - "last used host: %s, current host: %s", name_.c_str(), info->id(), - (*opt_host_chain_)[info->current_host_chain_index()].c_str(), - (*opt_host_chain_)[opt_host_chain_current_].c_str()); + "last used host: %s, current host: %s", + name_.c_str(), jobinfo->id(), + (*info.chain)[jobinfo->current_host_chain_index()].c_str(), + (*info.chain)[info.current].c_str()); return; } string reason = "manually triggered"; string info_id = "(manager " + name_; - if (info) { - reason = download::Code2Ascii(info->error_code()); - info_id = " - id " + StringifyInt(info->id()); + if (jobinfo) { + reason = download::Code2Ascii(jobinfo->error_code()); + info_id = " - id " + StringifyInt(jobinfo->id()); } info_id += ")"; - string old_host = (*opt_host_chain_)[opt_host_chain_current_]; - opt_host_chain_current_ = - (opt_host_chain_current_ + 1) % opt_host_chain_->size(); - perf::Inc(counters_->n_host_failover); + string old_host = (*info.chain)[info.current]; + info.current = (info.current + 1) % info.chain->size(); + if (typ == "host") { + perf::Inc(counters_->n_host_failover); + } else { + perf::Inc(counters_->n_metalink_failover); + } LogCvmfs(kLogDownload, kLogDebug | kLogSyslogWarn, - "%s switching host from %s to %s (%s)", info_id.c_str(), - old_host.c_str(), (*opt_host_chain_)[opt_host_chain_current_].c_str(), + "%s switching %s from %s to %s (%s)", info_id.c_str(), typ, + old_host.c_str(), (*info.chain)[info.current].c_str(), reason.c_str()); // Remember the timestamp of switching to backup host - if (opt_host_reset_after_ > 0) { - if (opt_host_chain_current_ != 0) { - if (opt_timestamp_backup_host_ == 0) - opt_timestamp_backup_host_ = time(NULL); + if (info.reset_after > 0) { + if (info.current != 0) { + if (info.timestamp_backup == 0) + info.timestamp_backup = time(NULL); } else { - opt_timestamp_backup_host_ = 0; + info.timestamp_backup = 0; } } } +void DownloadManager::SwitchHost(JobInfo *info) { + SwitchHostInfo("host", opt_host_, info); +} + void DownloadManager::SwitchHost() { SwitchHost(NULL); } +void DownloadManager::SwitchMetalink(JobInfo *info) { + SwitchHostInfo("metalink", opt_metalink_, info); +} + +void DownloadManager::SwitchMetalink() { + SwitchMetalink(NULL); +} + + /** * Orders the hostlist according to RTT of downloading .cvmfschecksum. * Sets the current host to the best-responsive host. @@ -2296,11 +2355,11 @@ void DownloadManager::ProbeHosts() { } MutexLockGuard m(lock_options_); - delete opt_host_chain_; + delete opt_host_.chain; delete opt_host_chain_rtt_; - opt_host_chain_ = new vector(host_chain); + opt_host_.chain = new vector(host_chain); opt_host_chain_rtt_ = new vector(host_rtt); - opt_host_chain_current_ = 0; + opt_host_.current = 0; } bool DownloadManager::GeoSortServers(std::vector *servers, @@ -2442,9 +2501,9 @@ bool DownloadManager::ProbeGeo() { // Re-install host chain and proxy chain MutexLockGuard m(lock_options_); - delete opt_host_chain_; + delete opt_host_.chain; opt_num_proxies_ = 0; - opt_host_chain_ = new vector(host_chain.size()); + opt_host_.chain = new vector(host_chain.size()); // It's possible that opt_proxy_groups_fallback_ might have changed while // the lock wasn't held @@ -2467,7 +2526,7 @@ bool DownloadManager::ProbeGeo() { if (orderval < static_cast(last_geo_host)) { // LogCvmfs(kLogCvmfs, kLogSyslog, "this is orderval %u at host index // %u", orderval, hosti); - (*opt_host_chain_)[hosti++] = host_chain[orderval]; + (*opt_host_.chain)[hosti++] = host_chain[orderval]; } else if (orderval >= static_cast(first_geo_fallback)) { // LogCvmfs(kLogCvmfs, kLogSyslog, // "this is orderval %u at proxy index %u, using proxy_chain index %u", @@ -2497,7 +2556,7 @@ bool DownloadManager::ProbeGeo() { delete opt_host_chain_rtt_; opt_host_chain_rtt_ = new vector(host_chain.size(), kProbeGeo); - opt_host_chain_current_ = 0; + opt_host_.current = 0; return true; } @@ -2845,8 +2904,8 @@ void DownloadManager::UpdateProxiesUnlocked(const string &reason) { // Report any change in proxy usage string new_proxy = JoinStrings(opt_proxies_, "|"); - string curr_host = "Current host: " + (opt_host_chain_ ? - (*opt_host_chain_)[opt_host_chain_current_] : ""); + string curr_host = "Current host: " + (opt_host_.chain ? + (*opt_host_.chain)[opt_host_.current] : ""); if (new_proxy != old_proxy) { LogCvmfs(kLogDownload, kLogDebug | kLogSyslogWarn, "(manager '%s') switching proxy from %s to %s. Reason: %s [%s]", @@ -2914,12 +2973,21 @@ void DownloadManager::SetProxyGroupResetDelay(const unsigned seconds) { } +void DownloadManager::SetMetalinkResetDelay(const unsigned seconds) +{ + MutexLockGuard m(lock_options_); + opt_metalink_.reset_after = seconds; + if (opt_metalink_.reset_after == 0) + opt_metalink_.timestamp_backup = 0; +} + + void DownloadManager::SetHostResetDelay(const unsigned seconds) { MutexLockGuard m(lock_options_); - opt_host_reset_after_ = seconds; - if (opt_host_reset_after_ == 0) - opt_timestamp_backup_host_ = 0; + opt_host_.reset_after = seconds; + if (opt_host_.reset_after == 0) + opt_host_.timestamp_backup = 0; } @@ -3017,8 +3085,8 @@ DownloadManager *DownloadManager::Clone( clone->http_tracing_headers_ = http_tracing_headers_; clone->follow_redirects_ = follow_redirects_; clone->ignore_signature_failures_ = ignore_signature_failures_; - if (opt_host_chain_) { - clone->opt_host_chain_ = new vector(*opt_host_chain_); + if (opt_host_.chain) { + clone->opt_host_.chain = new vector(*opt_host_.chain); clone->opt_host_chain_rtt_ = new vector(*opt_host_chain_rtt_); } @@ -3027,7 +3095,7 @@ DownloadManager *DownloadManager::Clone( clone->proxy_template_direct_ = proxy_template_direct_; clone->proxy_template_forced_ = proxy_template_forced_; clone->opt_proxy_groups_reset_after_ = opt_proxy_groups_reset_after_; - clone->opt_host_reset_after_ = opt_host_reset_after_; + clone->opt_host_.reset_after = opt_host_.reset_after; clone->credentials_attachment_ = credentials_attachment_; clone->ssl_certificate_store_ = ssl_certificate_store_; diff --git a/cvmfs/network/download.h b/cvmfs/network/download.h index 8f77c1abb2..a36c5000e0 100644 --- a/cvmfs/network/download.h +++ b/cvmfs/network/download.h @@ -44,8 +44,9 @@ struct Counters { perf::Counter *sz_transfer_time; // measured in milliseconds perf::Counter *n_requests; perf::Counter *n_retries; - perf::Counter *n_proxy_failover; + perf::Counter *n_metalink_failover; perf::Counter *n_host_failover; + perf::Counter *n_proxy_failover; explicit Counters(perf::StatisticsTemplate statistics) { sz_transferred_bytes = statistics.RegisterTemplated("sz_transferred_bytes", @@ -55,10 +56,12 @@ struct Counters { n_requests = statistics.RegisterTemplated("n_requests", "Number of requests"); n_retries = statistics.RegisterTemplated("n_retries", "Number of retries"); - n_proxy_failover = statistics.RegisterTemplated("n_proxy_failover", - "Number of proxy failovers"); + n_metalink_failover = statistics.RegisterTemplated("n_metalink_failover", + "Number of metalink failovers"); n_host_failover = statistics.RegisterTemplated("n_host_failover", "Number of host failovers"); + n_proxy_failover = statistics.RegisterTemplated("n_proxy_failover", + "Number of proxy failovers"); } }; // Counters @@ -119,6 +122,25 @@ class DownloadManager { // NOLINT(clang-analyzer-optin.performance.Padding) FRIEND_TEST(T_Download, EscapeUrl); public: + // HostInfo is used for both metalink and host + struct HostInfo { + HostInfo() { } + HostInfo( + std::vector *chain, + const unsigned current, + const time_t timestamp_backup, + const unsigned reset_after) + : chain(chain) + , current(current) + , timestamp_backup(timestamp_backup) + , reset_after(reset_after) + { } + std::vector *chain; + unsigned current; + time_t timestamp_backup; + unsigned reset_after; + }; + struct ProxyInfo { ProxyInfo() { } explicit ProxyInfo(const std::string &url) : url(url) { } @@ -176,6 +198,11 @@ class DownloadManager { // NOLINT(clang-analyzer-optin.performance.Padding) void SetTimeout(const unsigned seconds_proxy, const unsigned seconds_direct); void GetTimeout(unsigned *seconds_proxy, unsigned *seconds_direct); void SetLowSpeedLimit(const unsigned low_speed_limit); + void SetMetalinkChain(const std::string &metalink_list); + void SetMetalinkChain(const std::vector &metalink_list); + void GetMetalinkInfo(std::vector *metalink_chain, + unsigned *current_metalink); + void SwitchMetalink(); void SetHostChain(const std::string &host_list); void SetHostChain(const std::vector &host_list); void GetHostInfo(std::vector *host_chain, @@ -201,6 +228,7 @@ class DownloadManager { // NOLINT(clang-analyzer-optin.performance.Padding) void RebalanceProxies(); void SwitchProxyGroup(); void SetProxyGroupResetDelay(const unsigned seconds); + void SetMetalinkResetDelay(const unsigned seconds); void SetHostResetDelay(const unsigned seconds); void SetRetryParameters(const unsigned max_retries, const unsigned backoff_init_ms, @@ -219,7 +247,7 @@ class DownloadManager { // NOLINT(clang-analyzer-optin.performance.Padding) void SetFqrn(const std::string &fqrn) { fqrn_ = fqrn; } unsigned num_hosts() { - if (opt_host_chain_) return opt_host_chain_->size(); + if (opt_host_.chain) return opt_host_.chain->size(); return 0; } @@ -236,6 +264,8 @@ class DownloadManager { // NOLINT(clang-analyzer-optin.performance.Padding) bool ValidateGeoReply(const std::string &reply_order, const unsigned expected_size, std::vector *reply_vals); + void SwitchHostInfo(std::string typ, HostInfo &info, JobInfo *jobinfo); + void SwitchMetalink(JobInfo *info); void SwitchHost(JobInfo *info); void SwitchProxy(JobInfo *info); ProxyInfo *ChooseProxyUnlocked(const shash::Any *hash); @@ -255,6 +285,8 @@ class DownloadManager { // NOLINT(clang-analyzer-optin.performance.Padding) bool VerifyAndFinalize(const int curl_error, JobInfo *info); void InitHeaders(); void CloneProxyConfig(DownloadManager *clone); + void CheckHostInfoReset(const std::string typ, HostInfo &info, + JobInfo *jobinfo); bool EscapeUrlChar(unsigned char input, char output[3]); std::string EscapeUrl(const int64_t jobinfo_id, const std::string &url); @@ -307,14 +339,17 @@ class DownloadManager { // NOLINT(clang-analyzer-optin.performance.Padding) bool enable_http_tracing_; std::vector http_tracing_headers_; + // Metalink + HostInfo opt_metalink_; + // Host list - std::vector *opt_host_chain_; + HostInfo opt_host_; + /** * Created by SetHostChain(), filled by probe_hosts. Contains time to get * .cvmfschecksum in ms. -1 is unprobed, -2 is error. */ std::vector *opt_host_chain_rtt_; - unsigned opt_host_chain_current_; // Proxy list std::vector< std::vector > *opt_proxy_groups_; @@ -422,14 +457,6 @@ class DownloadManager { // NOLINT(clang-analyzer-optin.performance.Padding) time_t opt_timestamp_failover_proxies_; // failover within the same group unsigned opt_proxy_groups_reset_after_; - /** - * Similarly to proxy group reset, we'd also like to reset the host after a - * failover. Host outages can last longer and might come with a separate - * reset delay. - */ - time_t opt_timestamp_backup_host_; - unsigned opt_host_reset_after_; - CredentialsAttachment *credentials_attachment_; /** diff --git a/cvmfs/network/jobinfo.h b/cvmfs/network/jobinfo.h index b451a07d64..0b6f0f7fed 100644 --- a/cvmfs/network/jobinfo.h +++ b/cvmfs/network/jobinfo.h @@ -99,6 +99,7 @@ class JobInfo { z_stream zstream_; shash::ContextPtr hash_context_; std::string proxy_; + std::string link_; bool nocache_; Failures error_code_; int http_code_; @@ -196,6 +197,7 @@ class JobInfo { z_stream zstream() const { return zstream_; } shash::ContextPtr hash_context() const { return hash_context_; } std::string proxy() const { return proxy_; } + std::string link() const { return link_; } bool nocache() const { return nocache_; } Failures error_code() const { return error_code_; } int http_code() const { return http_code_; } @@ -246,6 +248,7 @@ class JobInfo { void SetHashContext(shash::ContextPtr hash_context) { hash_context_ = hash_context; } void SetProxy(const std::string &proxy) { proxy_ = proxy; } + void SetLink(const std::string &link) { link_ = link; } void SetNocache(bool nocache) { nocache_ = nocache; } void SetErrorCode(Failures error_code) { error_code_ = error_code; } void SetHttpCode(int http_code) { http_code_ = http_code; } diff --git a/cvmfs/talk.cc b/cvmfs/talk.cc index 5ebe6930c6..301e648a14 100644 --- a/cvmfs/talk.cc +++ b/cvmfs/talk.cc @@ -96,6 +96,24 @@ TalkManager *TalkManager::Create( } +string TalkManager::FormatMetalinkInfo(download::DownloadManager *download_mgr) +{ + vector metalink_chain; + unsigned active_metalink; + + download_mgr->GetMetalinkInfo(&metalink_chain, &active_metalink); + if (metalink_chain.size() == 0) + return "No metalinks defined\n"; + + string metalink_str; + for (unsigned i = 0; i < metalink_chain.size(); ++i) { + metalink_str += " [" + StringifyInt(i) + "] " + metalink_chain[i]; + } + metalink_str += "Active metalink " + StringifyInt(active_metalink) + ": " + + metalink_chain[active_metalink] + "\n"; + return metalink_str; +} + string TalkManager::FormatHostInfo(download::DownloadManager *download_mgr) { vector host_chain; vector rtt; @@ -386,6 +404,10 @@ void *TalkManager::MainResponder(void *data) { mount_point->download_mgr()->SetDnsServer(host); talk_mgr->Answer(con_fd, "OK\n"); } + } else if (line == "external metalink info") { + string external_metalink_info = + talk_mgr->FormatMetalinkInfo(mount_point->external_download_mgr()); + talk_mgr->Answer(con_fd, external_metalink_info); } else if (line == "external host info") { string external_host_info = talk_mgr->FormatHostInfo(mount_point->external_download_mgr()); @@ -402,12 +424,23 @@ void *TalkManager::MainResponder(void *data) { talk_mgr->Answer(con_fd, "OK\n"); else talk_mgr->Answer(con_fd, "Failed\n"); + } else if (line == "external metalink switch") { + mount_point->external_download_mgr()->SwitchMetalink(); + talk_mgr->Answer(con_fd, "OK\n"); } else if (line == "external host switch") { mount_point->external_download_mgr()->SwitchHost(); talk_mgr->Answer(con_fd, "OK\n"); } else if (line == "host switch") { mount_point->download_mgr()->SwitchHost(); talk_mgr->Answer(con_fd, "OK\n"); + } else if (line.substr(0, 21) == "external metalink set") { + if (line.length() < 23) { + talk_mgr->Answer(con_fd, "Usage: external metalink set \n"); + } else { + const std::string host = line.substr(22); + mount_point->external_download_mgr()->SetMetalinkChain(host); + talk_mgr->Answer(con_fd, "OK\n"); + } } else if (line.substr(0, 17) == "external host set") { if (line.length() < 19) { talk_mgr->Answer(con_fd, "Usage: external host set \n"); diff --git a/cvmfs/talk.h b/cvmfs/talk.h index 28630f618d..9d72d6d7ab 100644 --- a/cvmfs/talk.h +++ b/cvmfs/talk.h @@ -47,6 +47,7 @@ class TalkManager : SingleCopy { static void *MainResponder(void *data); void Answer(int con_fd, const std::string &msg); void AnswerStringList(int con_fd, const std::vector &list); + std::string FormatMetalinkInfo(download::DownloadManager *download_mgr); std::string FormatHostInfo(download::DownloadManager *download_mgr); std::string FormatProxyInfo(download::DownloadManager *download_mgr); std::string FormatLatencies(const MountPoint &mount_point, From 7fc37d501b92514fdb53263e6721b0deac76c4c0 Mon Sep 17 00:00:00 2001 From: Dave Dykstra <2129743+DrDaveD@users.noreply.github.com> Date: Wed, 23 Oct 2024 17:21:11 -0500 Subject: [PATCH 3/4] request CVMFS_EXTERNAL_METALINK hosts (if set) before CVMFS_EXTERNAL_URL hosts --- cvmfs/mountpoint.cc | 2 + cvmfs/network/download.cc | 138 +++++++++++++++++++++++++++----------- cvmfs/network/download.h | 9 ++- cvmfs/network/jobinfo.cc | 4 +- cvmfs/network/jobinfo.h | 16 +++-- cvmfs/talk.cc | 2 +- 6 files changed, 123 insertions(+), 48 deletions(-) diff --git a/cvmfs/mountpoint.cc b/cvmfs/mountpoint.cc index a188b2c58e..bc34194cfb 100644 --- a/cvmfs/mountpoint.cc +++ b/cvmfs/mountpoint.cc @@ -2142,6 +2142,8 @@ bool MountPoint::SetupExternalDownloadMgr(bool dogeosort) { external_download_mgr_->SetMetalinkChain(optarg); // host chain will be set later when the metalink server is contacted external_download_mgr_->SetHostChain(""); + // metalink requires redirects + external_download_mgr_->EnableRedirects(); } else if (options_mgr_->GetValue("CVMFS_EXTERNAL_URL", &optarg)) { external_download_mgr_->SetHostChain(optarg); if (dogeosort) { diff --git a/cvmfs/network/download.cc b/cvmfs/network/download.cc index 155ddf8ba4..00a4bf8637 100644 --- a/cvmfs/network/download.cc +++ b/cvmfs/network/download.cc @@ -931,6 +931,7 @@ void DownloadManager::InitializeRequest(JobInfo *info, CURL *handle) { info->SetHttpCode(-1); info->SetFollowRedirects(follow_redirects_); info->SetNumUsedProxies(1); + info->SetNumUsedMetalinks(1); info->SetNumUsedHosts(1); info->SetNumRetries(0); info->SetBackoffMs(0); @@ -1010,19 +1011,20 @@ void DownloadManager::InitializeRequest(JobInfo *info, CURL *handle) { void DownloadManager::CheckHostInfoReset( std::string typ, HostInfo &info, - JobInfo *jobinfo) + JobInfo *jobinfo, + time_t &now) { if (info.timestamp_backup > 0) { - const time_t now = time(NULL); + if (now == 0) + now = time(NULL); if (static_cast(now) > - static_cast(info.timestamp_backup + - info.reset_after)) + static_cast(info.timestamp_backup + info.reset_after)) { LogCvmfs(kLogDownload, kLogDebug | kLogSyslogWarn, "(manager %s - id %" PRId64 ") " "switching %s from %s to %s (reset %s)", name_.c_str(), - jobinfo->id(), typ, (*info.chain)[info.current].c_str(), - (*info.chain)[0].c_str(), typ); + jobinfo->id(), typ.c_str(), (*info.chain)[info.current].c_str(), + (*info.chain)[0].c_str(), typ.c_str()); info.current = 0; info.timestamp_backup = 0; } @@ -1037,6 +1039,7 @@ void DownloadManager::CheckHostInfoReset( void DownloadManager::SetUrlOptions(JobInfo *info) { CURL *curl_handle = info->curl_handle(); string url_prefix; + time_t now = 0; MutexLockGuard m(lock_options_); @@ -1053,7 +1056,7 @@ void DownloadManager::SetUrlOptions(JobInfo *info) { } else { // no sharding policy // Check if proxy group needs to be reset from backup to primary if (opt_timestamp_backup_proxies_ > 0) { - const time_t now = time(NULL); + now = time(NULL); if (static_cast(now) > static_cast(opt_timestamp_backup_proxies_ + opt_proxy_groups_reset_after_)) @@ -1065,7 +1068,8 @@ void DownloadManager::SetUrlOptions(JobInfo *info) { } // Check if load-balanced proxies within the group need to be reset if (opt_timestamp_failover_proxies_ > 0) { - const time_t now = time(NULL); + if (now == 0) + now = time(NULL); if (static_cast(now) > static_cast(opt_timestamp_failover_proxies_ + opt_proxy_groups_reset_after_)) @@ -1102,8 +1106,8 @@ void DownloadManager::SetUrlOptions(JobInfo *info) { } // end !sharding // Check if metalink and host chains need to be reset - CheckHostInfoReset("metalink", opt_metalink_, info); - CheckHostInfoReset("host", opt_metalink_, info); + CheckHostInfoReset("metalink", opt_metalink_, info, now); + CheckHostInfoReset("host", opt_metalink_, info, now); curl_easy_setopt(curl_handle, CURLOPT_LOW_SPEED_LIMIT, opt_low_speed_limit_); if (info->proxy() != "DIRECT") { @@ -1116,9 +1120,24 @@ void DownloadManager::SetUrlOptions(JobInfo *info) { if (!opt_dns_server_.empty()) curl_easy_setopt(curl_handle, CURLOPT_DNS_SERVERS, opt_dns_server_.c_str()); - if (info->probe_hosts() && opt_host_.chain) { - url_prefix = (*opt_host_.chain)[opt_host_.current]; - info->SetCurrentHostChainIndex(opt_host_.current); + if (info->probe_hosts()) { + if (opt_metalink_.chain && + ((opt_metalink_timestamp_link_ == 0) || + (static_cast((now == 0) ? time(NULL) : now) > + static_cast(opt_metalink_timestamp_link_ + + opt_metalink_.reset_after)))) { + url_prefix = (*opt_metalink_.chain)[opt_metalink_.current]; + info->SetCurrentMetalinkChainIndex(opt_metalink_.current); + LogCvmfs(kLogDownload, kLogDebug, "(manager %s - id %" PRId64 ") " + "reading from metalink %d", + name_.c_str(), info->id(), opt_metalink_.current); + } else if (opt_host_.chain) { + url_prefix = (*opt_host_.chain)[opt_host_.current]; + info->SetCurrentHostChainIndex(opt_host_.current); + LogCvmfs(kLogDownload, kLogDebug, "(manager %s - id %" PRId64 ") " + "reading from host %d", + name_.c_str(), info->id(), opt_host_.current); + } } string url = url_prefix + *(info->url()); @@ -1493,7 +1512,21 @@ bool DownloadManager::VerifyAndFinalize(const int curl_error, JobInfo *info) { break; } - std::vector *host_chain = opt_host_.chain; + bool was_metalink; + std::string typ; + std::vector *host_chain; + unsigned char num_used_hosts; + if (info->current_metalink_chain_index() >= 0) { + was_metalink = true; + typ = "metalink"; + host_chain = opt_metalink_.chain; + num_used_hosts = info->num_used_metalinks(); + } else { + was_metalink = false; + typ = "host"; + host_chain = opt_host_.chain; + num_used_hosts = info->num_used_hosts(); + } // Determination if download should be repeated bool try_again = false; @@ -1507,8 +1540,8 @@ bool DownloadManager::VerifyAndFinalize(const int curl_error, JobInfo *info) { // Make it a host failure LogCvmfs(kLogDownload, kLogDebug | kLogSyslogWarn, "(manager '%s' - id %" PRId64 ") " - "data corruption with no-cache header, try another host", - name_.c_str(), info->id()); + "data corruption with no-cache header, try another %s", + name_.c_str(), info->id(), typ.c_str()); info->SetErrorCode(kFailHostHttp); } @@ -1518,7 +1551,7 @@ bool DownloadManager::VerifyAndFinalize(const int curl_error, JobInfo *info) { IsHostTransferError(info->error_code()) || (info->error_code() == kFailHostHttp)) && info->probe_hosts() && - host_chain && (info->num_used_hosts() < host_chain->size())) + host_chain && (num_used_hosts < host_chain->size())) ) { try_again = true; @@ -1538,8 +1571,7 @@ bool DownloadManager::VerifyAndFinalize(const int curl_error, JobInfo *info) { if (!same_url_retry && (info->num_used_proxies() >= opt_num_proxies_)) { // Check if this can be made a host fail-over if (info->probe_hosts() && - host_chain && - (info->num_used_hosts() < host_chain->size())) + host_chain && (num_used_hosts < host_chain->size())) { // reset proxy group if not already performed by other handle if (opt_proxy_groups_) { @@ -1548,14 +1580,15 @@ bool DownloadManager::VerifyAndFinalize(const int curl_error, JobInfo *info) { { opt_proxy_groups_current_ = 0; opt_timestamp_backup_proxies_ = 0; - RebalanceProxiesUnlocked("reset proxies for host failover"); + std::string msg = "reset proxies for " + typ + " failover"; + RebalanceProxiesUnlocked(msg); } } // Make it a host failure LogCvmfs(kLogDownload, kLogDebug, - "(manager '%s' - id %" PRId64 ") make it a host failure", - name_.c_str(), info->id()); + "(manager '%s' - id %" PRId64 ") make it a %s failure", + name_.c_str(), info->id(), typ.c_str()); info->SetNumUsedProxies(1); info->SetErrorCode(kFailHostAfterProxy); } else { @@ -1566,12 +1599,13 @@ bool DownloadManager::VerifyAndFinalize(const int curl_error, JobInfo *info) { "(manager '%s' - id %" PRId64 ") " "VerifyAndFinalize() would fail the download here. " "Instead switch proxy and retry download. " - "info->probe_hosts=%d host_chain=%p info->num_used_hosts=%d " + "typ=%s " + "info->probe_hosts=%d host_chain=%p num_used_hosts=%d " "host_chain->size()=%lu same_url_retry=%d " "info->num_used_proxies=%d opt_num_proxies_=%d", - name_.c_str(), info->id(), + name_.c_str(), info->id(), typ.c_str(), static_cast(info->probe_hosts()), - host_chain, info->num_used_hosts(), + host_chain, num_used_hosts, host_chain ? host_chain->size() : -1, static_cast(same_url_retry), info->num_used_proxies(), opt_num_proxies_); @@ -1659,8 +1693,13 @@ bool DownloadManager::VerifyAndFinalize(const int curl_error, JobInfo *info) { } if (switch_host) { ReleaseCredential(info); - SwitchHost(info); - info->SetNumUsedHosts(info->num_used_hosts() + 1); + if (was_metalink) { + SwitchMetalink(info); + info->SetNumUsedMetalinks(num_used_hosts + 1); + } else { + SwitchHost(info); + info->SetNumUsedHosts(num_used_hosts + 1); + } SetUrlOptions(info); } } // end !sharding @@ -2110,6 +2149,21 @@ void DownloadManager::SetMetalinkChain( } +/** + * Retrieves the currently set chain of metalink hosts and the currently + * used metalink host. + */ +void DownloadManager::GetMetalinkInfo(vector *metalink_chain, + unsigned *current_metalink) +{ + MutexLockGuard m(lock_options_); + if (opt_metalink_.chain) { + if (current_metalink) {*current_metalink = opt_metalink_.current;} + if (metalink_chain) {*metalink_chain = *opt_metalink_.chain;} + } +} + + /** * Parses a list of ';'-separated hosts for the host chain. The empty string * removes the host list. @@ -2241,18 +2295,22 @@ void DownloadManager::SwitchHostInfo(const std::string typ, return; } - if (jobinfo && (typ == "host") && - (jobinfo->current_host_chain_index() != info.current)) { - // The current_host_chain_index is only used by the probe_hosts - // feature which is not relevant to metalink - LogCvmfs(kLogDownload, kLogDebug, - "(manager '%s' - id %" PRId64 ")" - "don't switch host, " - "last used host: %s, current host: %s", - name_.c_str(), jobinfo->id(), - (*info.chain)[jobinfo->current_host_chain_index()].c_str(), - (*info.chain)[info.current].c_str()); - return; + if (jobinfo) { + unsigned lastused; + if (typ == "host") + lastused = jobinfo->current_host_chain_index(); + else + lastused = jobinfo->current_metalink_chain_index(); + if (lastused != info.current) { + LogCvmfs(kLogDownload, kLogDebug, + "(manager '%s' - id %" PRId64 ")" + "don't switch %s, " + "last used %s: %s, current %s: %s", + name_.c_str(), jobinfo->id(), typ.c_str(), + typ.c_str(), (*info.chain)[lastused].c_str(), + typ.c_str(), (*info.chain)[info.current].c_str()); + return; + } } string reason = "manually triggered"; @@ -2271,7 +2329,7 @@ void DownloadManager::SwitchHostInfo(const std::string typ, perf::Inc(counters_->n_metalink_failover); } LogCvmfs(kLogDownload, kLogDebug | kLogSyslogWarn, - "%s switching %s from %s to %s (%s)", info_id.c_str(), typ, + "%s switching %s from %s to %s (%s)", info_id.c_str(), typ.c_str(), old_host.c_str(), (*info.chain)[info.current].c_str(), reason.c_str()); diff --git a/cvmfs/network/download.h b/cvmfs/network/download.h index a36c5000e0..f83b2019bb 100644 --- a/cvmfs/network/download.h +++ b/cvmfs/network/download.h @@ -251,6 +251,11 @@ class DownloadManager { // NOLINT(clang-analyzer-optin.performance.Padding) return 0; } + unsigned num_metalinks() { + if (opt_metalink_.chain) return opt_metalink_.chain->size(); + return 0; + } + dns::IpPreference opt_ip_preference() const { return opt_ip_preference_; } @@ -286,7 +291,7 @@ class DownloadManager { // NOLINT(clang-analyzer-optin.performance.Padding) void InitHeaders(); void CloneProxyConfig(DownloadManager *clone); void CheckHostInfoReset(const std::string typ, HostInfo &info, - JobInfo *jobinfo); + JobInfo *jobinfo, time_t &now); bool EscapeUrlChar(unsigned char input, char output[3]); std::string EscapeUrl(const int64_t jobinfo_id, const std::string &url); @@ -341,10 +346,10 @@ class DownloadManager { // NOLINT(clang-analyzer-optin.performance.Padding) // Metalink HostInfo opt_metalink_; + time_t opt_metalink_timestamp_link_; // Host list HostInfo opt_host_; - /** * Created by SetHostChain(), filled by probe_hosts. Contains time to get * .cvmfschecksum in ms. -1 is unprobed, -2 is error. diff --git a/cvmfs/network/jobinfo.cc b/cvmfs/network/jobinfo.cc index 8cb99e5697..be040bcab6 100644 --- a/cvmfs/network/jobinfo.cc +++ b/cvmfs/network/jobinfo.cc @@ -67,10 +67,12 @@ void JobInfo::Init() { error_code_ = kFailOther; http_code_ = -1; num_used_proxies_ = 0; + num_used_metalinks_ = 0; num_used_hosts_ = 0; num_retries_ = 0; backoff_ms_ = 0; - current_host_chain_index_ = 0; + current_metalink_chain_index_ = -1; + current_host_chain_index_ = -1; allow_failure_ = false; diff --git a/cvmfs/network/jobinfo.h b/cvmfs/network/jobinfo.h index 0b6f0f7fed..24d3bed6e5 100644 --- a/cvmfs/network/jobinfo.h +++ b/cvmfs/network/jobinfo.h @@ -104,10 +104,12 @@ class JobInfo { Failures error_code_; int http_code_; unsigned char num_used_proxies_; + unsigned char num_used_metalinks_; unsigned char num_used_hosts_; unsigned char num_retries_; unsigned backoff_ms_; - unsigned int current_host_chain_index_; + int current_metalink_chain_index_; + int current_host_chain_index_; // Don't fail-over proxies on download errors. default = false bool allow_failure_; @@ -202,11 +204,13 @@ class JobInfo { Failures error_code() const { return error_code_; } int http_code() const { return http_code_; } unsigned char num_used_proxies() const { return num_used_proxies_; } + unsigned char num_used_metalinks() const { return num_used_metalinks_; } unsigned char num_used_hosts() const { return num_used_hosts_; } unsigned char num_retries() const { return num_retries_; } unsigned backoff_ms() const { return backoff_ms_; } - unsigned int current_host_chain_index() const { - return current_host_chain_index_; } + int current_metalink_chain_index() const { + return current_metalink_chain_index_; } + int current_host_chain_index() const { return current_host_chain_index_; } bool allow_failure() const { return allow_failure_; } int64_t id() const { return id_; } @@ -254,11 +258,15 @@ class JobInfo { void SetHttpCode(int http_code) { http_code_ = http_code; } void SetNumUsedProxies(unsigned char num_used_proxies) { num_used_proxies_ = num_used_proxies; } + void SetNumUsedMetalinks(unsigned char num_used_metalinks) + { num_used_metalinks_ = num_used_metalinks; } void SetNumUsedHosts(unsigned char num_used_hosts) { num_used_hosts_ = num_used_hosts; } void SetNumRetries(unsigned char num_retries) { num_retries_ = num_retries; } void SetBackoffMs(unsigned backoff_ms) { backoff_ms_ = backoff_ms; } - void SetCurrentHostChainIndex(unsigned int current_host_chain_index) + void SetCurrentMetalinkChainIndex(int current_metalink_chain_index) + { current_metalink_chain_index_ = current_metalink_chain_index; } + void SetCurrentHostChainIndex(int current_host_chain_index) { current_host_chain_index_ = current_host_chain_index; } void SetAllowFailure(bool allow_failure) { allow_failure_ = allow_failure; } diff --git a/cvmfs/talk.cc b/cvmfs/talk.cc index 301e648a14..e6acfa5f02 100644 --- a/cvmfs/talk.cc +++ b/cvmfs/talk.cc @@ -107,7 +107,7 @@ string TalkManager::FormatMetalinkInfo(download::DownloadManager *download_mgr) string metalink_str; for (unsigned i = 0; i < metalink_chain.size(); ++i) { - metalink_str += " [" + StringifyInt(i) + "] " + metalink_chain[i]; + metalink_str += " [" + StringifyInt(i) + "] " + metalink_chain[i] + "\n"; } metalink_str += "Active metalink " + StringifyInt(active_metalink) + ": " + metalink_chain[active_metalink] + "\n"; From 99cd7b9880c1f70d6547e0c53254ea88a01b4397 Mon Sep 17 00:00:00 2001 From: Dave Dykstra <2129743+DrDaveD@users.noreply.github.com> Date: Thu, 24 Oct 2024 20:36:54 -0500 Subject: [PATCH 4/4] process the LINK: header --- cvmfs/mountpoint.cc | 8 ++- cvmfs/network/download.cc | 111 +++++++++++++++++++++++++++++++++++--- cvmfs/network/download.h | 3 +- cvmfs/network/jobinfo.cc | 1 + 4 files changed, 113 insertions(+), 10 deletions(-) diff --git a/cvmfs/mountpoint.cc b/cvmfs/mountpoint.cc index bc34194cfb..502bcda7ff 100644 --- a/cvmfs/mountpoint.cc +++ b/cvmfs/mountpoint.cc @@ -1417,7 +1417,13 @@ bool MountPoint::CreateDownloadManagers() { download_mgr_->SetFailoverIndefinitely(); } - if (options_mgr_->GetValue("CVMFS_SERVER_URL", &optarg)) { + if (options_mgr_->GetValue("CVMFS_METALINK_URL", &optarg)) { + download_mgr_->SetMetalinkChain(optarg); + // host chain will be set later when the metalink server is contacted + download_mgr_->SetHostChain(""); + // metalink requires redirects + download_mgr_->EnableRedirects(); + } else if (options_mgr_->GetValue("CVMFS_SERVER_URL", &optarg)) { download_mgr_->SetHostChain(optarg); } diff --git a/cvmfs/network/download.cc b/cvmfs/network/download.cc index 00a4bf8637..51786758da 100644 --- a/cvmfs/network/download.cc +++ b/cvmfs/network/download.cc @@ -226,7 +226,14 @@ static size_t CallbackCurlHeader(void *ptr, size_t size, size_t nmemb, // This is metalink info LogCvmfs(kLogDownload, kLogDebug, "(id %" PRId64 ") %s", info->id(), header_line.c_str()); - info->SetLink(header_line.substr(5)); + std::string link = info->link(); + if (link.size() != 0) { + // multiple LINK headers are allowed + link = link + ", " + header_line.substr(5); + } else { + link = header_line.substr(5); + } + info->SetLink(link); } else if (HasPrefix(header_line, "X-SQUID-ERROR:", true)) { // Reinterpret host error as proxy error if (info->error_code() == kFailHostHttp) { @@ -1399,6 +1406,81 @@ void DownloadManager::ReleaseCredential(JobInfo *info) { } +/* Sort links based on the "pri=" parameter */ +static bool sortlinks(std::string s1, std::string s2) { + size_t pos1 = s1.find("; pri="); + size_t pos2 = s2.find("; pri="); + int pri1, pri2; + if ((pos1 != std::string::npos) && + (pos2 != std::string::npos) && + (sscanf(s1.substr(pos1+6).c_str(), "%d", &pri1) == 1) && + (sscanf(s2.substr(pos2+6).c_str(), "%d", &pri2) == 1)) { + return pri1 < pri2; + } + return false; +} + +/** + * Parses Link header and uses it to set a new host chain. + * See rfc6249. + */ +void DownloadManager::ProcessLink(JobInfo *info) { + + std::vector links = SplitString(info->link(), ','); + if (info->link().find("; pri=") != std::string::npos) + std::sort(links.begin(), links.end(), sortlinks); + + std::vector host_list; + + std::vector::const_iterator il = links.begin(); + for (; il != links.end(); ++il) { + std::string link = *il; + if ((link.find("; rel=duplicate") == std::string::npos) && + (link.find("; rel=\"duplicate\"") == std::string::npos)) { + LogCvmfs(kLogDownload, kLogDebug, + "skipping link '%s' because it does not contain rel=duplicate", + link.c_str()); + continue; + } + // ignore depth= field since there's nothing useful we can do with it + + size_t start = link.find('<'); + if (start == std::string::npos) { + LogCvmfs(kLogDownload, kLogDebug, + "skipping link '%s' because it does not have a left angle bracket", + link.c_str()); + continue; + } + + start++; + if ((link.substr(start, 7) != "http://") && + (link.substr(start, 8) != "https://")) { + LogCvmfs(kLogDownload, kLogDebug, + "skipping link '%s' of unrecognized url protocol", link.c_str()); + continue; + } + + size_t end = link.find('/', start+8); + if (end == std::string::npos) + end = link.find('>'); + if (end == std::string::npos) { + LogCvmfs(kLogDownload, kLogDebug, + "skipping link '%s' because no slash in url and no right angle bracket", + link.c_str()); + continue; + } + std::string host = link.substr(start, end-start); + LogCvmfs(kLogDownload, kLogDebug, "adding linked host '%s'", host.c_str()); + host_list.push_back(host); + } + + if (host_list.size() > 0) { + SetHostChain(host_list); + opt_metalink_timestamp_link_ = time(NULL); + } +} + + /** * Checks the result of a curl download and implements the failure logic, such * as changing the proxy server. Takes care of cleanup. @@ -1412,6 +1494,21 @@ bool DownloadManager::VerifyAndFinalize(const int curl_error, JobInfo *info) { info->proxy().c_str(), curl_error); UpdateStatistics(info->curl_handle()); + bool was_metalink; + std::string typ; + if (info->current_metalink_chain_index() >= 0) { + was_metalink = true; + typ = "metalink"; + if (info->link() != "") { + // process Link header whether or not the redirected URL got an error + ProcessLink(info); + } + } else { + was_metalink = false; + typ = "host"; + } + + // Verification and error classification switch (curl_error) { case CURLE_OK: @@ -1512,18 +1609,12 @@ bool DownloadManager::VerifyAndFinalize(const int curl_error, JobInfo *info) { break; } - bool was_metalink; - std::string typ; std::vector *host_chain; unsigned char num_used_hosts; - if (info->current_metalink_chain_index() >= 0) { - was_metalink = true; - typ = "metalink"; + if (was_metalink) { host_chain = opt_metalink_.chain; num_used_hosts = info->num_used_metalinks(); } else { - was_metalink = false; - typ = "host"; host_chain = opt_host_.chain; num_used_hosts = info->num_used_hosts(); } @@ -1925,6 +2016,9 @@ Failures DownloadManager::Fetch(JobInfo *info) { info->GetHashContextPtr()->buffer = alloca(info->hash_context().size); } + // In case JobInfo object is being reused + info->SetLink(""); + // Prepare cvmfs-info: header, allocate string on the stack info->SetInfoHeader(NULL); if (enable_info_header_ && info->extra_info()) { @@ -3153,6 +3247,7 @@ DownloadManager *DownloadManager::Clone( clone->proxy_template_direct_ = proxy_template_direct_; clone->proxy_template_forced_ = proxy_template_forced_; clone->opt_proxy_groups_reset_after_ = opt_proxy_groups_reset_after_; + clone->opt_metalink_.reset_after = opt_metalink_.reset_after; clone->opt_host_.reset_after = opt_host_.reset_after; clone->credentials_attachment_ = credentials_attachment_; clone->ssl_certificate_store_ = ssl_certificate_store_; diff --git a/cvmfs/network/download.h b/cvmfs/network/download.h index f83b2019bb..369cb3282d 100644 --- a/cvmfs/network/download.h +++ b/cvmfs/network/download.h @@ -287,6 +287,7 @@ class DownloadManager { // NOLINT(clang-analyzer-optin.performance.Padding) void Backoff(JobInfo *info); void SetNocache(JobInfo *info); void SetRegularCache(JobInfo *info); + void ProcessLink(JobInfo *info); bool VerifyAndFinalize(const int curl_error, JobInfo *info); void InitHeaders(); void CloneProxyConfig(DownloadManager *clone); @@ -344,7 +345,7 @@ class DownloadManager { // NOLINT(clang-analyzer-optin.performance.Padding) bool enable_http_tracing_; std::vector http_tracing_headers_; - // Metalink + // Metalink list HostInfo opt_metalink_; time_t opt_metalink_timestamp_link_; diff --git a/cvmfs/network/jobinfo.cc b/cvmfs/network/jobinfo.cc index be040bcab6..46d84cec2a 100644 --- a/cvmfs/network/jobinfo.cc +++ b/cvmfs/network/jobinfo.cc @@ -66,6 +66,7 @@ void JobInfo::Init() { nocache_ = false; error_code_ = kFailOther; http_code_ = -1; + link_ = ""; num_used_proxies_ = 0; num_used_metalinks_ = 0; num_used_hosts_ = 0;