Skip to content

Commit 545def5

Browse files
committed
chore: Pipelining fixes
Addresses #4998. 1. Reduces aggressive yielding when reading multiple requests since it hampers pipeline efficiency. Now we yield consistently based on CPU time spent since the last resume point. 2. Increases socket read buffer size effectively allowing processing more requests in bulk. 3. Changes the sharding function for cluster mode to shard by slot id. Signed-off-by: Roman Gershman <roman@dragonflydb.io>
1 parent a3aa588 commit 545def5

File tree

5 files changed

+226
-94
lines changed

5 files changed

+226
-94
lines changed

src/facade/dragonfly_connection.cc

+13-26
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
#include <numeric>
1414
#include <variant>
1515

16+
#include "base/cycle_clock.h"
1617
#include "base/flags.h"
1718
#include "base/histogram.h"
1819
#include "base/io_buf.h"
@@ -24,6 +25,7 @@
2425
#include "facade/redis_parser.h"
2526
#include "facade/service_interface.h"
2627
#include "io/file.h"
28+
#include "util/fibers/fibers.h"
2729
#include "util/fibers/proactor_base.h"
2830

2931
#ifdef DFLY_USE_SSL
@@ -1136,19 +1138,6 @@ void Connection::DispatchSingle(bool has_more, absl::FunctionRef<void()> invoke_
11361138
// Dispatch async if we're handling a pipeline or if we can't dispatch sync.
11371139
if (optimize_for_async || !can_dispatch_sync) {
11381140
SendAsync(cmd_msg_cb());
1139-
1140-
auto epoch = fb2::FiberSwitchEpoch();
1141-
1142-
if (async_fiber_epoch_ == epoch) {
1143-
// If we pushed too many items without context switching - yield
1144-
if (++async_streak_len_ >= 10 && !cc_->async_dispatch) {
1145-
async_streak_len_ = 0;
1146-
ThisFiber::Yield();
1147-
}
1148-
} else {
1149-
async_streak_len_ = 0;
1150-
async_fiber_epoch_ = epoch;
1151-
}
11521141
} else {
11531142
ShrinkPipelinePool(); // Gradually release pipeline request pool.
11541143
{
@@ -1393,6 +1382,8 @@ auto Connection::IoLoop() -> variant<error_code, ParserStatus> {
13931382
auto* peer = socket_.get();
13941383
recv_buf_.res_len = 0;
13951384

1385+
const uint64_t kCyclesPerJiffy = base::CycleClock::Frequency() >> 16; // ~15usec.
1386+
13961387
do {
13971388
HandleMigrateRequest();
13981389
ec = HandleRecvSocket();
@@ -1410,6 +1401,10 @@ auto Connection::IoLoop() -> variant<error_code, ParserStatus> {
14101401
parse_status = ParseMemcache();
14111402
}
14121403

1404+
if (reply_builder_->GetError()) {
1405+
return reply_builder_->GetError();
1406+
}
1407+
14131408
if (parse_status == NEED_MORE) {
14141409
parse_status = OK;
14151410

@@ -1429,33 +1424,25 @@ auto Connection::IoLoop() -> variant<error_code, ParserStatus> {
14291424
[&]() { io_buf_.Reserve(std::min(max_iobfuf_len, parser_hint)); });
14301425
}
14311426

1432-
// If we got a partial request and we couldn't parse the length, just
1433-
// double the capacity.
14341427
// If we got a partial request because iobuf was full, grow it up to
14351428
// a reasonable limit to save on Recv() calls.
1436-
if (io_buf_.AppendLen() < 64u || (is_iobuf_full && capacity < 4096)) {
1429+
if (is_iobuf_full && capacity < max_iobfuf_len / 2) {
14371430
// Last io used most of the io_buf to the end.
14381431
UpdateIoBufCapacity(io_buf_, stats_, [&]() {
14391432
io_buf_.Reserve(capacity * 2); // Valid growth range.
14401433
});
14411434
}
14421435

14431436
DCHECK_GT(io_buf_.AppendLen(), 0U);
1444-
} else if (io_buf_.AppendLen() == 0) {
1445-
// We have a full buffer and we can not progress with parsing.
1446-
// This means that we have request too large.
1447-
LOG(ERROR) << "Request is too large, closing connection";
1448-
parse_status = ERROR;
1449-
break;
14501437
}
14511438
} else if (parse_status != OK) {
14521439
break;
14531440
}
1454-
ec = reply_builder_->GetError();
1455-
} while (peer->IsOpen() && !ec);
14561441

1457-
if (ec)
1458-
return ec;
1442+
if (ThisFiber::GetRunningTimeCycles() > kCyclesPerJiffy) {
1443+
ThisFiber::Yield();
1444+
}
1445+
} while (peer->IsOpen());
14591446

14601447
return parse_status;
14611448
}

src/facade/dragonfly_connection.h

-4
Original file line numberDiff line numberDiff line change
@@ -455,10 +455,6 @@ class Connection : public util::Connection {
455455

456456
unsigned parser_error_ = 0;
457457

458-
// amount of times we enqued requests asynchronously during the same async_fiber_epoch_.
459-
unsigned async_streak_len_ = 0;
460-
uint64_t async_fiber_epoch_ = 0;
461-
462458
BreakerCb breaker_cb_;
463459

464460
// Used by redis parser to avoid allocations

src/server/server_family.cc

+14-4
Original file line numberDiff line numberDiff line change
@@ -1314,6 +1314,20 @@ void PrintPrometheusMetrics(uint64_t uptime, const Metrics& m, DflyCmd* dfly_cmd
13141314
AppendMetricWithoutLabels("pipeline_commands_duration_seconds", "",
13151315
conn_stats.pipelined_cmd_latency * 1e-6, MetricType::COUNTER,
13161316
&resp->body());
1317+
1318+
AppendMetricWithoutLabels("cmd_squash_hop_total", "", m.coordinator_stats.multi_squash_executions,
1319+
MetricType::COUNTER, &resp->body());
1320+
1321+
AppendMetricWithoutLabels("cmd_squash_commands_total", "", m.coordinator_stats.squashed_commands,
1322+
MetricType::COUNTER, &resp->body());
1323+
1324+
AppendMetricWithoutLabels("cmd_squash_hop_duration_seconds", "",
1325+
m.coordinator_stats.multi_squash_exec_hop_usec * 1e-6,
1326+
MetricType::COUNTER, &resp->body());
1327+
AppendMetricWithoutLabels("cmd_squash_hop_reply_seconds", "",
1328+
m.coordinator_stats.multi_squash_exec_reply_usec * 1e-6,
1329+
MetricType::COUNTER, &resp->body());
1330+
13171331
AppendMetricWithoutLabels("commands_squashing_replies_bytes", "",
13181332
MultiCommandSquasher::GetRepliesMemSize(), MetricType::GAUGE,
13191333
&resp->body());
@@ -2486,7 +2500,6 @@ string ServerFamily::FormatInfoMetrics(const Metrics& m, std::string_view sectio
24862500
append("total_commands_processed", conn_stats.command_cnt_main + conn_stats.command_cnt_other);
24872501
append("instantaneous_ops_per_sec", m.qps);
24882502
append("total_pipelined_commands", conn_stats.pipelined_cmd_cnt);
2489-
append("total_pipelined_squashed_commands", m.coordinator_stats.squashed_commands);
24902503
append("pipeline_throttle_total", conn_stats.pipeline_throttle_count);
24912504
append("pipelined_latency_usec", conn_stats.pipelined_cmd_latency);
24922505
append("total_net_input_bytes", conn_stats.io_read_bytes);
@@ -2628,9 +2641,6 @@ string ServerFamily::FormatInfoMetrics(const Metrics& m, std::string_view sectio
26282641
append("eval_shardlocal_coordination_total",
26292642
m.coordinator_stats.eval_shardlocal_coordination_cnt);
26302643
append("eval_squashed_flushes", m.coordinator_stats.eval_squashed_flushes);
2631-
append("multi_squash_execution_total", m.coordinator_stats.multi_squash_executions);
2632-
append("multi_squash_execution_hop_usec", m.coordinator_stats.multi_squash_exec_hop_usec);
2633-
append("multi_squash_execution_reply_usec", m.coordinator_stats.multi_squash_exec_reply_usec);
26342644
};
26352645

26362646
auto add_repl_info = [&] {

tests/dragonfly/connection_test.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -561,7 +561,7 @@ async def measure(aw):
561561
e = async_client.pipeline(transaction=True)
562562
for _ in range(100):
563563
e.incr("num-1")
564-
assert await measure(e.execute()) == 2 # OK + Response
564+
assert await measure(e.execute()) == 1
565565

566566
# Just pipeline
567567
p = async_client.pipeline(transaction=False)

0 commit comments

Comments
 (0)