Skip to content

Commit 29641be

Browse files
committed
Merge branch 'main' of https://github.com/datafuselabs/databend into add_runtime_bloom_filter_for_merge_into
2 parents 2a51166 + 7171319 commit 29641be

File tree

168 files changed

+3239
-1852
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

168 files changed

+3239
-1852
lines changed

โ€ŽCargo.lock

Lines changed: 405 additions & 5 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

โ€ŽCargo.toml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -120,7 +120,8 @@ opendal = { version = "0.45.0", features = [
120120
sled = { git = "https://github.com/datafuse-extras/sled", tag = "v0.34.7-datafuse.1", default-features = false }
121121

122122
# openraft for debugging
123-
openraft = { git = "https://github.com/datafuselabs/openraft", tag = "v0.9.0", features = [
123+
# openraft = { git = "https://github.com/datafuselabs/openraft", tag = "v0.9.0", features = [
124+
openraft = { version = "0.9.1", features = [
124125
"serde",
125126
"tracing-log",
126127
"generic-snapshot-data",

โ€ŽREADME.md

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@
3434

3535
## ๐Ÿ‹ Introduction
3636

37-
**Databend** is an open-source, elastic, and workload-aware cloud data warehouse built in Rust, offering a cost-effective [alternative to Snowflake](https://github.com/datafuselabs/databend/issues/13059). It's designed for complex analysis of the world's largest datasets.
37+
**Databend**, built in Rust, is an open-source cloud data warehouse that serves as a cost-effective [alternative to Snowflake](https://github.com/datafuselabs/databend/issues/13059). With its focus on fast query execution and data ingestion, it's designed for complex analysis of the world's largest datasets.
3838

3939
## โšก Performance
4040

@@ -46,6 +46,14 @@
4646

4747
![Databend vs. Snowflake](https://github.com/datafuselabs/wizard/assets/172204/d796acf0-0a66-4b1d-8754-cd2cd1de04c7)
4848

49+
<div align="center">
50+
51+
[Data Ingestion Benchmark: Databend Cloud vs. Snowflake](https://docs.databend.com/guides/benchmark/data-ingest)
52+
53+
</div>
54+
55+
![Databend vs. Snowflake](https://github.com/datafuselabs/databend/assets/172204/c61d7a40-f6fe-4fb9-83e8-06ea9599aeb4)
56+
4957

5058
## ๐Ÿš€ Why Databend
5159

@@ -213,6 +221,15 @@ docker run --net=host datafuselabs/databend
213221

214222
</details>
215223

224+
<details>
225+
<summary>Accessing Data Lake</summary>
226+
227+
- [Apache Hive](https://docs.databend.com/guides/access-data-lake/hive)
228+
- [Apache Iceberg](https://docs.databend.com/guides/access-data-lake/iceberg/iceberg-engine)
229+
- [Delta Lake](https://docs.databend.com/guides/access-data-lake/delta)
230+
231+
</details>
232+
216233
<details>
217234
<summary>Security</summary>
218235

@@ -227,7 +244,8 @@ docker run --net=host datafuselabs/databend
227244
<summary>Performance</summary>
228245

229246
- [Review Clickbench](https://databend.com/blog/clickbench-databend-top)
230-
- [How to Benchmark Databend using TPC-H](https://databend.com/blog/2022/08/08/benchmark-tpc-h)
247+
- [TPC-H Benchmark: Databend Cloud vs. Snowflake](https://docs.databend.com/guides/benchmark/tpch)
248+
- [Databend vs. Snowflake: Data Ingestion Benchmark](https://docs.databend.com/guides/benchmark/data-ingest)
231249

232250
</details>
233251

โ€Žsrc/binaries/metabench/main.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -189,7 +189,7 @@ async fn benchmark_table(client: &Arc<ClientHandle>, prefix: u64, client_num: u6
189189

190190
let res = client
191191
.create_database(CreateDatabaseReq {
192-
create_option: CreateOption::None,
192+
create_option: CreateOption::Create,
193193
name_ident: DatabaseNameIdent {
194194
tenant: tenant(),
195195
db_name: db_name(),

โ€Žsrc/common/metrics/src/metrics/transform.rs

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,19 @@ pub fn metrics_inc_aggregate_partial_hashtable_allocated_bytes(c: u64) {
7474
AGGREGATE_PARTIAL_HASHTABLE_ALLOCATED_BYTES.inc_by(c);
7575
}
7676

77+
pub fn metrics_inc_group_by_partial_spill_count() {
78+
let labels = &vec![("spill", "group_by_partial_spill".to_string())];
79+
SPILL_COUNT.get_or_create(labels).inc();
80+
}
81+
82+
pub fn metrics_inc_group_by_partial_spill_cell_count(c: u64) {
83+
AGGREGATE_PARTIAL_SPILL_CELL_COUNT.inc_by(c);
84+
}
85+
86+
pub fn metrics_inc_group_by_partial_hashtable_allocated_bytes(c: u64) {
87+
AGGREGATE_PARTIAL_HASHTABLE_ALLOCATED_BYTES.inc_by(c);
88+
}
89+
7790
pub fn metrics_inc_group_by_spill_write_count() {
7891
let labels = &vec![("spill", "group_by_spill".to_string())];
7992
SPILL_WRITE_COUNT.get_or_create(labels).inc();

โ€Žsrc/meta/api/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ databend-common-meta-types = { path = "../types" }
2222
databend-common-proto-conv = { path = "../proto-conv" }
2323

2424
anyhow = { workspace = true }
25+
async-backtrace = { workspace = true }
2526
async-trait = { workspace = true }
2627
chrono = { workspace = true }
2728
enumflags2 = { workspace = true }

โ€Žsrc/meta/api/src/crud/errors.rs

Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
// Copyright 2021 Datafuse Labs
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
use std::error::Error;
16+
17+
use databend_common_exception::ErrorCode;
18+
use databend_common_meta_app::tenant_key::TenantResource;
19+
use databend_common_meta_app::tenant_key_errors::ExistError;
20+
use databend_common_meta_app::tenant_key_errors::UnknownError;
21+
use databend_common_meta_types::MetaError;
22+
23+
/// CRUD Error that can be an API level error or a business error.
24+
#[derive(Clone, Debug, thiserror::Error)]
25+
pub enum CrudError<E>
26+
where E: Error + 'static
27+
{
28+
#[error(transparent)]
29+
ApiError(#[from] MetaError),
30+
31+
#[error(transparent)]
32+
Business(E),
33+
}
34+
35+
impl<E> CrudError<E>
36+
where E: Error + 'static
37+
{
38+
pub fn is_business_error(&self) -> bool {
39+
match self {
40+
CrudError::ApiError(_e) => false,
41+
CrudError::Business(_e) => true,
42+
}
43+
}
44+
45+
/// Convert the error into a layered result.
46+
pub fn into_result(self) -> Result<Result<(), E>, MetaError> {
47+
match self {
48+
CrudError::ApiError(meta_err) => Err(meta_err),
49+
CrudError::Business(e) => Ok(Err(e)),
50+
}
51+
}
52+
}
53+
54+
impl<R> From<ExistError<R>> for CrudError<ExistError<R>>
55+
where R: TenantResource + 'static
56+
{
57+
fn from(e: ExistError<R>) -> Self {
58+
CrudError::Business(e)
59+
}
60+
}
61+
62+
impl<R> From<UnknownError<R>> for CrudError<UnknownError<R>>
63+
where R: TenantResource + 'static
64+
{
65+
fn from(e: UnknownError<R>) -> Self {
66+
CrudError::Business(e)
67+
}
68+
}
69+
70+
impl<E> From<CrudError<E>> for ErrorCode
71+
where
72+
E: Into<ErrorCode>,
73+
E: Error,
74+
{
75+
fn from(value: CrudError<E>) -> Self {
76+
match value {
77+
CrudError::ApiError(meta_err) => meta_err.into(),
78+
CrudError::Business(e) => e.into(),
79+
}
80+
}
81+
}

โ€Žsrc/meta/api/src/crud/mod.rs

Lines changed: 179 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,179 @@
1+
// Copyright 2021 Datafuse Labs
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
//! A generic CRUD interface for meta data operations.
16+
17+
mod errors;
18+
use std::marker::PhantomData;
19+
use std::sync::Arc;
20+
21+
use databend_common_meta_app::schema::CreateOption;
22+
use databend_common_meta_app::tenant::Tenant;
23+
use databend_common_meta_app::tenant_key::TIdent;
24+
use databend_common_meta_app::tenant_key::TenantResource;
25+
use databend_common_meta_app::tenant_key_errors::ExistError;
26+
use databend_common_meta_app::tenant_key_errors::UnknownError;
27+
use databend_common_meta_kvapi::kvapi;
28+
use databend_common_meta_kvapi::kvapi::DirName;
29+
use databend_common_meta_kvapi::kvapi::ValueWithName;
30+
use databend_common_meta_types::MatchSeq;
31+
use databend_common_meta_types::MatchSeqExt;
32+
use databend_common_meta_types::MetaError;
33+
use databend_common_meta_types::NonEmptyString;
34+
use databend_common_meta_types::SeqV;
35+
use databend_common_meta_types::SeqValue;
36+
use databend_common_meta_types::With;
37+
use databend_common_proto_conv::FromToProto;
38+
pub use errors::CrudError;
39+
use futures::TryStreamExt;
40+
41+
use crate::kv_pb_api::KVPbApi;
42+
use crate::kv_pb_api::UpsertPB;
43+
44+
/// A generic CRUD interface for meta data operations.
45+
///
46+
/// - It provide `add`, `update`, `get`, `remove` and `list` operations.
47+
/// - The key space it operates on is defined by the type [`TIdent<R>`],
48+
/// which contains a `Tenant` and a `Name`.
49+
///
50+
/// One `CrudMgr` instance can only access keys of exactly one [`Tenant`].
51+
///
52+
/// [`TIdent<R>`]: TIdent
53+
pub struct CrudMgr<R> {
54+
kv_api: Arc<dyn kvapi::KVApi<Error = MetaError>>,
55+
tenant: Tenant,
56+
_p: PhantomData<R>,
57+
}
58+
59+
impl<R> CrudMgr<R> {
60+
/// Create a new `CrudMgr` instance providing CRUD access for a key space defined by `R`: [`TenantResource`].
61+
pub fn create(
62+
kv_api: Arc<dyn kvapi::KVApi<Error = MetaError>>,
63+
tenant: &NonEmptyString,
64+
) -> Self {
65+
CrudMgr {
66+
kv_api,
67+
tenant: Tenant::new_nonempty(tenant.clone()),
68+
_p: Default::default(),
69+
}
70+
}
71+
72+
/// Create a structured key for the given name.
73+
fn ident(&self, name: &str) -> TIdent<R> {
74+
TIdent::new(self.tenant.clone(), name)
75+
}
76+
}
77+
78+
/// A shortcut
79+
type ValueOf<R> = <TIdent<R> as kvapi::Key>::ValueType;
80+
81+
impl<R> CrudMgr<R>
82+
where
83+
R: TenantResource + Send + 'static,
84+
// As a kvapi::Key, the corresponding value contains a name.
85+
ValueOf<R>: ValueWithName + FromToProto + Clone,
86+
{
87+
#[async_backtrace::framed]
88+
#[minitrace::trace]
89+
pub async fn add(
90+
&self,
91+
value: ValueOf<R>,
92+
create_option: &CreateOption,
93+
) -> Result<(), CrudError<ExistError<R>>> {
94+
let ident = self.ident(value.name());
95+
96+
let seq = MatchSeq::from(*create_option);
97+
let upsert = UpsertPB::insert(ident, value.clone()).with(seq);
98+
99+
let res = self.kv_api.upsert_pb(&upsert).await?;
100+
101+
if let CreateOption::Create = create_option {
102+
if res.prev.is_some() {
103+
return Err(ExistError::new(value.name(), "Exist when add").into());
104+
}
105+
}
106+
107+
Ok(())
108+
}
109+
110+
#[async_backtrace::framed]
111+
#[minitrace::trace]
112+
pub async fn update(
113+
&self,
114+
value: ValueOf<R>,
115+
match_seq: MatchSeq,
116+
) -> Result<u64, CrudError<UnknownError<R>>> {
117+
let ident = self.ident(value.name());
118+
let upsert = UpsertPB::update(ident, value.clone()).with(match_seq);
119+
120+
let res = self.kv_api.upsert_pb(&upsert).await?;
121+
122+
match res.result {
123+
Some(SeqV { seq, .. }) => Ok(seq),
124+
None => Err(UnknownError::new(value.name(), "NotFound when update").into()),
125+
}
126+
}
127+
128+
#[async_backtrace::framed]
129+
#[minitrace::trace]
130+
pub async fn remove(
131+
&self,
132+
name: &str,
133+
seq: MatchSeq,
134+
) -> Result<(), CrudError<UnknownError<R>>> {
135+
let ident = self.ident(name);
136+
137+
let upsert = UpsertPB::delete(ident).with(seq);
138+
139+
let res = self.kv_api.upsert_pb(&upsert).await?;
140+
res.removed_or_else(|e| {
141+
UnknownError::new(
142+
name,
143+
format_args!("NotFound when remove, seq of existing record: {}", e.seq()),
144+
)
145+
})?;
146+
147+
Ok(())
148+
}
149+
150+
#[async_backtrace::framed]
151+
#[minitrace::trace]
152+
pub async fn get(
153+
&self,
154+
name: &str,
155+
seq: MatchSeq,
156+
) -> Result<SeqV<ValueOf<R>>, CrudError<UnknownError<R>>> {
157+
let ident = self.ident(name);
158+
159+
let res = self.kv_api.get_pb(&ident).await?;
160+
161+
let seq_value = res.ok_or_else(|| UnknownError::new(name, "NotFound when get"))?;
162+
163+
match seq.match_seq(&seq_value) {
164+
Ok(_) => Ok(seq_value),
165+
Err(e) => Err(UnknownError::new(name, format_args!("NotFound when get: {}", e)).into()),
166+
}
167+
}
168+
169+
#[async_backtrace::framed]
170+
#[minitrace::trace]
171+
pub async fn list(&self) -> Result<Vec<ValueOf<R>>, MetaError> {
172+
let dir_name = DirName::new(self.ident("dummy"));
173+
174+
let values = self.kv_api.list_pb_values(&dir_name).await?;
175+
let values = values.try_collect().await?;
176+
177+
Ok(values)
178+
}
179+
}

โ€Žsrc/meta/api/src/data_mask_api_impl.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,7 @@ impl<KV: kvapi::KVApi<Error = MetaError>> DatamaskApi for KV {
7979

8080
if seq > 0 {
8181
match req.create_option {
82-
CreateOption::None => {
82+
CreateOption::Create => {
8383
return Err(KVAppError::AppError(AppError::DatamaskAlreadyExists(
8484
DatamaskAlreadyExists::new(
8585
&name_key.name,

โ€Žsrc/meta/api/src/lib.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,8 @@ pub(crate) mod testing;
4141
pub mod txn_backoff;
4242
pub(crate) mod util;
4343

44+
pub mod crud;
45+
4446
pub use background_api::BackgroundApi;
4547
pub use background_api_test_suite::BackgroundApiTestSuite;
4648
pub use data_mask_api::DatamaskApi;

0 commit comments

Comments
ย (0)