Skip to content

Commit bbfcb5e

Browse files
evanevanevanevannnnblinkov
authored andcommitted
YQ-4087 fix (#14545)
1 parent ff5b388 commit bbfcb5e

File tree

2 files changed

+113
-14
lines changed

2 files changed

+113
-14
lines changed

ydb/library/yql/providers/s3/provider/yql_s3_dq_integration.cpp

Lines changed: 3 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -468,20 +468,9 @@ class TS3DqIntegration: public TDqIntegrationBase {
468468
std::make_move_iterator(pathsChunk.end()));
469469
}
470470

471-
NS3::TRange range;
472-
range.SetStartPathIndex(0);
473-
TFileTreeBuilder builder;
474-
std::for_each(paths.cbegin(), paths.cend(), [&builder](const TPath& f) {
475-
builder.AddPath(f.Path, f.Size, f.IsDirectory);
476-
});
477-
builder.Save(&range);
478-
479-
TVector<TString> serialized(1);
480-
TStringOutput out(serialized.front());
481-
range.Save(&out);
482-
483-
paths.clear();
484-
ReadPathsList({}, serialized, paths);
471+
for (size_t i = 0; i < paths.size(); ++i) {
472+
paths[i].PathIndex = i;
473+
}
485474

486475
const NDq::TS3ReadActorFactoryConfig& readActorConfig = State_->Configuration->S3ReadActorFactoryConfig;
487476
ui64 fileSizeLimit = readActorConfig.FileSizeLimit;

ydb/tests/fq/s3/test_explicit_partitioning_1.py

Lines changed: 110 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -574,3 +574,113 @@ def test_parquet(self, kikimr, s3, client, runtime_listing, unique_prefix):
574574
assert result_set.rows[1].items[2].int32_value == 2
575575
assert result_set.rows[2].items[2].int32_value == 1
576576
assert result_set.rows[3].items[2].int32_value == 2
577+
578+
@yq_all
579+
@pytest.mark.parametrize("client", [{"folder_id": "my_folder"}], indirect=True)
580+
@pytest.mark.parametrize("runtime_listing", ["true"])
581+
def test_valid_projected_column_values(self, kikimr, s3, client, runtime_listing, yq_version, unique_prefix):
582+
resource = boto3.resource(
583+
"s3", endpoint_url=s3.s3_url, aws_access_key_id="key", aws_secret_access_key="secret_key"
584+
)
585+
586+
bucket = resource.Bucket("bucket")
587+
bucket.create(ACL='public-read')
588+
589+
s3_client = boto3.client(
590+
"s3", endpoint_url=s3.s3_url, aws_access_key_id="key", aws_secret_access_key="secret_key"
591+
)
592+
593+
s3_client.put_object(
594+
Body='''data
595+
text1''',
596+
Bucket='bucket',
597+
Key='2024/01/file.txt',
598+
ContentType='text/plain',
599+
)
600+
s3_client.put_object(
601+
Body='''data
602+
text2''',
603+
Bucket='bucket',
604+
Key='2024/02/file.txt',
605+
ContentType='text/plain',
606+
)
607+
s3_client.put_object(
608+
Body='''data
609+
text3''',
610+
Bucket='bucket',
611+
Key='2025/01/file.txt',
612+
ContentType='text/plain',
613+
)
614+
615+
kikimr.control_plane.wait_bootstrap(1)
616+
storage_connection_name = unique_prefix + "bucket"
617+
client.create_storage_connection(storage_connection_name, "bucket")
618+
619+
sql = (
620+
f'''
621+
pragma s3.UseRuntimeListing="{runtime_listing}";
622+
'''
623+
+ R'''
624+
$projection = @@ {
625+
"projection.enabled" : true,
626+
627+
"projection.year.type" : "integer",
628+
"projection.year.min" : 2024,
629+
"projection.year.max" : 2025,
630+
"projection.year.interval" : 1,
631+
632+
"projection.month.type" : "integer",
633+
"projection.month.min" : 1,
634+
"projection.month.max" : 2,
635+
"projection.month.interval" : 1,
636+
"projection.month.digits" : 2,
637+
638+
"storage.location.template" : "${year}/${month}"
639+
}
640+
@@;
641+
'''
642+
+ fR'''
643+
SELECT
644+
*
645+
FROM
646+
`{storage_connection_name}`.`/`
647+
WITH
648+
(
649+
format=csv_with_names,
650+
schema=(
651+
data String NOT NULL,
652+
year Int NOT NULL,
653+
month Int NOT NULL,
654+
),
655+
partitioned_by=(year, month),
656+
projection=$projection
657+
)
658+
WHERE data ILIKE '%text2%';
659+
'''
660+
)
661+
662+
# temporary fix for dynamic listing
663+
if yq_version == "v1":
664+
sql = 'pragma dq.MaxTasksPerStage="10"; ' + sql
665+
666+
query_id = client.create_query("simple", sql, type=fq.QueryContent.QueryType.ANALYTICS).result.query_id
667+
client.wait_query_status(query_id, fq.QueryMeta.COMPLETED)
668+
669+
describe_result = client.describe_query(query_id).result
670+
logging.info("AST: {}".format(describe_result.query.ast.data))
671+
672+
data = client.get_result_data(query_id)
673+
result_set = data.result.result_set
674+
logging.debug(str(result_set))
675+
676+
assert len(result_set.columns) == 3
677+
assert result_set.columns[0].name == "data"
678+
assert result_set.columns[0].type.type_id == ydb.Type.STRING
679+
assert result_set.columns[1].name == "month"
680+
assert result_set.columns[1].type.type_id == ydb.Type.INT32
681+
assert result_set.columns[2].name == "year"
682+
assert result_set.columns[2].type.type_id == ydb.Type.INT32
683+
assert len(result_set.rows) == 1
684+
assert result_set.rows[0].items[0].bytes_value == b"text2"
685+
assert result_set.rows[0].items[1].int32_value == 2
686+
assert result_set.rows[0].items[2].int32_value == 2024

0 commit comments

Comments
 (0)