Skip to content

Commit 7492f96

Browse files
authored
Revert "list have been supported for s3 reader" (#9699)
1 parent e3f4b87 commit 7492f96

18 files changed: +20 additions, -352 deletions (lines changed)

ydb/library/yql/core/peephole_opt/yql_opt_peephole_physical.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5708,7 +5708,7 @@ bool CollectBlockRewrites(const TMultiExprType* multiInputType, bool keepInputCo
57085708
std::visit([&types](const auto& value) { types.IncNoBlockType(value); }, typeKindOrSlot);
57095709
};
57105710

5711-
auto resolveStatus = types.ArrowResolver->AreTypesSupported(ctx.GetPosition(lambda->Pos()), allInputTypes, ctx, false, onUnsupportedType);
5711+
auto resolveStatus = types.ArrowResolver->AreTypesSupported(ctx.GetPosition(lambda->Pos()), allInputTypes, ctx, onUnsupportedType);
57125712
YQL_ENSURE(resolveStatus != IArrowResolver::ERROR);
57135713
if (resolveStatus != IArrowResolver::OK) {
57145714
return false;
@@ -5848,7 +5848,7 @@ bool CollectBlockRewrites(const TMultiExprType* multiInputType, bool keepInputCo
58485848
allTypes.push_back(node->Child(i)->GetTypeAnn());
58495849
}
58505850

5851-
auto resolveStatus = types.ArrowResolver->AreTypesSupported(ctx.GetPosition(node->Pos()), allTypes, ctx, false, onUnsupportedType);
5851+
auto resolveStatus = types.ArrowResolver->AreTypesSupported(ctx.GetPosition(node->Pos()), allTypes, ctx, onUnsupportedType);
58525852
YQL_ENSURE(resolveStatus != IArrowResolver::ERROR);
58535853
if (resolveStatus != IArrowResolver::OK) {
58545854
return true;

ydb/library/yql/core/yql_arrow_resolver.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ class IArrowResolver : public TThrRefBase {
2525

2626
virtual EStatus HasCast(const TPosition& pos, const TTypeAnnotationNode* from, const TTypeAnnotationNode* to, TExprContext& ctx) const = 0;
2727

28-
virtual EStatus AreTypesSupported(const TPosition& pos, const TVector<const TTypeAnnotationNode*>& types, TExprContext& ctx, bool extraTypes = false,
28+
virtual EStatus AreTypesSupported(const TPosition& pos, const TVector<const TTypeAnnotationNode*>& types, TExprContext& ctx,
2929
const TUnsupportedTypeCallback& onUnsupported = {}) const = 0;
3030
};
3131

ydb/library/yql/core/yql_expr_type_annotation.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3153,7 +3153,7 @@ bool IsSupportedAsBlockType(TPositionHandle pos, const TTypeAnnotationNode& type
31533153
std::visit([&types](const auto& value) { types.IncNoBlockType(value); }, typeKindOrSlot);
31543154
};
31553155
}
3156-
auto resolveStatus = types.ArrowResolver->AreTypesSupported(ctx.GetPosition(pos), { &type }, ctx, false, onUnsupportedType);
3156+
auto resolveStatus = types.ArrowResolver->AreTypesSupported(ctx.GetPosition(pos), { &type }, ctx, onUnsupportedType);
31573157
YQL_ENSURE(resolveStatus != IArrowResolver::ERROR);
31583158
return resolveStatus == IArrowResolver::OK;
31593159
}

ydb/library/yql/minikql/arrow/mkql_functions.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ namespace NKikimr::NMiniKQL {
1515
bool ConvertInputArrowType(TType* blockType, arrow::ValueDescr& descr) {
1616
auto asBlockType = AS_TYPE(TBlockType, blockType);
1717
descr.shape = asBlockType->GetShape() == TBlockType::EShape::Scalar ? arrow::ValueDescr::SCALAR : arrow::ValueDescr::ARRAY;
18-
return ConvertArrowType(asBlockType->GetItemType(), descr.type, true);
18+
return ConvertArrowType(asBlockType->GetItemType(), descr.type);
1919
}
2020

2121
class TOutputTypeVisitor : public arrow::TypeVisitor

ydb/library/yql/minikql/computation/mkql_block_impl.cpp

Lines changed: 0 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,6 @@
88
#include <ydb/library/yql/minikql/arrow/arrow_util.h>
99
#include <ydb/library/yql/minikql/arrow/mkql_bit_utils.h>
1010
#include <ydb/library/yql/public/udf/arrow/args_dechunker.h>
11-
#include <contrib/libs/apache/arrow/cpp/src/arrow/array/builder_base.h>
12-
#include <contrib/libs/apache/arrow/cpp/src/arrow/array/builder_binary.h>
1311

1412
#include <ydb/library/yql/parser/pg_wrapper/interface/arrow.h>
1513

@@ -67,29 +65,6 @@ arrow::Datum DoConvertScalar(TType* type, const T& value, arrow::MemoryPool& poo
6765
return arrow::Datum(std::make_shared<arrow::StructScalar>(arrowValue, arrowType));
6866
}
6967

70-
if (type->IsList()) {
71-
auto listType = AS_TYPE(TListType, type);
72-
std::shared_ptr<arrow::DataType> itemType;
73-
MKQL_ENSURE(ConvertArrowType(listType->GetItemType(), itemType), "Unsupported type of scalar " << *listType->GetItemType());
74-
75-
std::unique_ptr<arrow::ArrayBuilder> builder;
76-
auto status = arrow::MakeBuilder(&pool, itemType, &builder);
77-
MKQL_ENSURE(status.ok(), "Couldn't create arrow list builder: " << status.ToString());
78-
79-
auto boxed = value.AsBoxed();
80-
auto iterator = NUdf::TBoxedValueAccessor::GetListIterator(*boxed);
81-
NYql::NUdf::TUnboxedValue unboxed;
82-
while (iterator.Next(unboxed)) {
83-
auto status = builder->AppendScalar(*DoConvertScalar(listType->GetItemType(), unboxed, pool).scalar());
84-
MKQL_ENSURE(status.ok(), "Couldn't append scalar to arrow list builder: " << status.ToString());
85-
}
86-
87-
std::shared_ptr<arrow::Array> array;
88-
status = builder->Finish(&array);
89-
MKQL_ENSURE(status.ok(), "Couldn't finish arrow list builder: " << status.ToString());
90-
return arrow::Datum(std::make_shared<arrow::ListScalar>(array));
91-
}
92-
9368
if (type->IsTuple()) {
9469
auto tupleType = AS_TYPE(TTupleType, type);
9570
std::vector<std::shared_ptr<arrow::Scalar>> arrowValue;

ydb/library/yql/minikql/computation/mkql_block_reader.cpp

Lines changed: 1 addition & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -306,15 +306,8 @@ struct TConverterTraits {
306306
}
307307
}
308308

309-
static std::unique_ptr<TResult> MakeList(bool isOptional, std::unique_ptr<IBlockItemConverter>&&) {
310-
if (isOptional) {
311-
return std::make_unique<TResourceBlockItemConverter<true>>();
312-
} else {
313-
return std::make_unique<TResourceBlockItemConverter<false>>();
314-
}
315-
}
316-
317309
static std::unique_ptr<TResult> MakeResource(bool isOptional) {
310+
Y_UNUSED(isOptional);
318311
if (isOptional) {
319312
return std::make_unique<TResourceBlockItemConverter<true>>();
320313
} else {

ydb/library/yql/minikql/computation/mkql_block_transport.cpp

Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -630,11 +630,6 @@ struct TSerializerTraits {
630630
return std::make_unique<TStrings<arrow::BinaryType, true>>();
631631
}
632632

633-
static std::unique_ptr<TResult> MakeList(bool isOptional, std::unique_ptr<IBlockSerializer>&& inner) {
634-
Y_UNUSED(isOptional, inner);
635-
ythrow yexception() << "Serializer not implemented for list";
636-
}
637-
638633
static std::unique_ptr<TResult> MakeResource(bool isOptional) {
639634
Y_UNUSED(isOptional);
640635
ythrow yexception() << "Serializer not implemented for block resources";
@@ -671,11 +666,6 @@ struct TDeserializerTraits {
671666
return std::make_unique<TStrings<arrow::BinaryType, true>>();
672667
}
673668

674-
static std::unique_ptr<TResult> MakeList(bool isOptional, std::unique_ptr<TBlockDeserializerBase>&& inner) {
675-
Y_UNUSED(isOptional, inner);
676-
ythrow yexception() << "Deserializer not implemented for list";
677-
}
678-
679669
static std::unique_ptr<TResult> MakeResource(bool isOptional) {
680670
Y_UNUSED(isOptional);
681671
ythrow yexception() << "Deserializer not implemented for block resources";

ydb/library/yql/minikql/mkql_type_builder.cpp

Lines changed: 4 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -1517,7 +1517,7 @@ bool ConvertArrowType(NUdf::EDataSlot slot, std::shared_ptr<arrow::DataType>& ty
15171517
}
15181518
}
15191519

1520-
bool ConvertArrowType(TType* itemType, std::shared_ptr<arrow::DataType>& type, bool extraTypes, const TArrowConvertFailedCallback& onFail) {
1520+
bool ConvertArrowType(TType* itemType, std::shared_ptr<arrow::DataType>& type, const TArrowConvertFailedCallback& onFail) {
15211521
bool isOptional;
15221522
auto unpacked = UnpackOptional(itemType, isOptional);
15231523
if (unpacked->IsOptional() || isOptional && unpacked->IsPg()) {
@@ -1538,7 +1538,7 @@ bool ConvertArrowType(TType* itemType, std::shared_ptr<arrow::DataType>& type, b
15381538

15391539
// previousType is always Optional
15401540
std::shared_ptr<arrow::DataType> innerArrowType;
1541-
if (!ConvertArrowType(previousType, innerArrowType, extraTypes, onFail)) {
1541+
if (!ConvertArrowType(previousType, innerArrowType, onFail)) {
15421542
return false;
15431543
}
15441544

@@ -1560,7 +1560,7 @@ bool ConvertArrowType(TType* itemType, std::shared_ptr<arrow::DataType>& type, b
15601560
std::shared_ptr<arrow::DataType> childType;
15611561
const TString memberName(structType->GetMemberName(i));
15621562
auto memberType = structType->GetMemberType(i);
1563-
if (!ConvertArrowType(memberType, childType, extraTypes, onFail)) {
1563+
if (!ConvertArrowType(memberType, childType, onFail)) {
15641564
return false;
15651565
}
15661566
members.emplace_back(std::make_shared<arrow::Field>(memberName, childType, memberType->IsOptional()));
@@ -1570,27 +1570,13 @@ bool ConvertArrowType(TType* itemType, std::shared_ptr<arrow::DataType>& type, b
15701570
return true;
15711571
}
15721572

1573-
if (extraTypes) {
1574-
if (unpacked->IsList()) {
1575-
auto listType = AS_TYPE(TListType, unpacked);
1576-
std::shared_ptr<arrow::DataType> childType;
1577-
auto itemType = listType->GetItemType();
1578-
if (!ConvertArrowType(itemType, childType, extraTypes)) {
1579-
return false;
1580-
}
1581-
type = std::make_shared<arrow::ListType>(std::make_shared<arrow::Field>("item", childType, itemType->IsOptional()));
1582-
return true;
1583-
}
1584-
}
1585-
1586-
15871573
if (unpacked->IsTuple()) {
15881574
auto tupleType = AS_TYPE(TTupleType, unpacked);
15891575
std::vector<std::shared_ptr<arrow::Field>> fields;
15901576
for (ui32 i = 0; i < tupleType->GetElementsCount(); ++i) {
15911577
std::shared_ptr<arrow::DataType> childType;
15921578
auto elementType = tupleType->GetElementType(i);
1593-
if (!ConvertArrowType(elementType, childType, extraTypes, onFail)) {
1579+
if (!ConvertArrowType(elementType, childType, onFail)) {
15941580
return false;
15951581
}
15961582

@@ -2438,10 +2424,6 @@ size_t CalcMaxBlockItemSize(const TType* type) {
24382424
return result;
24392425
}
24402426

2441-
if (type->IsList()) {
2442-
return sizeof(NYql::NUdf::TUnboxedValue);
2443-
}
2444-
24452427
if (type->IsTuple()) {
24462428
auto tupleType = AS_TYPE(TTupleType, type);
24472429
size_t result = 0;
@@ -2544,11 +2526,6 @@ struct TComparatorTraits {
25442526
return std::unique_ptr<TResult>(MakePgItemComparator(desc.TypeId).Release());
25452527
}
25462528

2547-
static std::unique_ptr<TResult> MakeList(bool isOptional, std::unique_ptr<NYql::NUdf::IBlockItemComparator>&& inner) {
2548-
Y_UNUSED(isOptional, inner);
2549-
ythrow yexception() << "Comparator not implemented for block list: ";
2550-
}
2551-
25522529
static std::unique_ptr<TResult> MakeResource(bool isOptional) {
25532530
Y_UNUSED(isOptional);
25542531
ythrow yexception() << "Comparator not implemented for block resources: ";
@@ -2581,11 +2558,6 @@ struct THasherTraits {
25812558
return std::unique_ptr<TResult>(MakePgItemHasher(desc.TypeId).Release());
25822559
}
25832560

2584-
static std::unique_ptr<TResult> MakeList(bool isOptional, std::unique_ptr<NYql::NUdf::IBlockItemHasher>&& inner) {
2585-
Y_UNUSED(isOptional, inner);
2586-
ythrow yexception() << "Hasher not implemented for list";
2587-
}
2588-
25892561
static std::unique_ptr<TResult> MakeResource(bool isOptional) {
25902562
Y_UNUSED(isOptional);
25912563
ythrow yexception() << "Hasher not implemented for block resources";

ydb/library/yql/minikql/mkql_type_builder.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ inline size_t CalcBlockLen(size_t maxBlockItemSize) {
3131
}
3232

3333
using TArrowConvertFailedCallback = std::function<void(TType*)>;
34-
bool ConvertArrowType(TType* itemType, std::shared_ptr<arrow::DataType>& type, bool extraTypes = false, const TArrowConvertFailedCallback& = {});
34+
bool ConvertArrowType(TType* itemType, std::shared_ptr<arrow::DataType>& type, const TArrowConvertFailedCallback& = {});
3535
bool ConvertArrowType(NUdf::EDataSlot slot, std::shared_ptr<arrow::DataType>& type);
3636

3737
template<NUdf::EDataSlot slot>

ydb/library/yql/providers/common/arrow_resolve/yql_simple_arrow_resolver.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@ class TSimpleArrowResolver: public IArrowResolver {
5757
}
5858
}
5959

60-
EStatus AreTypesSupported(const TPosition& pos, const TVector<const TTypeAnnotationNode*>& types, TExprContext& ctx, bool extraTypes,
60+
EStatus AreTypesSupported(const TPosition& pos, const TVector<const TTypeAnnotationNode*>& types, TExprContext& ctx,
6161
const TUnsupportedTypeCallback& onUnsupported = {}) const override
6262
{
6363
try {
@@ -84,7 +84,7 @@ class TSimpleArrowResolver: public IArrowResolver {
8484
TNullOutput null;
8585
auto mkqlType = NCommon::BuildType(*type, typeBuilder, null);
8686
std::shared_ptr<arrow::DataType> arrowType;
87-
if (!ConvertArrowType(mkqlType, arrowType, extraTypes, cb)) {
87+
if (!ConvertArrowType(mkqlType, arrowType, cb)) {
8888
allOk = false;
8989
if (!cb) {
9090
break;

ydb/library/yql/providers/s3/actors/yql_arrow_column_converters.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -622,8 +622,12 @@ TColumnConverter BuildColumnConverter(const std::string& columnName, const std::
622622
void BuildColumnConverters(std::shared_ptr<arrow::Schema> outputSchema, std::shared_ptr<arrow::Schema> dataSchema,
623623
std::vector<int>& columnIndices, std::vector<TColumnConverter>& columnConverters,
624624
std::unordered_map<TStringBuf, NKikimr::NMiniKQL::TType*, THash<TStringBuf>> rowTypes, const NDB::FormatSettings& settings) {
625+
625626
for (int i = 0; i < dataSchema->num_fields(); ++i) {
626627
switch (dataSchema->field(i)->type()->id()) {
628+
case arrow::Type::LIST:
629+
throw parquet::ParquetException(TStringBuilder() << "File contains LIST field "
630+
<< dataSchema->field(i)->name() << " and can't be parsed");
627631
case arrow::Type::STRUCT:
628632
throw parquet::ParquetException(TStringBuilder() << "File contains STRUCT field "
629633
<< dataSchema->field(i)->name() << " and can't be parsed");

ydb/library/yql/providers/s3/actors/yql_s3_read_actor.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2196,7 +2196,7 @@ std::pair<NYql::NDq::IDqComputeActorAsyncInput*, IActor*> CreateS3ReadActor(
21962196
auto memberType = extraStructType->GetMemberType(i);
21972197
std::shared_ptr<arrow::DataType> dataType;
21982198

2199-
YQL_ENSURE(ConvertArrowType(memberType, dataType, true), "Unsupported arrow type");
2199+
YQL_ENSURE(ConvertArrowType(memberType, dataType), "Unsupported arrow type");
22002200
THROW_ARROW_NOT_OK(builder.AddField(std::make_shared<arrow::Field>(std::string(memberName), dataType, memberType->IsOptional())));
22012201
readSpec->ColumnReorder.push_back(i);
22022202
readSpec->RowSpec.emplace(memberName, memberType);

ydb/library/yql/providers/s3/provider/yql_s3_datasource_type_ann.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -277,7 +277,7 @@ bool ExtractSettingValue(const TExprNode& value, TStringBuf settingName, TString
277277
}
278278

279279
bool EnsureParquetTypeSupported(TPositionHandle position, const TTypeAnnotationNode* type, TExprContext& ctx, const IArrowResolver::TPtr& arrowResolver) {
280-
auto resolveStatus = arrowResolver->AreTypesSupported(ctx.GetPosition(position), { type }, ctx, true);
280+
auto resolveStatus = arrowResolver->AreTypesSupported(ctx.GetPosition(position), { type }, ctx);
281281
YQL_ENSURE(resolveStatus != IArrowResolver::ERROR);
282282

283283
if (resolveStatus != IArrowResolver::OK) {

ydb/library/yql/providers/yt/comp_nodes/dq/arrow_converter.cpp

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -483,11 +483,6 @@ struct TYsonBlockReaderTraits {
483483
}
484484
}
485485

486-
static std::unique_ptr<TResult> MakeList(bool isOptional, std::unique_ptr<IYsonBlockReader>&& inner) {
487-
Y_UNUSED(isOptional, inner);
488-
ythrow yexception() << "Yson reader not implemented for list";
489-
}
490-
491486
static std::unique_ptr<TResult> MakeResource(bool isOptional) {
492487
Y_UNUSED(isOptional);
493488
ythrow yexception() << "Yson reader not implemented for block resources";

ydb/library/yql/public/udf/arrow/block_item.h

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -161,12 +161,6 @@ class TBlockItem {
161161
return Raw.Resource.Value;
162162
}
163163

164-
inline IBoxedValuePtr AsBoxed() const
165-
{
166-
Y_DEBUG_ABORT_UNLESS(GetMarkers() == EMarkers::Boxed, "Value is not boxed");
167-
return Raw.Resource.Value;
168-
}
169-
170164
inline void* GetRawPtr()
171165
{
172166
return &Raw;

0 commit comments

Comments (0)