From 3bec858e24a10773f4259fc0933516f8b0bc2e95 Mon Sep 17 00:00:00 2001 From: Oleg Ovcharuk Date: Tue, 13 Aug 2024 12:01:34 +0300 Subject: [PATCH 01/11] Update python app docs --- .../ru/core/dev/example-app/python/index.md | 326 ++++++++++-------- 1 file changed, 188 insertions(+), 138 deletions(-) diff --git a/ydb/docs/ru/core/dev/example-app/python/index.md b/ydb/docs/ru/core/dev/example-app/python/index.md index 366284eaced2..c9efec79ff9c 100644 --- a/ydb/docs/ru/core/dev/example-app/python/index.md +++ b/ydb/docs/ru/core/dev/example-app/python/index.md @@ -1,6 +1,6 @@ # Приложение на Python -На этой странице подробно разбирается код [тестового приложения](https://github.com/ydb-platform/ydb-python-sdk/tree/master/examples/basic_example_v1), доступного в составе [Python SDK](https://github.com/ydb-platform/ydb-python-sdk) {{ ydb-short-name }}. +На этой странице подробно разбирается код [тестового приложения](https://github.com/ydb-platform/ydb-python-sdk/tree/master/examples/basic_example_v2), доступного в составе [Python SDK](https://github.com/ydb-platform/ydb-python-sdk) {{ ydb-short-name }}. 
## Скачивание и запуск {#download} @@ -37,27 +37,62 @@ def run(endpoint, database, path): exit(1) ``` -Фрагмент кода приложения для создания сессии: - -```python -session = driver.table_client.session().create() -``` - {% include [create_table.md](../_includes/steps/02_create_table.md) %} -Для создания таблиц используется метод `session.create_table()`: +Для создания таблиц используется метод `pool.execute_with_retries()`: ```python -def create_tables(session, path): - session.create_table( - os.path.join(path, 'series'), - ydb.TableDescription() - .with_column(ydb.Column('series_id', ydb.PrimitiveType.Uint64)) # not null column - .with_column(ydb.Column('title', ydb.OptionalType(ydb.PrimitiveType.Utf8))) - .with_column(ydb.Column('series_info', ydb.OptionalType(ydb.PrimitiveType.Utf8))) - .with_column(ydb.Column('release_date', ydb.OptionalType(ydb.PrimitiveType.Uint64))) - .with_primary_key('series_id') +def create_tables(pool, path): + print("\nCreating table series...") + pool.execute_with_retries( + """ + PRAGMA TablePathPrefix("{}"); + CREATE table `series` ( + `series_id` Uint64, + `title` Utf8, + `series_info` Utf8, + `release_date` Uint64, + PRIMARY KEY (`series_id`) + ) + """.format( + path + ) + ) + + print("\nCreating table seasons...") + pool.execute_with_retries( + """ + PRAGMA TablePathPrefix("{}"); + CREATE table `seasons` ( + `series_id` Uint64, + `season_id` Uint64, + `title` Utf8, + `first_aired` Uint64, + `last_aired` Uint64, + PRIMARY KEY (`series_id`, `season_id`) + ) + """.format( + path + ) + ) + + print("\nCreating table episodes...") + pool.execute_with_retries( + """ + PRAGMA TablePathPrefix("{}"); + CREATE table `episodes` ( + `series_id` Uint64, + `season_id` Uint64, + `episode_id` Uint64, + `title` Utf8, + `air_date` Uint64, + PRIMARY KEY (`series_id`, `season_id`, `episode_id`) + ) + """.format( + path + ) ) + ``` В параметр path передаётся абсолютный путь от корня: @@ -66,39 +101,41 @@ def create_tables(session, path): full_path = 
os.path.join(database, path) ``` -С помощью метода `session.describe_table()` можно вывести информацию о структуре таблицы и убедиться, что она успешно создалась: +Функция `pool.execute_with_retries(query)`, в отличие от `tx.execute()`, загружает в память результат запроса до его возвращения клиенту. +Благодаря этому нет необходимости использовать специальные контрукции для контроля над стримом, однако нужно с осторожностью пользоваться данным методом с большими `SELECT` запросами. +Подробнее про стримы будет сказано ниже. -```python -def describe_table(session, path, name): - result = session.describe_table(os.path.join(path, name)) - print("\n> describe table: series") - for column in result.columns: - print("column, name:", column.name, ",", str(column.type.item).strip()) -``` +## Работа со стримами {#work-with-streams} -Приведенный фрагмент кода при запуске выводит на консоль текст: +Результатом выполнения `tx.execute()` является стрим. Стрим позволяет считать неограниченное количество строк и объем данных, не загружая в память весь результат. Однако, для корректного сохранения состояния транзакции на стороне `ydb` +стрим необходимо прочитывать до конца после каждого запроса. Для удобства результат функции `tx.execute()` представлен в виде контекстного менеджера, который долистывает стрим до конца после выхода. 
-```bash -> describe table: series -('column, name:', 'series_id', ',', 'type_id: UINT64') -('column, name:', 'title', ',', 'type_id: UTF8') -('column, name:', 'series_info', ',', 'type_id: UTF8') -('column, name:', 'release_date', ',', 'type_id: UINT64') +```python +with tx.execute(query) as _: + pass ``` + {% include [steps/03_write_queries.md](../_includes/steps/03_write_queries.md) %} Фрагмент кода, демонстрирующий выполнение запроса на запись/изменение данных: ```python -def upsert_simple(session, path): - session.transaction().execute( - """ - PRAGMA TablePathPrefix("{}"); - UPSERT INTO episodes (series_id, season_id, episode_id, title) VALUES - (2, 6, 1, "TBD"); - """.format(path), - commit_tx=True, - ) +def upsert_simple(pool, path): + print("\nPerforming UPSERT into episodes...") + + def callee(session): + with session.transaction().execute( + """ + PRAGMA TablePathPrefix("{}"); + UPSERT INTO episodes (series_id, season_id, episode_id, title) VALUES (2, 6, 1, "TBD"); + """.format( + path + ), + commit_tx=True, + ) as _: + pass + + return pool.retry_operation_sync(callee) ``` {% include [pragmatablepathprefix.md](../_includes/auxilary/pragmatablepathprefix.md) %} @@ -108,28 +145,45 @@ def upsert_simple(session, path): Для выполнения YQL-запросов используется метод `session.transaction().execute()`. SDK позволяет в явном виде контролировать выполнение транзакций и настраивать необходимый режим выполнения транзакций с помощью класса `TxControl`. -В фрагменте кода, приведенном ниже, транзакция выполняется с помощью метода `transaction().execute()`. Устанавливается режим выполнения транзакции `ydb.SerializableReadWrite()`. После завершения всех запросов транзакции она будет автоматически завершена с помощью явного указания флага: `commit_tx=True`. Тело запроса описано с помощью синтаксиса YQL и как параметр передается методу `execute`. +В фрагменте кода, приведенном ниже, транзакция выполняется с помощью метода `transaction().execute()`. 
Устанавливается режим выполнения транзакции `ydb.QuerySerializableReadWrite()`. После завершения всех запросов транзакции она будет автоматически завершена с помощью явного указания флага: `commit_tx=True`. Тело запроса описано с помощью синтаксиса YQL и как параметр передается методу `execute`. ```python -def select_simple(session, path): - result_sets = session.transaction(ydb.SerializableReadWrite()).execute( - """ - PRAGMA TablePathPrefix("{}"); - $format = DateTime::Format("%Y-%m-%d"); - SELECT - series_id, - title, - $format(DateTime::FromSeconds(CAST(DateTime::ToSeconds(DateTime::IntervalFromDays(CAST(release_date AS Int16))) AS Uint32))) AS release_date - FROM series - WHERE series_id = 1; - """.format(path), - commit_tx=True, - ) - print("\n> select_simple_transaction:") - for row in result_sets[0].rows: - print("series, id: ", row.series_id, ", title: ", row.title, ", release date: ", row.release_date) - - return result_sets[0] +def select_simple(pool, path): + print("\nCheck series table...") + + def callee(session): + # new transaction in serializable read write mode + # if query successfully completed you will get result sets. 
+ # otherwise exception will be raised + with session.transaction(ydb.QuerySerializableReadWrite()).execute( + """ + PRAGMA TablePathPrefix("{}"); + $format = DateTime::Format("%Y-%m-%d"); + SELECT + series_id, + title, + $format(DateTime::FromSeconds(CAST(DateTime::ToSeconds(DateTime::IntervalFromDays(CAST(release_date AS Int16))) AS Uint32))) AS release_date + FROM series + WHERE series_id = 1; + """.format( + path + ), + commit_tx=True, + ) as result_sets: + first_set = next(result_sets) + for row in first_set.rows: + print( + "series, id: ", + row.series_id, + ", title: ", + row.title, + ", release date: ", + row.release_date, + ) + + return first_set + + return pool.retry_operation_sync(callee) ``` В качестве результата выполнения запроса возвращается `result_set`, итерирование по которому выводит на консоль текст: @@ -139,38 +193,49 @@ def select_simple(session, path): series, Id: 1, title: IT Crowd, Release date: 2006-02-03 ``` +## Параметризованные запросы {#param-queries} -{% include [param_prep_queries.md](../_includes/steps/07_param_prep_queries.md) %} +Для выполнения параметризованных запросов в метод `tx.execute()` необходимо передать словарь с параметрами специального вида, где ключом служит имя параметра, а значение может быть одним из следующих: +1. Обычное значение (без указывания типов допустимо использовать только int, str, bool); +2. Кортеж со значением и типом; +3. Специальный тип ydb.TypedValue(value=value, value_type=value_type). 
-```python -def select_prepared(session, path, series_id, season_id, episode_id): - query = """ - PRAGMA TablePathPrefix("{}"); - DECLARE $seriesId AS Uint64; - DECLARE $seasonId AS Uint64; - DECLARE $episodeId AS Uint64; - $format = DateTime::Format("%Y-%m-%d"); - SELECT - title, - $format(DateTime::FromSeconds(CAST(DateTime::ToSeconds(DateTime::IntervalFromDays(CAST(air_date AS Int16))) AS Uint32))) AS air_date - FROM episodes - WHERE series_id = $seriesId AND season_id = $seasonId AND episode_id = $episodeId; - """.format(path) - - prepared_query = session.prepare(query) - result_sets = session.transaction(ydb.SerializableReadWrite()).execute( - prepared_query, { - '$seriesId': series_id, - '$seasonId': season_id, - '$episodeId': episode_id, - }, - commit_tx=True - ) - print("\n> select_prepared_transaction:") - for row in result_sets[0].rows: - print("episode title:", row.title, ", air date:", row.air_date) +Фрагмент кода, демонстрирующий возможность использования параметризованных запросов: - return result_sets[0] +```python +def select_with_parameters(pool, path, series_id, season_id, episode_id): + def callee(session): + query = """ + PRAGMA TablePathPrefix("{}"); + $format = DateTime::Format("%Y-%m-%d"); + SELECT + title, + $format(DateTime::FromSeconds(CAST(DateTime::ToSeconds(DateTime::IntervalFromDays(CAST(air_date AS Int16))) AS Uint32))) AS air_date + FROM episodes + WHERE series_id = $seriesId AND season_id = $seasonId AND episode_id = $episodeId; + """.format( + path + ) + + with session.transaction(ydb.QuerySerializableReadWrite()).execute( + query, + { + "$seriesId": (series_id, ydb.PrimitiveType.Uint64), + "$seasonId": (season_id, ydb.PrimitiveType.Uint64), # could be defined via tuple + "$episodeId": ydb.TypedValue( + episode_id, ydb.PrimitiveType.Uint64 + ), # could be defined via special class + }, + commit_tx=True, + ) as result_sets: + print("\n> select_prepared_transaction:") + first_set = next(result_sets) + for row in first_set.rows: + 
print("episode title:", row.title, ", air date:", row.air_date) + + return first_set + + return pool.retry_operation_sync(callee) ``` Приведенный фрагмент кода при запуске выводит на консоль текст: @@ -180,58 +245,43 @@ def select_prepared(session, path, series_id, season_id, episode_id): ('episode title:', u'To Build a Better Beta', ', air date:', '2016-06-05') ``` -{% include [scan_query.md](../_includes/steps/08_scan_query.md) %} - -```python -def executeScanQuery(driver): - query = ydb.ScanQuery(""" - SELECT series_id, season_id, COUNT(*) AS episodes_count - FROM episodes - GROUP BY series_id, season_id - ORDER BY series_id, season_id - """, {}) - - it = driver.table_client.scan_query(query) - - while True: - try: - result = next(it) - print result.result_set.rows - except StopIteration: - break -``` {% include [transaction_control.md](../_includes/steps/10_transaction_control.md) %} -Фрагмент кода, демонстрирующий явное использование вызовов `transaction().begin()` и `tx.Commit()`: +Фрагмент кода, демонстрирующий явное использование вызовов `transaction().begin()` и `tx.commit()`: ```python -def explicit_tcl(session, path, series_id, season_id, episode_id): - query = """ - PRAGMA TablePathPrefix("{}"); - - DECLARE $seriesId AS Uint64; - DECLARE $seasonId AS Uint64; - DECLARE $episodeId AS Uint64; - - UPDATE episodes - SET air_date = CAST(CurrentUtcDate() AS Uint64) - WHERE series_id = $seriesId AND season_id = $seasonId AND episode_id = $episodeId; - """.format(path) - prepared_query = session.prepare(query) - - tx = session.transaction(ydb.SerializableReadWrite()).begin() - - tx.execute( - prepared_query, { - '$seriesId': series_id, - '$seasonId': season_id, - '$episodeId': episode_id - } - ) - - print("\n> explicit TCL call") - - tx.commit() +def explicit_tcl(pool, path, series_id, season_id, episode_id): + def callee(session): + query = """ + PRAGMA TablePathPrefix("{}"); + UPDATE episodes + SET air_date = CAST(CurrentUtcDate() AS Uint64) + WHERE series_id 
= $seriesId AND season_id = $seasonId AND episode_id = $episodeId; + """.format( + path + ) + + # Get newly created transaction id + tx = session.transaction(ydb.QuerySerializableReadWrite()).begin() + + # Execute data query. + # Transaction control settings continues active transaction (tx) + with tx.execute( + query, + { + "$seriesId": (series_id, ydb.PrimitiveType.Uint64), + "$seasonId": (season_id, ydb.PrimitiveType.Uint64), + "$episodeId": (episode_id, ydb.PrimitiveType.Uint64), + }, + ) as _: + pass + + print("\n> explicit TCL call") + + # Commit active transaction(tx) + tx.commit() + + return pool.retry_operation_sync(callee) ``` From c6730e12bebd4a3fea616dc4447f08183793d18e Mon Sep 17 00:00:00 2001 From: Oleg Ovcharuk Date: Thu, 15 Aug 2024 16:43:55 +0300 Subject: [PATCH 02/11] Extend python sdk docs with async examples --- .../ru/core/dev/example-app/python/index.md | 648 ++++++++++++------ 1 file changed, 437 insertions(+), 211 deletions(-) diff --git a/ydb/docs/ru/core/dev/example-app/python/index.md b/ydb/docs/ru/core/dev/example-app/python/index.md index c9efec79ff9c..f3d0690e305c 100644 --- a/ydb/docs/ru/core/dev/example-app/python/index.md +++ b/ydb/docs/ru/core/dev/example-app/python/index.md @@ -21,79 +21,163 @@ python3 -m pip install iso8601 Фрагмент кода приложения для инициализации драйвера: -```python -def run(endpoint, database, path): - driver_config = ydb.DriverConfig( - endpoint, database, credentials=ydb.credentials_from_env_variables(), - root_certificates=ydb.load_ydb_root_certificate(), - ) - with ydb.Driver(driver_config) as driver: - try: - driver.wait(timeout=5) - except TimeoutError: - print("Connect failed to YDB") - print("Last reported errors by discovery:") - print(driver.discovery_debug_details()) - exit(1) -``` +{% list tabs %} + +- Sync + + ```python + def run(endpoint, database, path): + driver_config = ydb.DriverConfig( + endpoint, database, credentials=ydb.credentials_from_env_variables(), + 
root_certificates=ydb.load_ydb_root_certificate(), + ) + with ydb.Driver(driver_config) as driver: + try: + driver.wait(timeout=5) + except TimeoutError: + print("Connect failed to YDB") + print("Last reported errors by discovery:") + print(driver.discovery_debug_details()) + exit(1) + ``` + +- AsyncIO + + ```python + async def run(endpoint, database, path): + driver_config = ydb.DriverConfig( + endpoint, database, credentials=ydb.credentials_from_env_variables(), + root_certificates=ydb.load_ydb_root_certificate(), + ) + async with ydb.aio.Driver(driver_config) as driver: + try: + await driver.wait(timeout=5) + except TimeoutError: + print("Connect failed to YDB") + print("Last reported errors by discovery:") + print(driver.discovery_debug_details()) + exit(1) + ``` + +{% endlist %} {% include [create_table.md](../_includes/steps/02_create_table.md) %} Для создания таблиц используется метод `pool.execute_with_retries()`: -```python -def create_tables(pool, path): - print("\nCreating table series...") - pool.execute_with_retries( - """ - PRAGMA TablePathPrefix("{}"); - CREATE table `series` ( - `series_id` Uint64, - `title` Utf8, - `series_info` Utf8, - `release_date` Uint64, - PRIMARY KEY (`series_id`) - ) - """.format( - path - ) - ) - - print("\nCreating table seasons...") - pool.execute_with_retries( - """ - PRAGMA TablePathPrefix("{}"); - CREATE table `seasons` ( - `series_id` Uint64, - `season_id` Uint64, - `title` Utf8, - `first_aired` Uint64, - `last_aired` Uint64, - PRIMARY KEY (`series_id`, `season_id`) - ) - """.format( - path - ) - ) - - print("\nCreating table episodes...") - pool.execute_with_retries( - """ - PRAGMA TablePathPrefix("{}"); - CREATE table `episodes` ( - `series_id` Uint64, - `season_id` Uint64, - `episode_id` Uint64, - `title` Utf8, - `air_date` Uint64, - PRIMARY KEY (`series_id`, `season_id`, `episode_id`) - ) - """.format( - path - ) - ) - -``` +{% list tabs %} + +- Sync + + ```python + def create_tables(pool: ydb.QuerySessionPool, 
path: str): + print("\nCreating table series...") + pool.execute_with_retries( + """ + PRAGMA TablePathPrefix("{}"); + CREATE table `series` ( + `series_id` Uint64, + `title` Utf8, + `series_info` Utf8, + `release_date` Uint64, + PRIMARY KEY (`series_id`) + ) + """.format( + path + ) + ) + + print("\nCreating table seasons...") + pool.execute_with_retries( + """ + PRAGMA TablePathPrefix("{}"); + CREATE table `seasons` ( + `series_id` Uint64, + `season_id` Uint64, + `title` Utf8, + `first_aired` Uint64, + `last_aired` Uint64, + PRIMARY KEY (`series_id`, `season_id`) + ) + """.format( + path + ) + ) + + print("\nCreating table episodes...") + pool.execute_with_retries( + """ + PRAGMA TablePathPrefix("{}"); + CREATE table `episodes` ( + `series_id` Uint64, + `season_id` Uint64, + `episode_id` Uint64, + `title` Utf8, + `air_date` Uint64, + PRIMARY KEY (`series_id`, `season_id`, `episode_id`) + ) + """.format( + path + ) + ) + ``` + +- AsyncIO + + ```python + async def create_tables(pool: ydb.aio.QuerySessionPoolAsync, path: str): + print("\nCreating table series...") + await pool.execute_with_retries( + """ + PRAGMA TablePathPrefix("{}"); + CREATE table `series` ( + `series_id` Uint64, + `title` Utf8, + `series_info` Utf8, + `release_date` Uint64, + PRIMARY KEY (`series_id`) + ) + """.format( + path + ) + ) + + print("\nCreating table seasons...") + await pool.execute_with_retries( + """ + PRAGMA TablePathPrefix("{}"); + CREATE table `seasons` ( + `series_id` Uint64, + `season_id` Uint64, + `title` Utf8, + `first_aired` Uint64, + `last_aired` Uint64, + PRIMARY KEY (`series_id`, `season_id`) + ) + """.format( + path + ) + ) + + print("\nCreating table episodes...") + await pool.execute_with_retries( + """ + PRAGMA TablePathPrefix("{}"); + CREATE table `episodes` ( + `series_id` Uint64, + `season_id` Uint64, + `episode_id` Uint64, + `title` Utf8, + `air_date` Uint64, + PRIMARY KEY (`series_id`, `season_id`, `episode_id`) + ) + """.format( + path + ) + ) + ``` + +{% 
endlist %} В параметр path передаётся абсолютный путь от корня: @@ -105,86 +189,115 @@ full_path = os.path.join(database, path) Благодаря этому нет необходимости использовать специальные контрукции для контроля над стримом, однако нужно с осторожностью пользоваться данным методом с большими `SELECT` запросами. Подробнее про стримы будет сказано ниже. -## Работа со стримами {#work-with-streams} - -Результатом выполнения `tx.execute()` является стрим. Стрим позволяет считать неограниченное количество строк и объем данных, не загружая в память весь результат. Однако, для корректного сохранения состояния транзакции на стороне `ydb` -стрим необходимо прочитывать до конца после каждого запроса. Для удобства результат функции `tx.execute()` представлен в виде контекстного менеджера, который долистывает стрим до конца после выхода. - -```python -with tx.execute(query) as _: - pass -``` - {% include [steps/03_write_queries.md](../_includes/steps/03_write_queries.md) %} Фрагмент кода, демонстрирующий выполнение запроса на запись/изменение данных: -```python -def upsert_simple(pool, path): - print("\nPerforming UPSERT into episodes...") - - def callee(session): - with session.transaction().execute( - """ - PRAGMA TablePathPrefix("{}"); - UPSERT INTO episodes (series_id, season_id, episode_id, title) VALUES (2, 6, 1, "TBD"); - """.format( - path - ), - commit_tx=True, - ) as _: - pass - - return pool.retry_operation_sync(callee) -``` +{% list tabs %} + +- Sync + + ```python + def upsert_simple(pool, path): + print("\nPerforming UPSERT into episodes...") + pool.execute_with_retries( + """ + PRAGMA TablePathPrefix("{}"); + UPSERT INTO episodes (series_id, season_id, episode_id, title) VALUES (2, 6, 1, "TBD"); + """.format( + path + ) + ) + ``` + +- AsyncIO + + ```python + async def upsert_simple(pool: ydb.aio.QuerySessionPoolAsync, path: str): + print("\nPerforming UPSERT into episodes...") + await pool.execute_with_retries( + """ + PRAGMA TablePathPrefix("{}"); + UPSERT INTO 
episodes (series_id, season_id, episode_id, title) VALUES (2, 6, 1, "TBD"); + """.format( + path + ) + ) + ``` + +{% endlist %} {% include [pragmatablepathprefix.md](../_includes/auxilary/pragmatablepathprefix.md) %} {% include [steps/04_query_processing.md](../_includes/steps/04_query_processing.md) %} -Для выполнения YQL-запросов используется метод `session.transaction().execute()`. -SDK позволяет в явном виде контролировать выполнение транзакций и настраивать необходимый режим выполнения транзакций с помощью класса `TxControl`. - -В фрагменте кода, приведенном ниже, транзакция выполняется с помощью метода `transaction().execute()`. Устанавливается режим выполнения транзакции `ydb.QuerySerializableReadWrite()`. После завершения всех запросов транзакции она будет автоматически завершена с помощью явного указания флага: `commit_tx=True`. Тело запроса описано с помощью синтаксиса YQL и как параметр передается методу `execute`. - -```python -def select_simple(pool, path): - print("\nCheck series table...") - - def callee(session): - # new transaction in serializable read write mode - # if query successfully completed you will get result sets. - # otherwise exception will be raised - with session.transaction(ydb.QuerySerializableReadWrite()).execute( - """ - PRAGMA TablePathPrefix("{}"); - $format = DateTime::Format("%Y-%m-%d"); - SELECT - series_id, - title, - $format(DateTime::FromSeconds(CAST(DateTime::ToSeconds(DateTime::IntervalFromDays(CAST(release_date AS Int16))) AS Uint32))) AS release_date - FROM series - WHERE series_id = 1; - """.format( - path - ), - commit_tx=True, - ) as result_sets: - first_set = next(result_sets) - for row in first_set.rows: - print( - "series, id: ", - row.series_id, - ", title: ", - row.title, - ", release date: ", - row.release_date, - ) - - return first_set - - return pool.retry_operation_sync(callee) -``` +Для выполнения YQL-запросов чаще всего достаточно использования уже знакомого метода `pool.execute_with_retries()`. 
+ +{% list tabs %} + +- Sync + + ```python + def select_simple(pool: ydb.QuerySessionPool, path: str): + print("\nCheck series table...") + result_sets = pool.execute_with_retries( + """ + PRAGMA TablePathPrefix("{}"); + SELECT + series_id, + title, + release_date + FROM series + WHERE series_id = 1; + """.format( + path + ), + ) + first_set = result_sets[0] + for row in first_set.rows: + print( + "series, id: ", + row.series_id, + ", title: ", + row.title, + ", release date: ", + row.release_date, + ) + return first_set + ``` + +- AsyncIO + + ```python + async def select_simple(pool: ydb.aio.QuerySessionPoolAsync, path: str): + print("\nCheck series table...") + result_sets = await pool.execute_with_retries( + """ + PRAGMA TablePathPrefix("{}"); + SELECT + series_id, + title, + release_date + FROM series + WHERE series_id = 1; + """.format( + path + ), + ) + first_set = result_sets[0] + for row in first_set.rows: + print( + "series, id: ", + row.series_id, + ", title: ", + row.title, + ", release date: ", + row.release_date, + ) + return first_set + ``` + +{% endlist %} В качестве результата выполнения запроса возвращается `result_set`, итерирование по которому выводит на консоль текст: @@ -195,48 +308,86 @@ series, Id: 1, title: IT Crowd, Release date: 2006-02-03 ## Параметризованные запросы {#param-queries} -Для выполнения параметризованных запросов в метод `tx.execute()` необходимо передать словарь с параметрами специального вида, где ключом служит имя параметра, а значение может быть одним из следующих: -1. Обычное значение (без указывания типов допустимо использовать только int, str, bool); +Для выполнения параметризованных запросов в метод `pool.execute_with_retries()` (или `tx.execute()`, работа с которым будет показана в следующей секции) необходимо передать словарь с параметрами специального вида, где ключом служит имя параметра, а значение может быть одним из следующих: +1. Обычное значение; 2. Кортеж со значением и типом; 3. 
Специальный тип ydb.TypedValue(value=value, value_type=value_type). +В случае указания значения без типа, конвертация происходит по следующим правилам: +* `int` -> `ydb.PrimitiveType.Int64` +* `float` -> `ydb.PrimitiveType.Float` +* `str` -> `ydb.PrimitiveType.Utf8` +* `bool` -> `ydb.PrimitiveType.Bool` +* `list` -> `ydb.ListType` +* `dict` -> `ydb.DictType` + +Автоматическая конвертация списков и словарей возможна только в случае однородных структур, тип вложенного значения будет вычисляться рекурсивно по вышеупомянутым правилам. + Фрагмент кода, демонстрирующий возможность использования параметризованных запросов: -```python -def select_with_parameters(pool, path, series_id, season_id, episode_id): - def callee(session): - query = """ - PRAGMA TablePathPrefix("{}"); - $format = DateTime::Format("%Y-%m-%d"); - SELECT - title, - $format(DateTime::FromSeconds(CAST(DateTime::ToSeconds(DateTime::IntervalFromDays(CAST(air_date AS Int16))) AS Uint32))) AS air_date - FROM episodes - WHERE series_id = $seriesId AND season_id = $seasonId AND episode_id = $episodeId; - """.format( - path - ) - - with session.transaction(ydb.QuerySerializableReadWrite()).execute( - query, - { - "$seriesId": (series_id, ydb.PrimitiveType.Uint64), - "$seasonId": (season_id, ydb.PrimitiveType.Uint64), # could be defined via tuple - "$episodeId": ydb.TypedValue( - episode_id, ydb.PrimitiveType.Uint64 - ), # could be defined via special class - }, - commit_tx=True, - ) as result_sets: - print("\n> select_prepared_transaction:") - first_set = next(result_sets) - for row in first_set.rows: - print("episode title:", row.title, ", air date:", row.air_date) - - return first_set - - return pool.retry_operation_sync(callee) -``` +{% list tabs %} + +- Sync + + ```python + def select_with_parameters(pool: ydb.QuerySessionPool, path: str, series_id, season_id, episode_id): + result_sets = pool.execute_with_retries( + """ + PRAGMA TablePathPrefix("{}"); + SELECT + title, + air_date + FROM episodes + WHERE 
series_id = $seriesId AND season_id = $seasonId AND episode_id = $episodeId; + """.format( + path + ), + { + "$seriesId": series_id, # could be defined implicit + "$seasonId": (season_id, ydb.PrimitiveType.Int64), # could be defined via tuple + "$episodeId": ydb.TypedValue(episode_id, ydb.PrimitiveType.Int64), # could be defined via special class + }, + ) + + print("\n> select_with_parameters:") + first_set = result_sets[0] + for row in first_set.rows: + print("episode title:", row.title, ", air date:", row.air_date) + + return first_set + ``` + +- AsyncIO + + ```python + async def select_with_parameters(pool: ydb.aio.QuerySessionPoolAsync, path: str, series_id, season_id, episode_id): + result_sets = await pool.execute_with_retries( + """ + PRAGMA TablePathPrefix("{}"); + SELECT + title, + air_date + FROM episodes + WHERE series_id = $seriesId AND season_id = $seasonId AND episode_id = $episodeId; + """.format( + path + ), + { + "$seriesId": series_id, # could be defined implicit + "$seasonId": (season_id, ydb.PrimitiveType.Int64), # could be defined via tuple + "$episodeId": ydb.TypedValue(episode_id, ydb.PrimitiveType.Int64), # could be defined via special class + }, + ) + + print("\n> select_with_parameters:") + first_set = result_sets[0] + for row in first_set.rows: + print("episode title:", row.title, ", air date:", row.air_date) + + return first_set + ``` + +{% endlist %} Приведенный фрагмент кода при запуске выводит на консоль текст: @@ -248,40 +399,115 @@ def select_with_parameters(pool, path, series_id, season_id, episode_id): {% include [transaction_control.md](../_includes/steps/10_transaction_control.md) %} -Фрагмент кода, демонстрирующий явное использование вызовов `transaction().begin()` и `tx.commit()`: +Для выполнения YQL-запросов также используется метод `session.transaction().execute()`. +SDK позволяет в явном виде контролировать выполнение транзакций и настраивать необходимый режим выполнения транзакций с помощью класса `TxControl`. 
-```python -def explicit_tcl(pool, path, series_id, season_id, episode_id): - def callee(session): - query = """ - PRAGMA TablePathPrefix("{}"); - UPDATE episodes - SET air_date = CAST(CurrentUtcDate() AS Uint64) - WHERE series_id = $seriesId AND season_id = $seasonId AND episode_id = $episodeId; - """.format( - path - ) - - # Get newly created transaction id - tx = session.transaction(ydb.QuerySerializableReadWrite()).begin() - - # Execute data query. - # Transaction control settings continues active transaction (tx) - with tx.execute( - query, - { - "$seriesId": (series_id, ydb.PrimitiveType.Uint64), - "$seasonId": (season_id, ydb.PrimitiveType.Uint64), - "$episodeId": (episode_id, ydb.PrimitiveType.Uint64), - }, - ) as _: - pass - - print("\n> explicit TCL call") - - # Commit active transaction(tx) - tx.commit() - - return pool.retry_operation_sync(callee) -``` +Результатом выполнения `tx.execute()` является стрим. Стрим позволяет считать неограниченное количество строк и объем данных, не загружая в память весь результат. +Однако для корректного сохранения состояния транзакции на стороне `ydb` стрим необходимо прочитывать до конца после каждого запроса. +Для удобства результат функции `tx.execute()` представлен в виде контекстного менеджера, который долистывает стрим до конца после выхода. + +{% list tabs %} + +- Sync + + ```python + with tx.execute(query) as _: + pass + ``` + +- AsyncIO + + ```python + async with await tx.execute(query) as _: + pass + ``` + +{% endlist %} + +Во фрагменте кода, приведенном ниже, транзакция выполняется с помощью метода `transaction().execute()`. Устанавливается режим выполнения транзакции `ydb.QuerySerializableReadWrite()`. +Тело запроса описано с помощью синтаксиса YQL и как параметр передается методу `execute`. 
+ +Фрагмент кода, демонстрирующий явное использование вызовов `transaction().begin()` и `tx.commit()`: +{% list tabs %} + +- Sync + + ```python + def explicit_transaction_control(pool: ydb.QuerySessionPool, path: str, series_id, season_id, episode_id): + def callee(session: ydb.QuerySessionSync): + query = """ + PRAGMA TablePathPrefix("{}"); + UPDATE episodes + SET air_date = CurrentUtcDate() + WHERE series_id = $seriesId AND season_id = $seasonId AND episode_id = $episodeId; + """.format( + path + ) + + # Get newly created transaction id + tx = session.transaction(ydb.QuerySerializableReadWrite()).begin() + + # Execute data query. + # Transaction control settings continues active transaction (tx) + with tx.execute( + query, + { + "$seriesId": (series_id, ydb.PrimitiveType.Int64), + "$seasonId": (season_id, ydb.PrimitiveType.Int64), + "$episodeId": (episode_id, ydb.PrimitiveType.Int64), + }, + ) as _: + pass + + print("\n> explicit TCL call") + + # Commit active transaction(tx) + tx.commit() + + return pool.retry_operation_sync(callee) + ``` + +- AsyncIO + + ```python + async def explicit_transaction_control( + pool: ydb.aio.QuerySessionPoolAsync, path: str, series_id, season_id, episode_id + ): + async def callee(session: ydb.aio.QuerySessionAsync): + query = """ + PRAGMA TablePathPrefix("{}"); + UPDATE episodes + SET air_date = CurrentUtcDate() + WHERE series_id = $seriesId AND season_id = $seasonId AND episode_id = $episodeId; + """.format( + path + ) + + # Get newly created transaction id + tx = await session.transaction(ydb.QuerySerializableReadWrite()).begin() + + # Execute data query. 
+ # Transaction control settings continues active transaction (tx) + async with await tx.execute( + query, + { + "$seriesId": (series_id, ydb.PrimitiveType.Int64), + "$seasonId": (season_id, ydb.PrimitiveType.Int64), + "$episodeId": (episode_id, ydb.PrimitiveType.Int64), + }, + ) as _: + pass + + print("\n> explicit TCL call") + + # Commit active transaction(tx) + await tx.commit() + + return await pool.retry_operation_async(callee) + ``` + +{% endlist %} + +Однако, стоит помнить, что транзакция может быть открыта неявно при первом запросе. Завершиться же она может автоматически с помощью явного указания флага: `commit_tx=True`. +Неявное управление транзакцией является предпочтительным, так как используется меньше обращений на сервер. From eab2f6ca1abf58de888aad18abdfb4f71e16b5c3 Mon Sep 17 00:00:00 2001 From: Oleg Ovcharuk Date: Tue, 20 Aug 2024 10:42:11 +0300 Subject: [PATCH 03/11] Apply suggestions from code review Co-authored-by: Ivan Blinkov --- ydb/docs/ru/core/dev/example-app/python/index.md | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/ydb/docs/ru/core/dev/example-app/python/index.md b/ydb/docs/ru/core/dev/example-app/python/index.md index f3d0690e305c..9e113243d6bb 100644 --- a/ydb/docs/ru/core/dev/example-app/python/index.md +++ b/ydb/docs/ru/core/dev/example-app/python/index.md @@ -63,7 +63,7 @@ python3 -m pip install iso8601 {% include [create_table.md](../_includes/steps/02_create_table.md) %} -Для создания таблиц используется метод `pool.execute_with_retries()`: +Для выполнения YQL запросов используется метод `pool.execute_with_retries()`. Например, можно создать строковые таблицы: {% list tabs %} @@ -185,9 +185,7 @@ python3 -m pip install iso8601 full_path = os.path.join(database, path) ``` -Функция `pool.execute_with_retries(query)`, в отличие от `tx.execute()`, загружает в память результат запроса до его возвращения клиенту. 
-Благодаря этому нет необходимости использовать специальные контрукции для контроля над стримом, однако нужно с осторожностью пользоваться данным методом с большими `SELECT` запросами. -Подробнее про стримы будет сказано ниже. +Функция `pool.execute_with_retries(query)`, в отличие от `tx.execute()`, загружает в память результат запроса перед его возвращением клиенту. Благодаря этому отпадает необходимость использования специальных конструкций для контроля над стримом, однако необходимо с осторожностью применять этот метод для больших запросов `SELECT`. Подробнее о стримах будет сказано ниже. {% include [steps/03_write_queries.md](../_includes/steps/03_write_queries.md) %} @@ -311,7 +309,7 @@ series, Id: 1, title: IT Crowd, Release date: 2006-02-03 Для выполнения параметризованных запросов в метод `pool.execute_with_retries()` (или `tx.execute()`, работа с которым будет показана в следующей секции) необходимо передать словарь с параметрами специального вида, где ключом служит имя параметра, а значение может быть одним из следующих: 1. Обычное значение; 2. Кортеж со значением и типом; -3. Специальный тип ydb.TypedValue(value=value, value_type=value_type). +3. Специальный тип `ydb.TypedValue(value=value, value_type=value_type)`. В случае указания значения без типа, конвертация происходит по следующим правилам: * `int` -> `ydb.PrimitiveType.Int64` @@ -343,7 +341,7 @@ series, Id: 1, title: IT Crowd, Release date: 2006-02-03 path ), { - "$seriesId": series_id, # could be defined implicit + "$seriesId": series_id, # data type could be defined implicitly "$seasonId": (season_id, ydb.PrimitiveType.Int64), # could be defined via tuple "$episodeId": ydb.TypedValue(episode_id, ydb.PrimitiveType.Int64), # could be defined via special class }, @@ -509,5 +507,5 @@ SDK позволяет в явном виде контролировать вы {% endlist %} -Однако, стоит помнить, что транзакция может быть открыта неявно при первом запросе. 
Завершиться же она может автоматически с помощью явного указания флага: `commit_tx=True`. -Неявное управление транзакцией является предпочтительным, так как используется меньше обращений на сервер. +Однако стоит помнить, что транзакция может быть открыта неявно при первом запросе. Завершиться же она может автоматически с явным указанием флага `commit_tx=True`. +Неявное управление транзакцией предпочтительно, так как требует меньше обращений к серверу. From 1da1a6ad1c7faa5eb86fc7d8bcd7bb9325374b2d Mon Sep 17 00:00:00 2001 From: Oleg Ovcharuk Date: Tue, 20 Aug 2024 11:45:30 +0300 Subject: [PATCH 04/11] review fixes --- .../ru/core/dev/example-app/python/index.md | 255 ++++++++---------- 1 file changed, 118 insertions(+), 137 deletions(-) diff --git a/ydb/docs/ru/core/dev/example-app/python/index.md b/ydb/docs/ru/core/dev/example-app/python/index.md index 9e113243d6bb..825984a8bb41 100644 --- a/ydb/docs/ru/core/dev/example-app/python/index.md +++ b/ydb/docs/ru/core/dev/example-app/python/index.md @@ -23,7 +23,7 @@ python3 -m pip install iso8601 {% list tabs %} -- Sync +- Синхронный ```python def run(endpoint, database, path): @@ -41,7 +41,7 @@ python3 -m pip install iso8601 exit(1) ``` -- AsyncIO +- Асинхронный ```python async def run(endpoint, database, path): @@ -67,113 +67,101 @@ python3 -m pip install iso8601 {% list tabs %} -- Sync +- Синхронный ```python def create_tables(pool: ydb.QuerySessionPool, path: str): print("\nCreating table series...") pool.execute_with_retries( - """ - PRAGMA TablePathPrefix("{}"); - CREATE table `series` ( - `series_id` Uint64, - `title` Utf8, - `series_info` Utf8, - `release_date` Uint64, - PRIMARY KEY (`series_id`) - ) - """.format( - path - ) + f""" + PRAGMA TablePathPrefix("{path}"); + CREATE table `series` ( + `series_id` Uint64, + `title` Utf8, + `series_info` Utf8, + `release_date` Uint64, + PRIMARY KEY (`series_id`) + ) + """ ) print("\nCreating table seasons...") pool.execute_with_retries( - """ - PRAGMA 
TablePathPrefix("{}"); - CREATE table `seasons` ( - `series_id` Uint64, - `season_id` Uint64, - `title` Utf8, - `first_aired` Uint64, - `last_aired` Uint64, - PRIMARY KEY (`series_id`, `season_id`) - ) - """.format( - path - ) + f""" + PRAGMA TablePathPrefix("{path}"); + CREATE table `seasons` ( + `series_id` Uint64, + `season_id` Uint64, + `title` Utf8, + `first_aired` Uint64, + `last_aired` Uint64, + PRIMARY KEY (`series_id`, `season_id`) + ) + """ ) print("\nCreating table episodes...") pool.execute_with_retries( - """ - PRAGMA TablePathPrefix("{}"); - CREATE table `episodes` ( - `series_id` Uint64, - `season_id` Uint64, - `episode_id` Uint64, - `title` Utf8, - `air_date` Uint64, - PRIMARY KEY (`series_id`, `season_id`, `episode_id`) - ) - """.format( - path - ) + f""" + PRAGMA TablePathPrefix("{path}"); + CREATE table `episodes` ( + `series_id` Uint64, + `season_id` Uint64, + `episode_id` Uint64, + `title` Utf8, + `air_date` Uint64, + PRIMARY KEY (`series_id`, `season_id`, `episode_id`) + ) + """ ) ``` -- AsyncIO +- Асинхронный ```python async def create_tables(pool: ydb.aio.QuerySessionPoolAsync, path: str): print("\nCreating table series...") await pool.execute_with_retries( - """ - PRAGMA TablePathPrefix("{}"); - CREATE table `series` ( - `series_id` Uint64, - `title` Utf8, - `series_info` Utf8, - `release_date` Uint64, - PRIMARY KEY (`series_id`) - ) - """.format( - path - ) + f""" + PRAGMA TablePathPrefix("{path}"); + CREATE table `series` ( + `series_id` Uint64, + `title` Utf8, + `series_info` Utf8, + `release_date` Uint64, + PRIMARY KEY (`series_id`) + ) + """ ) print("\nCreating table seasons...") await pool.execute_with_retries( - """ - PRAGMA TablePathPrefix("{}"); - CREATE table `seasons` ( - `series_id` Uint64, - `season_id` Uint64, - `title` Utf8, - `first_aired` Uint64, - `last_aired` Uint64, - PRIMARY KEY (`series_id`, `season_id`) - ) - """.format( - path - ) + f""" + PRAGMA TablePathPrefix("{path}"); + CREATE table `seasons` ( + `series_id` 
Uint64, + `season_id` Uint64, + `title` Utf8, + `first_aired` Uint64, + `last_aired` Uint64, + PRIMARY KEY (`series_id`, `season_id`) + ) + """ ) print("\nCreating table episodes...") await pool.execute_with_retries( - """ - PRAGMA TablePathPrefix("{}"); - CREATE table `episodes` ( - `series_id` Uint64, - `season_id` Uint64, - `episode_id` Uint64, - `title` Utf8, - `air_date` Uint64, - PRIMARY KEY (`series_id`, `season_id`, `episode_id`) - ) - """.format( - path - ) + f""" + PRAGMA TablePathPrefix("{path}"); + CREATE table `episodes` ( + `series_id` Uint64, + `season_id` Uint64, + `episode_id` Uint64, + `title` Utf8, + `air_date` Uint64, + PRIMARY KEY (`series_id`, `season_id`, `episode_id`) + ) + """ ) ``` @@ -193,33 +181,29 @@ full_path = os.path.join(database, path) {% list tabs %} -- Sync +- Синхронный ```python def upsert_simple(pool, path): print("\nPerforming UPSERT into episodes...") pool.execute_with_retries( - """ - PRAGMA TablePathPrefix("{}"); + f""" + PRAGMA TablePathPrefix("{path}"); UPSERT INTO episodes (series_id, season_id, episode_id, title) VALUES (2, 6, 1, "TBD"); - """.format( - path - ) + """ ) ``` -- AsyncIO +- Асинхронный ```python async def upsert_simple(pool: ydb.aio.QuerySessionPoolAsync, path: str): print("\nPerforming UPSERT into episodes...") await pool.execute_with_retries( - """ - PRAGMA TablePathPrefix("{}"); + f""" + PRAGMA TablePathPrefix("{path}"); UPSERT INTO episodes (series_id, season_id, episode_id, title) VALUES (2, 6, 1, "TBD"); - """.format( - path - ) + """ ) ``` @@ -233,23 +217,21 @@ full_path = os.path.join(database, path) {% list tabs %} -- Sync +- Синхронный ```python def select_simple(pool: ydb.QuerySessionPool, path: str): print("\nCheck series table...") result_sets = pool.execute_with_retries( - """ - PRAGMA TablePathPrefix("{}"); + f""" + PRAGMA TablePathPrefix("{path}"); SELECT series_id, title, release_date FROM series WHERE series_id = 1; - """.format( - path - ), + """, ) first_set = result_sets[0] for row in 
first_set.rows: @@ -264,23 +246,21 @@ full_path = os.path.join(database, path) return first_set ``` -- AsyncIO +- Асинхронный ```python async def select_simple(pool: ydb.aio.QuerySessionPoolAsync, path: str): print("\nCheck series table...") result_sets = await pool.execute_with_retries( - """ - PRAGMA TablePathPrefix("{}"); + f""" + PRAGMA TablePathPrefix("{path}"); SELECT series_id, title, release_date FROM series WHERE series_id = 1; - """.format( - path - ), + """, ) first_set = result_sets[0] for row in first_set.rows: @@ -313,8 +293,9 @@ series, Id: 1, title: IT Crowd, Release date: 2006-02-03 В случае указания значения без типа, конвертация происходит по следующим правилам: * `int` -> `ydb.PrimitiveType.Int64` -* `float` -> `ydb.PrimitiveType.Float` +* `float` -> `ydb.PrimitiveType.Double` * `str` -> `ydb.PrimitiveType.Utf8` +* `bytes` -> `ydb.PrimitiveType.String` * `bool` -> `ydb.PrimitiveType.Bool` * `list` -> `ydb.ListType` * `dict` -> `ydb.DictType` @@ -325,21 +306,19 @@ series, Id: 1, title: IT Crowd, Release date: 2006-02-03 {% list tabs %} -- Sync +- Синхронный ```python def select_with_parameters(pool: ydb.QuerySessionPool, path: str, series_id, season_id, episode_id): result_sets = pool.execute_with_retries( - """ - PRAGMA TablePathPrefix("{}"); + f""" + PRAGMA TablePathPrefix("{path}"); SELECT title, air_date FROM episodes WHERE series_id = $seriesId AND season_id = $seasonId AND episode_id = $episodeId; - """.format( - path - ), + """, { "$seriesId": series_id, # data type could be defined implicitly "$seasonId": (season_id, ydb.PrimitiveType.Int64), # could be defined via tuple @@ -355,21 +334,19 @@ series, Id: 1, title: IT Crowd, Release date: 2006-02-03 return first_set ``` -- AsyncIO +- Асинхронный ```python async def select_with_parameters(pool: ydb.aio.QuerySessionPoolAsync, path: str, series_id, season_id, episode_id): result_sets = await pool.execute_with_retries( - """ - PRAGMA TablePathPrefix("{}"); + f""" + PRAGMA 
TablePathPrefix("{path}"); SELECT title, air_date FROM episodes WHERE series_id = $seriesId AND season_id = $seasonId AND episode_id = $episodeId; - """.format( - path - ), + """, { "$seriesId": series_id, # could be defined implicit "$seasonId": (season_id, ydb.PrimitiveType.Int64), # could be defined via tuple "$episodeId": ydb.TypedValue(episode_id, ydb.PrimitiveType.Int64), # could be defined via special class @@ -397,23 +374,31 @@ series, Id: 1, title: IT Crowd, Release date: 2006-02-03 {% include [transaction_control.md](../_includes/steps/10_transaction_control.md) %} -Для выполнения YQL-запросов также метод `session.transaction().execute()`. -SDK позволяет в явном виде контролировать выполнение транзакций и настраивать необходимый режим выполнения транзакций с помощью класса `TxControl`. +Для выполнения YQL-запросов также может использоваться метод `session.transaction().execute()`. +Данный способ, в отличие от `pool.execute_with_retries`, позволяет в явном виде контролировать выполнение транзакций и настраивать необходимый режим выполнения транзакций с помощью класса `TxControl`. + +Доступные режимы транзакции: +* `ydb.QuerySerializableReadWrite()` (по умолчанию); +* `ydb.QueryOnlineReadOnly(allow_inconsistent_reads=False)`; +* `ydb.QuerySnapshotReadOnly()`; +* `ydb.QueryStaleReadOnly()`. -Результатом выполнения `tx.execute()` является стрим. Стрим позволяет считать неограниченное количество строк и объем данных, не загружая в память весь результат. -Однако, для корректного сохранения состояния транзакции на стороне `ydb` стрим необходимо прочитывать до конца после каждого запроса. -Для удобства результат функции `tx.execute()` представлен в виде контекстного менеджера, который долистывает стрим до конца после выхода. +Подробнее о режимах транзакций см. в [документации YDB](https://ydb.tech/docs/ru/concepts/transactions#modes). + +Результатом выполнения `tx.execute()` является итератор. Итератор позволяет считать неограниченное количество строк и объем данных, не загружая в память весь результат.
+Однако, для корректного сохранения состояния транзакции на стороне `ydb` итератор необходимо прочитывать до конца после каждого запроса. +Для удобства результат функции `tx.execute()` представлен в виде контекстного менеджера, который долистывает итератор до конца после выхода. {% list tabs %} -- Sync +- Синхронный ```python with tx.execute(query) as _: pass ``` -- AsyncIO +- Асинхронный ```python async with await tx.execute(query) as _: @@ -429,19 +414,17 @@ SDK позволяет в явном виде контролировать вы {% list tabs %} -- Sync +- Синхронный ```python def explicit_transaction_control(pool: ydb.QuerySessionPool, path: str, series_id, season_id, episode_id): def callee(session: ydb.QuerySessionSync): - query = """ - PRAGMA TablePathPrefix("{}"); + query = f""" + PRAGMA TablePathPrefix("{path}"); UPDATE episodes SET air_date = CurrentUtcDate() WHERE series_id = $seriesId AND season_id = $seasonId AND episode_id = $episodeId; - """.format( - path - ) + """ # Get newly created transaction id tx = session.transaction(ydb.QuerySerializableReadWrite()).begin() @@ -466,21 +449,19 @@ SDK позволяет в явном виде контролировать вы return pool.retry_operation_sync(callee) ``` -- AsyncIO +- Асинхронный ```python async def explicit_transaction_control( pool: ydb.aio.QuerySessionPoolAsync, path: str, series_id, season_id, episode_id ): async def callee(session: ydb.aio.QuerySessionAsync): - query = """ - PRAGMA TablePathPrefix("{}"); + query = f""" + PRAGMA TablePathPrefix("{path}"); UPDATE episodes SET air_date = CurrentUtcDate() WHERE series_id = $seriesId AND season_id = $seasonId AND episode_id = $episodeId; - """.format( - path - ) + """ # Get newly created transaction id tx = await session.transaction(ydb.QuerySerializableReadWrite()).begin() @@ -507,5 +488,5 @@ SDK позволяет в явном виде контролировать вы {% endlist %} -Однако стоит помнить, что транзакция может быть открыта неявно при первом запросе. 
Завершиться же она может автоматически с явным указанием флага `commit_tx=True`. +Однако стоит помнить, что транзакция может быть открыта неявно при первом запросе. Завершиться же она может автоматически с явным указанием флага `commit_tx=True`. Неявное управление транзакцией предпочтительно, так как требует меньше обращений к серверу. From 120ab80e4f79afd64b7395efde9c1834ea9a9307 Mon Sep 17 00:00:00 2001 From: Oleg Ovcharuk Date: Tue, 20 Aug 2024 13:12:54 +0300 Subject: [PATCH 05/11] EN version of pythos sdk app example --- .../en/core/dev/example-app/python/index.md | 596 +++++++++++++----- .../ru/core/dev/example-app/python/index.md | 2 +- 2 files changed, 426 insertions(+), 172 deletions(-) diff --git a/ydb/docs/en/core/dev/example-app/python/index.md b/ydb/docs/en/core/dev/example-app/python/index.md index c165c1dd921d..a39ea7febebb 100644 --- a/ydb/docs/en/core/dev/example-app/python/index.md +++ b/ydb/docs/en/core/dev/example-app/python/index.md @@ -1,6 +1,6 @@ # App in Python -This page contains a detailed description of the code of a [test app](https://github.com/ydb-platform/ydb-python-sdk/tree/master/examples/basic_example_v1) that is available as part of the {{ ydb-short-name }} [Python SDK](https://github.com/ydb-platform/ydb-python-sdk). +This page contains a detailed description of the code of a [test app](https://github.com/ydb-platform/ydb-python-sdk/tree/master/examples/basic_example_v2) that is available as part of the {{ ydb-short-name }} [Python SDK](https://github.com/ydb-platform/ydb-python-sdk). ## Downloading and starting {#download} @@ -21,44 +21,151 @@ Next, from the same working directory, run the command to start the test app. 
Th App code snippet for driver initialization: -```python -def run(endpoint, database, path): - driver_config = ydb.DriverConfig( - endpoint, database, credentials=ydb.credentials_from_env_variables(), - root_certificates=ydb.load_ydb_root_certificate(), - ) - with ydb.Driver(driver_config) as driver: - try: - driver.wait(timeout=5) - except TimeoutError: - print("Connect failed to YDB") - print("Last reported errors by discovery:") - print(driver.discovery_debug_details()) - exit(1) -``` - -App code snippet for creating a session: - -```python -session = driver.table_client.session().create() -``` +{% list tabs %} + +- Synchronous + + ```python + def run(endpoint, database, path): + driver_config = ydb.DriverConfig( + endpoint, database, credentials=ydb.credentials_from_env_variables(), + root_certificates=ydb.load_ydb_root_certificate(), + ) + with ydb.Driver(driver_config) as driver: + try: + driver.wait(timeout=5) + except TimeoutError: + print("Connect failed to YDB") + print("Last reported errors by discovery:") + print(driver.discovery_debug_details()) + exit(1) + ``` + +- Asynchronous + + ```python + async def run(endpoint, database, path): + driver_config = ydb.DriverConfig( + endpoint, database, credentials=ydb.credentials_from_env_variables(), + root_certificates=ydb.load_ydb_root_certificate(), + ) + async with ydb.aio.Driver(driver_config) as driver: + try: + await driver.wait(timeout=5) + except TimeoutError: + print("Connect failed to YDB") + print("Last reported errors by discovery:") + print(driver.discovery_debug_details()) + exit(1) + ``` + +{% endlist %} {% include [create_table.md](../_includes/steps/02_create_table.md) %} -To create tables, use the `session.create_table()` method: - -```python -def create_tables(session, path): - session.create_table( - os.path.join(path, 'series'), - ydb.TableDescription() - .with_column(ydb.Column('series_id', ydb.PrimitiveType.Uint64)) # not null column - .with_column(ydb.Column('title', 
ydb.OptionalType(ydb.PrimitiveType.Utf8))) - .with_column(ydb.Column('series_info', ydb.OptionalType(ydb.PrimitiveType.Utf8))) - .with_column(ydb.Column('release_date', ydb.OptionalType(ydb.PrimitiveType.Uint64))) - .with_primary_key('series_id') - ) -``` +To execute YQL queries, use the `pool.execute_with_retries()` method. For example, you can create tables: + +{% list tabs %} + +- Synchronous + + ```python + def create_tables(pool: ydb.QuerySessionPool, path: str): + print("\nCreating table series...") + pool.execute_with_retries( + f""" + PRAGMA TablePathPrefix("{path}"); + CREATE table `series` ( + `series_id` Uint64, + `title` Utf8, + `series_info` Utf8, + `release_date` Uint64, + PRIMARY KEY (`series_id`) + ) + """ + ) + + print("\nCreating table seasons...") + pool.execute_with_retries( + f""" + PRAGMA TablePathPrefix("{path}"); + CREATE table `seasons` ( + `series_id` Uint64, + `season_id` Uint64, + `title` Utf8, + `first_aired` Uint64, + `last_aired` Uint64, + PRIMARY KEY (`series_id`, `season_id`) + ) + """ + ) + + print("\nCreating table episodes...") + pool.execute_with_retries( + f""" + PRAGMA TablePathPrefix("{path}"); + CREATE table `episodes` ( + `series_id` Uint64, + `season_id` Uint64, + `episode_id` Uint64, + `title` Utf8, + `air_date` Uint64, + PRIMARY KEY (`series_id`, `season_id`, `episode_id`) + ) + """ + ) + ``` + +- Asynchronous + + ```python + async def create_tables(pool: ydb.aio.QuerySessionPoolAsync, path: str): + print("\nCreating table series...") + await pool.execute_with_retries( + f""" + PRAGMA TablePathPrefix("{path}"); + CREATE table `series` ( + `series_id` Uint64, + `title` Utf8, + `series_info` Utf8, + `release_date` Uint64, + PRIMARY KEY (`series_id`) + ) + """ + ) + + print("\nCreating table seasons...") + await pool.execute_with_retries( + f""" + PRAGMA TablePathPrefix("{path}"); + CREATE table `seasons` ( + `series_id` Uint64, + `season_id` Uint64, + `title` Utf8, + `first_aired` Uint64, + `last_aired` Uint64, +
PRIMARY KEY (`series_id`, `season_id`) + ) + """ + ) + + print("\nCreating table episodes...") + await pool.execute_with_retries( + f""" + PRAGMA TablePathPrefix("{path}"); + CREATE table `episodes` ( + `series_id` Uint64, + `season_id` Uint64, + `episode_id` Uint64, + `title` Utf8, + `air_date` Uint64, + PRIMARY KEY (`series_id`, `season_id`, `episode_id`) + ) + """ + ) + ``` + +{% endlist %} The path parameter accepts the absolute path starting from the root: @@ -66,172 +173,319 @@ The path parameter accepts the absolute path starting from the root: full_path = os.path.join(database, path) ``` -You can use the `session.describe_table()` method to output information about the table structure and make sure that it was properly created: - -```python -def describe_table(session, path, name): - result = session.describe_table(os.path.join(path, name)) - print("\n> describe table: series") - for column in result.columns: - print("column, name:", column.name, ",", str(column.type.item).strip()) -``` - -The given code snippet prints the following text to the console at startup: +The function `pool.execute_with_retries(query)`, unlike `tx.execute()`, loads the result of the query into memory before returning it to the client. This eliminates the need to use special constructs to control the iterator, but it is necessary to use this method with caution for large `SELECT` queries. Iterators are discussed in more detail below.
-```bash -> describe table: series -('column, name:', 'series_id', ',', 'type_id: UINT64') -('column, name:', 'title', ',', 'type_id: UTF8') -('column, name:', 'series_info', ',', 'type_id: UTF8') -('column, name:', 'release_date', ',', 'type_id: UINT64') -``` {% include [steps/03_write_queries.md](../_includes/steps/03_write_queries.md) %} Code snippet for data insert/update: -```python -def upsert_simple(session, path): - session.transaction().execute( - """ - PRAGMA TablePathPrefix("{}"); - UPSERT INTO episodes (series_id, season_id, episode_id, title) VALUES - (2, 6, 1, "TBD"); - """.format(path), - commit_tx=True, - ) -``` +{% list tabs %} -{% include [pragmatablepathprefix.md](../_includes/auxilary/pragmatablepathprefix.md) %} +- Synchronous -{% include [steps/04_query_processing.md](../_includes/steps/04_query_processing.md) %} + ```python + def upsert_simple(pool, path): + print("\nPerforming UPSERT into episodes...") + pool.execute_with_retries( + f""" + PRAGMA TablePathPrefix("{path}"); + UPSERT INTO episodes (series_id, season_id, episode_id, title) VALUES (2, 6, 1, "TBD"); + """ + ) + ``` -To execute YQL queries, use the `session.transaction().execute()` method. -The SDK lets you explicitly control the execution of transactions and configure the transaction execution mode using the `TxControl` class. +- Asynchronous -In the code snippet below, the transaction is executed using the `transaction().execute()` method. The transaction execution mode set is `ydb.SerializableReadWrite()`. When all the queries in the transaction are completed, the transaction is automatically committed by explicitly setting the flag `commit_tx=True`. The query body is described using YQL syntax and is passed to the `execute` method as a parameter. 
+ ```python + async def upsert_simple(pool: ydb.aio.QuerySessionPoolAsync, path: str): + print("\nPerforming UPSERT into episodes...") + await pool.execute_with_retries( + f""" + PRAGMA TablePathPrefix("{path}"); + UPSERT INTO episodes (series_id, season_id, episode_id, title) VALUES (2, 6, 1, "TBD"); + """ + ) + ``` -```python -def select_simple(session, path): - result_sets = session.transaction(ydb.SerializableReadWrite()).execute( - """ - PRAGMA TablePathPrefix("{}"); - $format = DateTime::Format("%Y-%m-%d"); - SELECT - series_id, - title, - $format(DateTime::FromSeconds(CAST(DateTime::ToSeconds(DateTime::IntervalFromDays(CAST(release_date AS Int16))) AS Uint32))) AS release_date - FROM series - WHERE series_id = 1; - """.format(path), - commit_tx=True, - ) - print("\n> select_simple_transaction:") - for row in result_sets[0].rows: - print("series, id: ", row.series_id, ", title: ", row.title, ", release date: ", row.release_date) - - return result_sets[0] -``` +{% endlist %} + +{% include [pragmatablepathprefix.md](../_includes/auxilary/pragmatablepathprefix.md) %} + +{% include [steps/04_query_processing.md](../_includes/steps/04_query_processing.md) %} -When the query is executed, `result_set` is returned whose iteration outputs the following text to the console: +To execute YQL queries, it is often enough to use the already familiar `pool.execute_with_retries()` method. 
+ +{% list tabs %} + +- Synchronous + + ```python + def select_simple(pool: ydb.QuerySessionPool, path: str): + print("\nCheck series table...") + result_sets = pool.execute_with_retries( + f""" + PRAGMA TablePathPrefix("{path}"); + SELECT + series_id, + title, + release_date + FROM series + WHERE series_id = 1; + """, + ) + first_set = result_sets[0] + for row in first_set.rows: + print( + "series, id: ", + row.series_id, + ", title: ", + row.title, + ", release date: ", + row.release_date, + ) + return first_set + ``` + +- Asynchronous + + ```python + async def select_simple(pool: ydb.aio.QuerySessionPoolAsync, path: str): + print("\nCheck series table...") + result_sets = await pool.execute_with_retries( + f""" + PRAGMA TablePathPrefix("{path}"); + SELECT + series_id, + title, + release_date + FROM series + WHERE series_id = 1; + """, + ) + first_set = result_sets[0] + for row in first_set.rows: + print( + "series, id: ", + row.series_id, + ", title: ", + row.title, + ", release date: ", + row.release_date, + ) + return first_set + ``` + +{% endlist %} + +As the result of executing the query, a `result_set` is returned, iterating on which the text is output to the console: ```bash > SelectSimple: series, Id: 1, title: IT Crowd, Release date: 2006-02-03 ``` - -{% include [param_prep_queries.md](../_includes/steps/07_param_prep_queries.md) %} - -```python -def select_prepared(session, path, series_id, season_id, episode_id): - query = """ - PRAGMA TablePathPrefix("{}"); - DECLARE $seriesId AS Uint64; - DECLARE $seasonId AS Uint64; - DECLARE $episodeId AS Uint64; - $format = DateTime::Format("%Y-%m-%d"); - SELECT - title, - $format(DateTime::FromSeconds(CAST(DateTime::ToSeconds(DateTime::IntervalFromDays(CAST(air_date AS Int16))) AS Uint32))) AS air_date - FROM episodes - WHERE series_id = $seriesId AND season_id = $seasonId AND episode_id = $episodeId; - """.format(path) - - prepared_query = session.prepare(query) - result_sets = 
session.transaction(ydb.SerializableReadWrite()).execute( - prepared_query, { - '$seriesId': series_id, - '$seasonId': season_id, - '$episodeId': episode_id, - }, - commit_tx=True - ) - print("\n> select_prepared_transaction:") - for row in result_sets[0].rows: - print("episode title:", row.title, ", air date:", row.air_date) - - return result_sets[0] -``` - -The given code snippet prints the following text to the console at startup: +## Parameterized queries {#param-queries} + +To execute parameterized queries in the `pool.execute_with_retries()` method (or `tx.execute()`, which will be shown in the next section) it is necessary to pass a dictionary with parameters of a special type, where the key is the parameter name, and the value can be one of the following: +1. The usual value; +2. Tuple with value and type; +3. A special type `ydb.TypedValue(value=value, value_type=value_type)`. + +If you specify a value without a type, the conversion takes place according to the following rules: +* `int` -> `ydb.PrimitiveType.Int64` +* `float` -> `ydb.PrimitiveType.Double` +* `str` -> `ydb.PrimitiveType.Utf8` +* `bytes` -> `ydb.PrimitiveType.String` +* `bool` -> `ydb.PrimitiveType.Bool` +* `list` -> `ydb.ListType` +* `dict` -> `ydb.DictType` + +Automatic conversion of lists and dictionaries is possible only in the case of homogeneous structures, the type of nested value will be calculated recursively according to the above rules. 
+ +A code snippet demonstrating the possibility of using parameterized queries: + +{% list tabs %} + +- Synchronous + + ```python + def select_with_parameters(pool: ydb.QuerySessionPool, path: str, series_id, season_id, episode_id): + result_sets = pool.execute_with_retries( + f""" + PRAGMA TablePathPrefix("{path}"); + SELECT + title, + air_date + FROM episodes + WHERE series_id = $seriesId AND season_id = $seasonId AND episode_id = $episodeId; + """, + { + "$seriesId": series_id, # data type could be defined implicitly + "$seasonId": (season_id, ydb.PrimitiveType.Int64), # could be defined via tuple + "$episodeId": ydb.TypedValue(episode_id, ydb.PrimitiveType.Int64), # could be defined via special class + }, + ) + + print("\n> select_with_parameters:") + first_set = result_sets[0] + for row in first_set.rows: + print("episode title:", row.title, ", air date:", row.air_date) + + return first_set + ``` + +- Asynchronous + + ```python + async def select_with_parameters(pool: ydb.aio.QuerySessionPoolAsync, path: str, series_id, season_id, episode_id): + result_sets = await pool.execute_with_retries( + f""" + PRAGMA TablePathPrefix("{path}"); + SELECT + title, + air_date + FROM episodes + WHERE series_id = $seriesId AND season_id = $seasonId AND episode_id = $episodeId; + """, + { + "$seriesId": series_id, # could be defined implicit + "$seasonId": (season_id, ydb.PrimitiveType.Int64), # could be defined via tuple + "$episodeId": ydb.TypedValue(episode_id, ydb.PrimitiveType.Int64), # could be defined via special class + }, + ) + + print("\n> select_with_parameters:") + first_set = result_sets[0] + for row in first_set.rows: + print("episode title:", row.title, ", air date:", row.air_date) + + return first_set + ``` + +{% endlist %} + +The above code snippet outputs text to the console: ```bash > select_prepared_transaction: ('episode title:', u'To Build a Better Beta', ', air date:', '2016-06-05') ``` -{% include [scan_query.md](../_includes/steps/08_scan_query.md) %} 
+{% include [transaction_control.md](../_includes/steps/10_transaction_control.md) %} -```python -def executeScanQuery(driver): - query = ydb.ScanQuery(""" - SELECT series_id, season_id, COUNT(*) AS episodes_count - FROM episodes - GROUP BY series_id, season_id - ORDER BY series_id, season_id - """, {}) - - it = driver.table_client.scan_query(query) - - while True: - try: - result = next(it) - print result.result_set.rows - except StopIteration: - break -``` +The `session.transaction().execute()` method can also be used to execute YQL queries. +This method, unlike `pool.execute_with_retries`, allows you to explicitly control the execution of transactions and configure the needed transaction mode using the `TxControl` class. + +Available transaction modes: +* `ydb.QuerySerializableReadWrite()` (default); +* `ydb.QueryOnlineReadOnly(allow_inconsistent_reads=False)`; +* `ydb.QuerySnapshotReadOnly()`; +* `ydb.QueryStaleReadOnly()`. -{% include [transaction_control.md](../_includes/steps/10_transaction_control.md) %} +For more information about transaction modes, see [YDB docs](https://ydb.tech/docs/en/concepts/transactions#modes). + +The result of executing `tx.execute()` is an iterator. The iterator allows you to read an unlimited number of rows and an unlimited volume of data without loading the entire result into memory. +However, in order to correctly save the state of the transaction on the `ydb` side, the iterator must be read to the end after each request. +For convenience, the result of the `tx.execute()` function is presented as a context manager that scrolls through the iterator to the end after exiting.
-Code snippet for `transaction().begin()` and `tx.Commit()` calls: +{% list tabs %} -```python -def explicit_tcl(session, path, series_id, season_id, episode_id): - query = """ - PRAGMA TablePathPrefix("{}"); +- Synchronous - DECLARE $seriesId AS Uint64; - DECLARE $seasonId AS Uint64; - DECLARE $episodeId AS Uint64; + ```python + with tx.execute(query) as _: + pass + ``` - UPDATE episodes - SET air_date = CAST(CurrentUtcDate() AS Uint64) - WHERE series_id = $seriesId AND season_id = $seasonId AND episode_id = $episodeId; - """.format(path) - prepared_query = session.prepare(query) +- Asynchronous - tx = session.transaction(ydb.SerializableReadWrite()).begin() + ```python + async with await tx.execute(query) as _: + pass + ``` - tx.execute( - prepared_query, { - '$seriesId': series_id, - '$seasonId': season_id, - '$episodeId': episode_id - } - ) +{% endlist %} - print("\n> explicit TCL call") +In the code snippet below, the transaction is executed using the `transaction().execute()` method. The transaction mode is set to `ydb.QuerySerializableReadWrite()`. +The request body is described using YQL syntax and is passed to the `execute` method as the parameter. - tx.commit() -``` +A code snippet demonstrating the explicit use of `transaction().begin()` and `tx.commit()`: + +{% list tabs %} + +- Synchronous + + ```python + def explicit_transaction_control(pool: ydb.QuerySessionPool, path: str, series_id, season_id, episode_id): + def callee(session: ydb.QuerySessionSync): + query = f""" + PRAGMA TablePathPrefix("{path}"); + UPDATE episodes + SET air_date = CurrentUtcDate() + WHERE series_id = $seriesId AND season_id = $seasonId AND episode_id = $episodeId; + """ + + # Get newly created transaction id + tx = session.transaction(ydb.QuerySerializableReadWrite()).begin() + + # Execute data query. 
+ # Transaction control settings continues active transaction (tx) + with tx.execute( + query, + { + "$seriesId": (series_id, ydb.PrimitiveType.Int64), + "$seasonId": (season_id, ydb.PrimitiveType.Int64), + "$episodeId": (episode_id, ydb.PrimitiveType.Int64), + }, + ) as _: + pass + + print("\n> explicit TCL call") + + # Commit active transaction(tx) + tx.commit() + + return pool.retry_operation_sync(callee) + ``` + +- Asynchronous + + ```python + async def explicit_transaction_control( + pool: ydb.aio.QuerySessionPoolAsync, path: str, series_id, season_id, episode_id + ): + async def callee(session: ydb.aio.QuerySessionAsync): + query = f""" + PRAGMA TablePathPrefix("{path}"); + UPDATE episodes + SET air_date = CurrentUtcDate() + WHERE series_id = $seriesId AND season_id = $seasonId AND episode_id = $episodeId; + """ + + # Get newly created transaction id + tx = await session.transaction(ydb.QuerySerializableReadWrite()).begin() + + # Execute data query. + # Transaction control settings continues active transaction (tx) + async with await tx.execute( + query, + { + "$seriesId": (series_id, ydb.PrimitiveType.Int64), + "$seasonId": (season_id, ydb.PrimitiveType.Int64), + "$episodeId": (episode_id, ydb.PrimitiveType.Int64), + }, + ) as _: + pass + + print("\n> explicit TCL call") + + # Commit active transaction(tx) + await tx.commit() + + return await pool.retry_operation_async(callee) + ``` + +{% endlist %} +However, it is worth remembering that a transaction can be opened implicitly at the first request. It could be commited automatically with the explicit indication of the `commit_tx=True` flag. +Implicit transaction management is preferable because it requires fewer server calls. 
diff --git a/ydb/docs/ru/core/dev/example-app/python/index.md b/ydb/docs/ru/core/dev/example-app/python/index.md index 825984a8bb41..7adf7784deeb 100644 --- a/ydb/docs/ru/core/dev/example-app/python/index.md +++ b/ydb/docs/ru/core/dev/example-app/python/index.md @@ -173,7 +173,7 @@ python3 -m pip install iso8601 full_path = os.path.join(database, path) ``` -Функция `pool.execute_with_retries(query)`, в отличие от `tx.execute()`, загружает в память результат запроса перед его возвращением клиенту. Благодаря этому отпадает необходимость использования специальных конструкций для контроля над стримом, однако необходимо с осторожностью применять этот метод для больших запросов `SELECT`. Подробнее о стримах будет сказано ниже. +Функция `pool.execute_with_retries(query)`, в отличие от `tx.execute()`, загружает в память результат запроса перед его возвращением клиенту. Благодаря этому отпадает необходимость использования специальных конструкций для контроля над итератором, однако необходимо с осторожностью применять этот метод для больших запросов `SELECT`. Подробнее о стримах будет сказано ниже. 
{% include [steps/03_write_queries.md](../_includes/steps/03_write_queries.md) %} From f07df6412441ebb5cddcc98fc0f4b78982e8311a Mon Sep 17 00:00:00 2001 From: Oleg Ovcharuk Date: Wed, 21 Aug 2024 10:37:15 +0300 Subject: [PATCH 06/11] Add huge select example --- .../en/core/dev/example-app/python/index.md | 56 ++++++++++++++++++- .../ru/core/dev/example-app/python/index.md | 56 ++++++++++++++++++- 2 files changed, 110 insertions(+), 2 deletions(-) diff --git a/ydb/docs/en/core/dev/example-app/python/index.md b/ydb/docs/en/core/dev/example-app/python/index.md index a39ea7febebb..8ca0912a0bb5 100644 --- a/ydb/docs/en/core/dev/example-app/python/index.md +++ b/ydb/docs/en/core/dev/example-app/python/index.md @@ -488,4 +488,58 @@ A code snippet demonstrating the explicit use of `transaction().begin()` and `tx {% endlist %} However, it is worth remembering that a transaction can be opened implicitly at the first request. It could be commited automatically with the explicit indication of the `commit_tx=True` flag. -Implicit transaction management is preferable because it requires fewer server calls. +Implicit transaction management is preferable because it requires fewer server calls. An example of implicit control will be demonstrated in the next block. + +## Huge Selects {#huge-selects} + +To perform `SELECT` operations with an unlimited number of found rows, you must also use the `transaction().execute(query)` method. As mentioned above, the result of the work is an iterator - unlike `pool.execute_with_retries(query)`, it allows you to go through the selection without first loading it into memory. 
+ +Example of a `SELECT` with unlimited data and implicit transaction control: + +{% list tabs %} + +- Synchronous + + ```python + def huge_select(pool: ydb.QuerySessionPool, path: str): + def callee(session: ydb.QuerySessionSync): + query = f""" + PRAGMA TablePathPrefix("{path}"); + SELECT * from episodes; + """ + + with session.transaction().execute( + query, + commit_tx=True, + ) as result_sets: + print("\n> Huge SELECT call") + for result_set in result_sets: + for row in result_set.rows: + print("episode title:", row.title, ", air date:", row.air_date) + + return pool.retry_operation_sync(callee) + ``` + +- Asynchronous + + ```python + async def huge_select(pool: ydb.aio.QuerySessionPoolAsync, path: str): + async def callee(session: ydb.aio.QuerySessionAsync): + query = f""" + PRAGMA TablePathPrefix("{path}"); + SELECT * from episodes; + """ + + async with await session.transaction().execute( + query, + commit_tx=True, + ) as result_sets: + print("\n> Huge SELECT call") + async for result_set in result_sets: + for row in result_set.rows: + print("episode title:", row.title, ", air date:", row.air_date) + + return await pool.retry_operation_async(callee) + ``` + +{% endlist %} diff --git a/ydb/docs/ru/core/dev/example-app/python/index.md b/ydb/docs/ru/core/dev/example-app/python/index.md index 7adf7784deeb..d48a01d39563 100644 --- a/ydb/docs/ru/core/dev/example-app/python/index.md +++ b/ydb/docs/ru/core/dev/example-app/python/index.md @@ -489,4 +489,58 @@ series, Id: 1, title: IT Crowd, Release date: 2006-02-03 {% endlist %} Однако стоит помнить, что транзакция может быть открыта неявно при первом запросе. Завершиться же она может автоматически с явным указанием флага `commit_tx=True`. -Неявное управление транзакцией предпочтительно, так как требует меньше обращений к серверу. +Неявное управление транзакцией предпочтительно, так как требует меньше обращений к серверу. Пример неявного управления будет продемонстрирован в следующем блоке. 
+ +## Чтение неограниченной выборки {#huge-selects} + +Для выполнения операций `SELECT` с неограниченным количеством найденных строк нужно также использовать метод `transaction().execute(query)`. Как было сказано выше, результатом работы является итератор - он, в отличие от `pool.execute_with_retries(query)`, позволяет пройтись по выборке не загружая ее предварительно в память. + +Пример `SELECT` с неограниченным количеством данных и неявным контролем транзакции: + +{% list tabs %} + +- Синхронный + + ```python + def huge_select(pool: ydb.QuerySessionPool, path: str): + def callee(session: ydb.QuerySessionSync): + query = f""" + PRAGMA TablePathPrefix("{path}"); + SELECT * from episodes; + """ + + with session.transaction().execute( + query, + commit_tx=True, + ) as result_sets: + print("\n> Huge SELECT call") + for result_set in result_sets: + for row in result_set.rows: + print("episode title:", row.title, ", air date:", row.air_date) + + return pool.retry_operation_sync(callee) + ``` + +- Асинхронный + + ```python + async def huge_select(pool: ydb.aio.QuerySessionPoolAsync, path: str): + async def callee(session: ydb.aio.QuerySessionAsync): + query = f""" + PRAGMA TablePathPrefix("{path}"); + SELECT * from episodes; + """ + + async with await session.transaction().execute( + query, + commit_tx=True, + ) as result_sets: + print("\n> Huge SELECT call") + async for result_set in result_sets: + for row in result_set.rows: + print("episode title:", row.title, ", air date:", row.air_date) + + return await pool.retry_operation_async(callee) + ``` + +{% endlist %} From e772ec7b2db8c1893e0b8d3de9f9cd0a9f8781d4 Mon Sep 17 00:00:00 2001 From: Oleg Ovcharuk Date: Wed, 21 Aug 2024 12:57:05 +0300 Subject: [PATCH 07/11] Remove pragma from example & add declare --- .../en/core/dev/example-app/python/index.md | 208 +++++++++--------- .../ru/core/dev/example-app/python/index.md | 208 +++++++++--------- 2 files changed, 196 insertions(+), 220 deletions(-) diff --git 
a/ydb/docs/en/core/dev/example-app/python/index.md b/ydb/docs/en/core/dev/example-app/python/index.md index 8ca0912a0bb5..1dca176a684a 100644 --- a/ydb/docs/en/core/dev/example-app/python/index.md +++ b/ydb/docs/en/core/dev/example-app/python/index.md @@ -26,7 +26,7 @@ App code snippet for driver initialization: - Synchronous ```python - def run(endpoint, database, path): + def run(endpoint, database): driver_config = ydb.DriverConfig( endpoint, database, credentials=ydb.credentials_from_env_variables(), root_certificates=ydb.load_ydb_root_certificate(), @@ -44,7 +44,7 @@ App code snippet for driver initialization: - Asynchronous ```python - async def run(endpoint, database, path): + async def run(endpoint, database): driver_config = ydb.DriverConfig( endpoint, database, credentials=ydb.credentials_from_env_variables(), root_certificates=ydb.load_ydb_root_certificate(), @@ -70,109 +70,97 @@ To execute YQL queries, use the `pool.execute_with_retries()` method. For exampl - Synchronous ```python - def create_tables(pool: ydb.QuerySessionPool, path: str): + def create_tables(pool: ydb.QuerySessionPool): print("\nCreating table series...") pool.execute_with_retries( - f""" - PRAGMA TablePathPrefix("{path}"); - CREATE table `series` ( - `series_id` Uint64, - `title` Utf8, - `series_info` Utf8, - `release_date` Uint64, - PRIMARY KEY (`series_id`) - ) - """ + """ + CREATE table `series` ( + `series_id` Int64, + `title` Utf8, + `series_info` Utf8, + `release_date` Date, + PRIMARY KEY (`series_id`) + ) + """ ) print("\nCreating table seasons...") pool.execute_with_retries( - f""" - PRAGMA TablePathPrefix("{path}"); - CREATE table `seasons` ( - `series_id` Uint64, - `season_id` Uint64, - `title` Utf8, - `first_aired` Uint64, - `last_aired` Uint64, - PRIMARY KEY (`series_id`, `season_id`) - ) - """ + """ + CREATE table `seasons` ( + `series_id` Int64, + `season_id` Int64, + `title` Utf8, + `first_aired` Date, + `last_aired` Date, + PRIMARY KEY (`series_id`, `season_id`) + ) + 
""" ) print("\nCreating table episodes...") pool.execute_with_retries( - f""" - PRAGMA TablePathPrefix("{path}"); - CREATE table `episodes` ( - `series_id` Uint64, - `season_id` Uint64, - `episode_id` Uint64, - `title` Utf8, - `air_date` Uint64, - PRIMARY KEY (`series_id`, `season_id`, `episode_id`) - ) - """ + """ + CREATE table `episodes` ( + `series_id` Int64, + `season_id` Int64, + `episode_id` Int64, + `title` Utf8, + `air_date` Date, + PRIMARY KEY (`series_id`, `season_id`, `episode_id`) + ) + """ ) ``` - Asynchronous ```python - async def create_tables(pool: ydb.aio.QuerySessionPoolAsync, path: str): + async def create_tables(pool: ydb.aio.QuerySessionPoolAsync): print("\nCreating table series...") await pool.execute_with_retries( - f""" - PRAGMA TablePathPrefix("{path}"); - CREATE table `series` ( - `series_id` Uint64, - `title` Utf8, - `series_info` Utf8, - `release_date` Uint64, - PRIMARY KEY (`series_id`) - ) - """ + """ + CREATE table `series` ( + `series_id` Int64, + `title` Utf8, + `series_info` Utf8, + `release_date` Date, + PRIMARY KEY (`series_id`) + ) + """ ) print("\nCreating table seasons...") await pool.execute_with_retries( - f""" - PRAGMA TablePathPrefix("{path}"); - CREATE table `seasons` ( - `series_id` Uint64, - `season_id` Uint64, - `title` Utf8, - `first_aired` Uint64, - `last_aired` Uint64, - PRIMARY KEY (`series_id`, `season_id`) - ) - """ + """ + CREATE table `seasons` ( + `series_id` Int64, + `season_id` Int64, + `title` Utf8, + `first_aired` Date, + `last_aired` Date, + PRIMARY KEY (`series_id`, `season_id`) + ) + """ ) print("\nCreating table episodes...") await pool.execute_with_retries( - f""" - PRAGMA TablePathPrefix("{path}"); - CREATE table `episodes` ( - `series_id` Uint64, - `season_id` Uint64, - `episode_id` Uint64, - `title` Utf8, - `air_date` Uint64, - PRIMARY KEY (`series_id`, `season_id`, `episode_id`) - ) - """ + """ + CREATE table `episodes` ( + `series_id` Int64, + `season_id` Int64, + `episode_id` Int64, + `title` 
Utf8, + `air_date` Date, + PRIMARY KEY (`series_id`, `season_id`, `episode_id`) + ) + """ ) ``` {% endlist %} -The path parameter accepts the absolute path starting from the root: - -```python -full_path = os.path.join(database, path) -``` - The function `pool.execute_with_retries(query)`, unlike `tx.execute()`, loads the result of the query into memory before returning it to the client. This eliminates the need to use special constructs to control the iterator, but it is necessary to use this method with caution for large `SELECT` queries. More information about streams will be discussed below. {% include [steps/03_write_queries.md](../_includes/steps/03_write_queries.md) %} @@ -184,11 +172,10 @@ Code snippet for data insert/update: - Synchronous ```python - def upsert_simple(pool, path): + def upsert_simple(pool: ydb.QuerySessionPool): print("\nPerforming UPSERT into episodes...") pool.execute_with_retries( - f""" - PRAGMA TablePathPrefix("{path}"); + """ UPSERT INTO episodes (series_id, season_id, episode_id, title) VALUES (2, 6, 1, "TBD"); """ ) @@ -197,11 +184,10 @@ Code snippet for data insert/update: - Asynchronous ```python - async def upsert_simple(pool: ydb.aio.QuerySessionPoolAsync, path: str): + async def upsert_simple(pool: ydb.aio.QuerySessionPoolAsync): print("\nPerforming UPSERT into episodes...") await pool.execute_with_retries( - f""" - PRAGMA TablePathPrefix("{path}"); + """ UPSERT INTO episodes (series_id, season_id, episode_id, title) VALUES (2, 6, 1, "TBD"); """ ) @@ -209,8 +195,6 @@ Code snippet for data insert/update: {% endlist %} -{% include [pragmatablepathprefix.md](../_includes/auxilary/pragmatablepathprefix.md) %} - {% include [steps/04_query_processing.md](../_includes/steps/04_query_processing.md) %} To execute YQL queries, it is often enough to use the already familiar `pool.execute_with_retries()` method. 
@@ -220,11 +204,10 @@ To execute YQL queries, it is often enough to use the already familiar `pool.exe - Synchronous ```python - def select_simple(pool: ydb.QuerySessionPool, path: str): + def select_simple(pool: ydb.QuerySessionPool): print("\nCheck series table...") result_sets = pool.execute_with_retries( - f""" - PRAGMA TablePathPrefix("{path}"); + """ SELECT series_id, title, @@ -249,11 +232,10 @@ To execute YQL queries, it is often enough to use the already familiar `pool.exe - Asynchronous ```python - async def select_simple(pool: ydb.aio.QuerySessionPoolAsync, path: str): + async def select_simple(pool: ydb.aio.QuerySessionPoolAsync): print("\nCheck series table...") result_sets = await pool.execute_with_retries( - f""" - PRAGMA TablePathPrefix("{path}"); + """ SELECT series_id, title, @@ -309,10 +291,13 @@ A code snippet demonstrating the possibility of using parameterized queries: - Synchronous ```python - def select_with_parameters(pool: ydb.QuerySessionPool, path: str, series_id, season_id, episode_id): + def select_with_parameters(pool: ydb.QuerySessionPool, series_id, season_id, episode_id): result_sets = pool.execute_with_retries( - f""" - PRAGMA TablePathPrefix("{path}"); + """ + DECLARE $seriesId AS Int64; + DECLARE $seasonId AS Int64; + DECLARE $episodeId AS Int64; + SELECT title, air_date @@ -337,10 +322,13 @@ A code snippet demonstrating the possibility of using parameterized queries: - Asynchronous ```python - async def select_with_parameters(pool: ydb.aio.QuerySessionPoolAsync, path: str, series_id, season_id, episode_id): + async def select_with_parameters(pool: ydb.aio.QuerySessionPoolAsync, series_id, season_id, episode_id): result_sets = await pool.execute_with_retries( - f""" - PRAGMA TablePathPrefix("{path}"); + """ + DECLARE $seriesId AS Int64; + DECLARE $seasonId AS Int64; + DECLARE $episodeId AS Int64; + SELECT title, air_date @@ -416,10 +404,13 @@ A code snippet demonstrating the explicit use of `transaction().begin()` and `tx - 
Synchronous ```python - def explicit_transaction_control(pool: ydb.QuerySessionPool, path: str, series_id, season_id, episode_id): + def explicit_transaction_control(pool: ydb.QuerySessionPool, series_id, season_id, episode_id): def callee(session: ydb.QuerySessionSync): - query = f""" - PRAGMA TablePathPrefix("{path}"); + query = """ + DECLARE $seriesId AS Int64; + DECLARE $seasonId AS Int64; + DECLARE $episodeId AS Int64; + UPDATE episodes SET air_date = CurrentUtcDate() WHERE series_id = $seriesId AND season_id = $seasonId AND episode_id = $episodeId; @@ -452,11 +443,14 @@ A code snippet demonstrating the explicit use of `transaction().begin()` and `tx ```python async def explicit_transaction_control( - pool: ydb.aio.QuerySessionPoolAsync, path: str, series_id, season_id, episode_id + pool: ydb.aio.QuerySessionPoolAsync, series_id, season_id, episode_id ): async def callee(session: ydb.aio.QuerySessionAsync): - query = f""" - PRAGMA TablePathPrefix("{path}"); + query = """ + DECLARE $seriesId AS Int64; + DECLARE $seasonId AS Int64; + DECLARE $episodeId AS Int64; + UPDATE episodes SET air_date = CurrentUtcDate() WHERE series_id = $seriesId AND season_id = $seasonId AND episode_id = $episodeId; @@ -501,12 +495,9 @@ Example of a `SELECT` with unlimited data and implicit transaction control: - Synchronous ```python - def huge_select(pool: ydb.QuerySessionPool, path: str): + def huge_select(pool: ydb.QuerySessionPool): def callee(session: ydb.QuerySessionSync): - query = f""" - PRAGMA TablePathPrefix("{path}"); - SELECT * from episodes; - """ + query = """SELECT * from episodes;""" with session.transaction().execute( query, @@ -523,12 +514,9 @@ Example of a `SELECT` with unlimited data and implicit transaction control: - Asynchronous ```python - async def huge_select(pool: ydb.aio.QuerySessionPoolAsync, path: str): + async def huge_select(pool: ydb.aio.QuerySessionPoolAsync): async def callee(session: ydb.aio.QuerySessionAsync): - query = f""" - PRAGMA 
TablePathPrefix("{path}"); - SELECT * from episodes; - """ + query = """SELECT * from episodes;""" async with await session.transaction().execute( query, diff --git a/ydb/docs/ru/core/dev/example-app/python/index.md b/ydb/docs/ru/core/dev/example-app/python/index.md index d48a01d39563..6dd8a0ac8287 100644 --- a/ydb/docs/ru/core/dev/example-app/python/index.md +++ b/ydb/docs/ru/core/dev/example-app/python/index.md @@ -26,7 +26,7 @@ python3 -m pip install iso8601 - Синхронный ```python - def run(endpoint, database, path): + def run(endpoint, database): driver_config = ydb.DriverConfig( endpoint, database, credentials=ydb.credentials_from_env_variables(), root_certificates=ydb.load_ydb_root_certificate(), @@ -44,7 +44,7 @@ python3 -m pip install iso8601 - Асинхронный ```python - async def run(endpoint, database, path): + async def run(endpoint, database): driver_config = ydb.DriverConfig( endpoint, database, credentials=ydb.credentials_from_env_variables(), root_certificates=ydb.load_ydb_root_certificate(), @@ -70,109 +70,97 @@ python3 -m pip install iso8601 - Синхронный ```python - def create_tables(pool: ydb.QuerySessionPool, path: str): + def create_tables(pool: ydb.QuerySessionPool): print("\nCreating table series...") pool.execute_with_retries( - f""" - PRAGMA TablePathPrefix("{path}"); - CREATE table `series` ( - `series_id` Uint64, - `title` Utf8, - `series_info` Utf8, - `release_date` Uint64, - PRIMARY KEY (`series_id`) - ) - """ + """ + CREATE table `series` ( + `series_id` Int64, + `title` Utf8, + `series_info` Utf8, + `release_date` Date, + PRIMARY KEY (`series_id`) + ) + """ ) print("\nCreating table seasons...") pool.execute_with_retries( - f""" - PRAGMA TablePathPrefix("{path}"); - CREATE table `seasons` ( - `series_id` Uint64, - `season_id` Uint64, - `title` Utf8, - `first_aired` Uint64, - `last_aired` Uint64, - PRIMARY KEY (`series_id`, `season_id`) - ) - """ + """ + CREATE table `seasons` ( + `series_id` Int64, + `season_id` Int64, + `title` Utf8, + 
`first_aired` Date, + `last_aired` Date, + PRIMARY KEY (`series_id`, `season_id`) + ) + """ ) print("\nCreating table episodes...") pool.execute_with_retries( - f""" - PRAGMA TablePathPrefix("{path}"); - CREATE table `episodes` ( - `series_id` Uint64, - `season_id` Uint64, - `episode_id` Uint64, - `title` Utf8, - `air_date` Uint64, - PRIMARY KEY (`series_id`, `season_id`, `episode_id`) - ) - """ + """ + CREATE table `episodes` ( + `series_id` Int64, + `season_id` Int64, + `episode_id` Int64, + `title` Utf8, + `air_date` Date, + PRIMARY KEY (`series_id`, `season_id`, `episode_id`) + ) + """ ) ``` - Асинхронный ```python - async def create_tables(pool: ydb.aio.QuerySessionPoolAsync, path: str): + async def create_tables(pool: ydb.aio.QuerySessionPoolAsync): print("\nCreating table series...") await pool.execute_with_retries( - f""" - PRAGMA TablePathPrefix("{path}"); - CREATE table `series` ( - `series_id` Uint64, - `title` Utf8, - `series_info` Utf8, - `release_date` Uint64, - PRIMARY KEY (`series_id`) - ) - """ + """ + CREATE table `series` ( + `series_id` Int64, + `title` Utf8, + `series_info` Utf8, + `release_date` Date, + PRIMARY KEY (`series_id`) + ) + """ ) print("\nCreating table seasons...") await pool.execute_with_retries( - f""" - PRAGMA TablePathPrefix("{path}"); - CREATE table `seasons` ( - `series_id` Uint64, - `season_id` Uint64, - `title` Utf8, - `first_aired` Uint64, - `last_aired` Uint64, - PRIMARY KEY (`series_id`, `season_id`) - ) - """ + """ + CREATE table `seasons` ( + `series_id` Int64, + `season_id` Int64, + `title` Utf8, + `first_aired` Date, + `last_aired` Date, + PRIMARY KEY (`series_id`, `season_id`) + ) + """ ) print("\nCreating table episodes...") await pool.execute_with_retries( - f""" - PRAGMA TablePathPrefix("{path}"); - CREATE table `episodes` ( - `series_id` Uint64, - `season_id` Uint64, - `episode_id` Uint64, - `title` Utf8, - `air_date` Uint64, - PRIMARY KEY (`series_id`, `season_id`, `episode_id`) - ) - """ + """ + CREATE table 
`episodes` ( + `series_id` Int64, + `season_id` Int64, + `episode_id` Int64, + `title` Utf8, + `air_date` Date, + PRIMARY KEY (`series_id`, `season_id`, `episode_id`) + ) + """ ) ``` {% endlist %} -В параметр path передаётся абсолютный путь от корня: - -```python -full_path = os.path.join(database, path) -``` - Функция `pool.execute_with_retries(query)`, в отличие от `tx.execute()`, загружает в память результат запроса перед его возвращением клиенту. Благодаря этому отпадает необходимость использования специальных конструкций для контроля над итератором, однако необходимо с осторожностью применять этот метод для больших запросов `SELECT`. Подробнее о стримах будет сказано ниже. {% include [steps/03_write_queries.md](../_includes/steps/03_write_queries.md) %} @@ -184,11 +172,10 @@ full_path = os.path.join(database, path) - Синхронный ```python - def upsert_simple(pool, path): + def upsert_simple(pool: ydb.QuerySessionPool): print("\nPerforming UPSERT into episodes...") pool.execute_with_retries( - f""" - PRAGMA TablePathPrefix("{path}"); + """ UPSERT INTO episodes (series_id, season_id, episode_id, title) VALUES (2, 6, 1, "TBD"); """ ) @@ -197,11 +184,10 @@ full_path = os.path.join(database, path) - Асинхронный ```python - async def upsert_simple(pool: ydb.aio.QuerySessionPoolAsync, path: str): + async def upsert_simple(pool: ydb.aio.QuerySessionPoolAsync): print("\nPerforming UPSERT into episodes...") await pool.execute_with_retries( - f""" - PRAGMA TablePathPrefix("{path}"); + """ UPSERT INTO episodes (series_id, season_id, episode_id, title) VALUES (2, 6, 1, "TBD"); """ ) @@ -209,8 +195,6 @@ full_path = os.path.join(database, path) {% endlist %} -{% include [pragmatablepathprefix.md](../_includes/auxilary/pragmatablepathprefix.md) %} - {% include [steps/04_query_processing.md](../_includes/steps/04_query_processing.md) %} Для выполнения YQL-запросов чаще всего достаточно использования уже знакомого метода `pool.execute_with_retries()`. 
@@ -220,11 +204,10 @@ full_path = os.path.join(database, path) - Синхронный ```python - def select_simple(pool: ydb.QuerySessionPool, path: str): + def select_simple(pool: ydb.QuerySessionPool): print("\nCheck series table...") result_sets = pool.execute_with_retries( - f""" - PRAGMA TablePathPrefix("{path}"); + """ SELECT series_id, title, @@ -249,11 +232,10 @@ full_path = os.path.join(database, path) - Асинхронный ```python - async def select_simple(pool: ydb.aio.QuerySessionPoolAsync, path: str): + async def select_simple(pool: ydb.aio.QuerySessionPoolAsync): print("\nCheck series table...") result_sets = await pool.execute_with_retries( - f""" - PRAGMA TablePathPrefix("{path}"); + """ SELECT series_id, title, @@ -309,10 +291,13 @@ series, Id: 1, title: IT Crowd, Release date: 2006-02-03 - Синхронный ```python - def select_with_parameters(pool: ydb.QuerySessionPool, path: str, series_id, season_id, episode_id): + def select_with_parameters(pool: ydb.QuerySessionPool, series_id, season_id, episode_id): result_sets = pool.execute_with_retries( - f""" - PRAGMA TablePathPrefix("{path}"); + """ + DECLARE $seriesId AS Int64; + DECLARE $seasonId AS Int64; + DECLARE $episodeId AS Int64; + SELECT title, air_date @@ -337,10 +322,13 @@ series, Id: 1, title: IT Crowd, Release date: 2006-02-03 - Асинхронный ```python - async def select_with_parameters(pool: ydb.aio.QuerySessionPoolAsync, path: str, series_id, season_id, episode_id): + async def select_with_parameters(pool: ydb.aio.QuerySessionPoolAsync, series_id, season_id, episode_id): result_sets = await pool.execute_with_retries( - f""" - PRAGMA TablePathPrefix("{path}"); + """ + DECLARE $seriesId AS Int64; + DECLARE $seasonId AS Int64; + DECLARE $episodeId AS Int64; + SELECT title, air_date @@ -417,10 +405,13 @@ series, Id: 1, title: IT Crowd, Release date: 2006-02-03 - Синхронный ```python - def explicit_transaction_control(pool: ydb.QuerySessionPool, path: str, series_id, season_id, episode_id): + def 
explicit_transaction_control(pool: ydb.QuerySessionPool, series_id, season_id, episode_id): def callee(session: ydb.QuerySessionSync): - query = f""" - PRAGMA TablePathPrefix("{path}"); + query = """ + DECLARE $seriesId AS Int64; + DECLARE $seasonId AS Int64; + DECLARE $episodeId AS Int64; + UPDATE episodes SET air_date = CurrentUtcDate() WHERE series_id = $seriesId AND season_id = $seasonId AND episode_id = $episodeId; @@ -453,11 +444,14 @@ series, Id: 1, title: IT Crowd, Release date: 2006-02-03 ```python async def explicit_transaction_control( - pool: ydb.aio.QuerySessionPoolAsync, path: str, series_id, season_id, episode_id + pool: ydb.aio.QuerySessionPoolAsync, series_id, season_id, episode_id ): async def callee(session: ydb.aio.QuerySessionAsync): - query = f""" - PRAGMA TablePathPrefix("{path}"); + query = """ + DECLARE $seriesId AS Int64; + DECLARE $seasonId AS Int64; + DECLARE $episodeId AS Int64; + UPDATE episodes SET air_date = CurrentUtcDate() WHERE series_id = $seriesId AND season_id = $seasonId AND episode_id = $episodeId; @@ -502,12 +496,9 @@ series, Id: 1, title: IT Crowd, Release date: 2006-02-03 - Синхронный ```python - def huge_select(pool: ydb.QuerySessionPool, path: str): + def huge_select(pool: ydb.QuerySessionPool): def callee(session: ydb.QuerySessionSync): - query = f""" - PRAGMA TablePathPrefix("{path}"); - SELECT * from episodes; - """ + query = """SELECT * from episodes;""" with session.transaction().execute( query, @@ -524,12 +515,9 @@ series, Id: 1, title: IT Crowd, Release date: 2006-02-03 - Асинхронный ```python - async def huge_select(pool: ydb.aio.QuerySessionPoolAsync, path: str): + async def huge_select(pool: ydb.aio.QuerySessionPoolAsync): async def callee(session: ydb.aio.QuerySessionAsync): - query = f""" - PRAGMA TablePathPrefix("{path}"); - SELECT * from episodes; - """ + query = """SELECT * from episodes;""" async with await session.transaction().execute( query, From 39c9f7e9f8ff8b71698ff5d3e366c5ad5b50a34a Mon Sep 17 
00:00:00 2001 From: Oleg Ovcharuk Date: Mon, 26 Aug 2024 11:19:41 +0300 Subject: [PATCH 08/11] Apply suggestions from code review Co-authored-by: Ivan Blinkov --- .../en/core/dev/example-app/python/index.md | 102 ++++++++++-------- .../ru/core/dev/example-app/python/index.md | 6 +- 2 files changed, 62 insertions(+), 46 deletions(-) diff --git a/ydb/docs/en/core/dev/example-app/python/index.md b/ydb/docs/en/core/dev/example-app/python/index.md index 1dca176a684a..efb07d2f6300 100644 --- a/ydb/docs/en/core/dev/example-app/python/index.md +++ b/ydb/docs/en/core/dev/example-app/python/index.md @@ -61,9 +61,25 @@ App code snippet for driver initialization: {% endlist %} +## Executing queries + +The {{ ydb-short-name }} Python SDK provides two primary methods for executing queries, each with different properties and use cases: + +* `pool.execute_with_retries`: + * Buffers the entire result set in client memory. + * Automatically retries execution in case of retriable issues. + * Does not allow specifying a transaction execution mode. + * Recommended for one-off queries that are expected to produce small result sets. + +* `tx.execute`: + * Returns an iterator over the query results, allowing processing of results that may not fit into client memory. + * Retries must be handled manually. + * Allows specifying a transaction execution mode. + * Recommended for scenarios where `pool.execute_with_retries` is insufficient. + {% include [create_table.md](../_includes/steps/02_create_table.md) %} -To execute YQL queries, use the `pool.execute_with_retries()` method. For example, it is possible to create table: +To execute `CREATE TABLE` queries, use the `pool.execute_with_retries()` method: {% list tabs %} @@ -74,7 +90,7 @@ To execute YQL queries, use the `pool.execute_with_retries()` method. 
For exampl print("\nCreating table series...") pool.execute_with_retries( """ - CREATE table `series` ( + CREATE TABLE `series` ( `series_id` Int64, `title` Utf8, `series_info` Utf8, @@ -87,7 +103,7 @@ To execute YQL queries, use the `pool.execute_with_retries()` method. For exampl print("\nCreating table seasons...") pool.execute_with_retries( """ - CREATE table `seasons` ( + CREATE TABLE `seasons` ( `series_id` Int64, `season_id` Int64, `title` Utf8, @@ -101,7 +117,7 @@ To execute YQL queries, use the `pool.execute_with_retries()` method. For exampl print("\nCreating table episodes...") pool.execute_with_retries( """ - CREATE table `episodes` ( + CREATE TABLE `episodes` ( `series_id` Int64, `season_id` Int64, `episode_id` Int64, @@ -120,7 +136,7 @@ To execute YQL queries, use the `pool.execute_with_retries()` method. For exampl print("\nCreating table series...") await pool.execute_with_retries( """ - CREATE table `series` ( + CREATE TABLE `series` ( `series_id` Int64, `title` Utf8, `series_info` Utf8, @@ -133,7 +149,7 @@ To execute YQL queries, use the `pool.execute_with_retries()` method. For exampl print("\nCreating table seasons...") await pool.execute_with_retries( """ - CREATE table `seasons` ( + CREATE TABLE `seasons` ( `series_id` Int64, `season_id` Int64, `title` Utf8, @@ -147,7 +163,7 @@ To execute YQL queries, use the `pool.execute_with_retries()` method. For exampl print("\nCreating table episodes...") await pool.execute_with_retries( """ - CREATE table `episodes` ( + CREATE TABLE `episodes` ( `series_id` Int64, `season_id` Int64, `episode_id` Int64, @@ -161,7 +177,6 @@ To execute YQL queries, use the `pool.execute_with_retries()` method. For exampl {% endlist %} -The function `pool.execute_with_retries(query)`, unlike `tx.execute()`, loads the result of the query into memory before returning it to the client. 
This eliminates the need to use special constructs to control the iterator, but it is necessary to use this method with caution for large `SELECT` queries. More information about streams will be discussed below. {% include [steps/03_write_queries.md](../_includes/steps/03_write_queries.md) %} @@ -197,7 +212,7 @@ Code snippet for data insert/update: {% include [steps/04_query_processing.md](../_includes/steps/04_query_processing.md) %} -To execute YQL queries, it is often enough to use the already familiar `pool.execute_with_retries()` method. +To execute YQL queries, the `pool.execute_with_retries()` method is often sufficient. {% list tabs %} @@ -268,23 +283,31 @@ series, Id: 1, title: IT Crowd, Release date: 2006-02-03 ## Parameterized queries {#param-queries} -To execute parameterized queries in the `pool.execute_with_retries()` method (or `tx.execute()`, which will be shown in the next section) it is necessary to pass a dictionary with parameters of a special type, where the key is the parameter name, and the value can be one of the following: -1. The usual value; -2. Tuple with value and type; -3. A special type `ydb.TypedValue(value=value, value_type=value_type)`. +For parameterized query execution, `pool.execute_with_retries()` and `tx.execute()` behave similarly. To execute parameterized queries, you need to pass a dictionary with parameters to one of these functions, where each key is the parameter name, and the value can be one of the following: + +1. A value of a basic Python type +2. A tuple containing the value and its type +3. 
A special type, `ydb.TypedValue(value=value, value_type=value_type)` + +If you specify a value without an explicit type, the conversion takes place according to the following rules: -If you specify a value without a type, the conversion takes place according to the following rules: -* `int` -> `ydb.PrimitiveType.Int64` -* `float` -> `ydb.PrimitiveType.Double` -* `str` -> `ydb.PrimitiveType.Utf8` -* `bytes` -> `ydb.PrimitiveType.String` -* `bool` -> `ydb.PrimitiveType.Bool` -* `list` -> `ydb.ListType` -* `dict` -> `ydb.DictType` +| Python type | {{ ydb-short-name }} type | +|------------|------------------------------| +| `int` | `ydb.PrimitiveType.Int64` | +| `float` | `ydb.PrimitiveType.Double` | +| `str` | `ydb.PrimitiveType.Utf8` | +| `bytes` | `ydb.PrimitiveType.String` | +| `bool` | `ydb.PrimitiveType.Bool` | +| `list` | `ydb.ListType` | +| `dict` | `ydb.DictType` | -Automatic conversion of lists and dictionaries is possible only in the case of homogeneous structures, the type of nested value will be calculated recursively according to the above rules. +{% note warning %} -A code snippet demonstrating the possibility of using parameterized queries: +Automatic conversion of lists and dictionaries is possible only if the structures are homogeneous. The type of nested values will be determined recursively according to the rules explained above. 
+ +{% endnote %} + +A code snippet demonstrating the parameterized query execution: {% list tabs %} @@ -306,8 +329,8 @@ A code snippet demonstrating the possibility of using parameterized queries: """, { "$seriesId": series_id, # data type could be defined implicitly - "$seasonId": (season_id, ydb.PrimitiveType.Int64), # could be defined via tuple - "$episodeId": ydb.TypedValue(episode_id, ydb.PrimitiveType.Int64), # could be defined via special class + "$seasonId": (season_id, ydb.PrimitiveType.Int64), # could be defined via a tuple + "$episodeId": ydb.TypedValue(episode_id, ydb.PrimitiveType.Int64), # could be defined via a special class }, ) @@ -336,9 +359,9 @@ A code snippet demonstrating the possibility of using parameterized queries: WHERE series_id = $seriesId AND season_id = $seasonId AND episode_id = $episodeId; """, { - "$seriesId": series_id, # could be defined implicit - "$seasonId": (season_id, ydb.PrimitiveType.Int64), # could be defined via tuple - "$episodeId": ydb.TypedValue(episode_id, ydb.PrimitiveType.Int64), # could be defined via special class + "$seriesId": series_id, # could be defined implicitly + "$seasonId": (season_id, ydb.PrimitiveType.Int64), # could be defined via a tuple + "$episodeId": ydb.TypedValue(episode_id, ydb.PrimitiveType.Int64), # could be defined via a special class }, ) @@ -352,7 +375,7 @@ A code snippet demonstrating the possibility of using parameterized queries: {% endlist %} -The above code snippet outputs text to the console: +The code snippet above outputs the following text to the console: ```bash > select_prepared_transaction: @@ -361,8 +384,7 @@ The above code snippet outputs text to the console: {% include [transaction_control.md](../_includes/steps/10_transaction_control.md) %} -The `session.transaction().execute()` method can also be used to execute YQL queries. 
-This method, unlike `pool.execute_with_retries`, allows you to explicitly control the execution of transactions and configure the needed transaction mode using the `TxControl` class. +The `session.transaction().execute()` method can also be used to execute YQL queries. Unlike `pool.execute_with_retries`, this method allows explicit control of transaction execution by configuring the desired transaction mode using the `TxControl` class. Available transaction modes: * `ydb.QuerySerializableReadWrite()` (default); @@ -370,11 +392,9 @@ Available transaction modes: * `ydb.QuerySnapshotReadOnly()`; * `ydb.QueryStaleReadOnly()`. -For more information about transaction modes, see [YDB docs](https://ydb.tech/docs/en/concepts/transactions#modes) +For more information about transaction modes, see [{#T}](../../../concepts/transactions.md#modes). -The result of executing `tx.execute()` is an iterator. The iterator allows you to read an unlimited number of rows and a volume of data without loading the entire result into memory. -However, in order to correctly save the state of the transaction on the `ydb` side, the iterator must be read to the end after each request. -For convenience, the result of the `tx.execute()` function is presented as a context manager that scrolls through the iterator to the end after exiting. +The result of executing `tx.execute()` is an iterator. This iterator allows you to read result rows without loading the entire result set into memory. However, the iterator must be read to the end after each request to correctly maintain the transaction state on the {{ ydb-short-name }} server side. For convenience, the result of the `tx.execute()` function can be used as a context manager that automatically iterates to the end upon exit. 
{% list tabs %} @@ -394,10 +414,7 @@ For convenience, the result of the `tx.execute()` function is presented as a con {% endlist %} -In the code snippet below, the transaction is executed using the `transaction().execute()` method. The transaction mode is set to `ydb.QuerySerializableReadWrite()`. -The request body is described using YQL syntax and is passed to the `execute` method as the parameter. - -A code snippet demonstrating the explicit use of `transaction().begin()` and `tx.commit()`: +The code snippet below demonstrates the explicit use of `transaction().begin()` and `tx.commit()` with the transaction mode set to `ydb.QuerySerializableReadWrite()`: {% list tabs %} @@ -481,12 +498,11 @@ A code snippet demonstrating the explicit use of `transaction().begin()` and `tx {% endlist %} -However, it is worth remembering that a transaction can be opened implicitly at the first request. It could be commited automatically with the explicit indication of the `commit_tx=True` flag. -Implicit transaction management is preferable because it requires fewer server calls. An example of implicit control will be demonstrated in the next block. +However, a transaction can be opened implicitly with the first request and can be committed automatically by setting the `commit_tx=True` flag in arguments. Implicit transaction management is preferable because it requires fewer server calls. -## Huge Selects {#huge-selects} +## Iterating over query results {#iterating} -To perform `SELECT` operations with an unlimited number of found rows, you must also use the `transaction().execute(query)` method. As mentioned above, the result of the work is an iterator - unlike `pool.execute_with_retries(query)`, it allows you to go through the selection without first loading it into memory. 
+If a `SELECT` query is expected to return a potentially large number of rows, it is recommended to use the `tx.execute` method instead of `pool.execute_with_retries` to avoid excessive memory consumption on the client side. Instead of buffering the entire result set into memory, `tx.execute` returns an iterator for each top-level `SELECT` statement in the query. Example of a `SELECT` with unlimited data and implicit transaction control: diff --git a/ydb/docs/ru/core/dev/example-app/python/index.md b/ydb/docs/ru/core/dev/example-app/python/index.md index 6dd8a0ac8287..76f0e8f767e5 100644 --- a/ydb/docs/ru/core/dev/example-app/python/index.md +++ b/ydb/docs/ru/core/dev/example-app/python/index.md @@ -74,7 +74,7 @@ python3 -m pip install iso8601 print("\nCreating table series...") pool.execute_with_retries( """ - CREATE table `series` ( + CREATE TABLE `series` ( `series_id` Int64, `title` Utf8, `series_info` Utf8, @@ -87,7 +87,7 @@ python3 -m pip install iso8601 print("\nCreating table seasons...") pool.execute_with_retries( """ - CREATE table `seasons` ( + CREATE TABLE `seasons` ( `series_id` Int64, `season_id` Int64, `title` Utf8, @@ -101,7 +101,7 @@ python3 -m pip install iso8601 print("\nCreating table episodes...") pool.execute_with_retries( """ - CREATE table `episodes` ( + CREATE TABLE `episodes` ( `series_id` Int64, `season_id` Int64, `episode_id` Int64, From 8c87a9932662ced388487d4fcb4348bf8014ab62 Mon Sep 17 00:00:00 2001 From: Oleg Ovcharuk Date: Mon, 26 Aug 2024 14:21:10 +0300 Subject: [PATCH 09/11] Fixes after review --- .../en/core/dev/example-app/python/index.md | 48 +++++--- .../ru/core/dev/example-app/python/index.md | 113 +++++++++++------- 2 files changed, 105 insertions(+), 56 deletions(-) diff --git a/ydb/docs/en/core/dev/example-app/python/index.md b/ydb/docs/en/core/dev/example-app/python/index.md index efb07d2f6300..d35e5acbfd8f 100644 --- a/ydb/docs/en/core/dev/example-app/python/index.md +++ b/ydb/docs/en/core/dev/example-app/python/index.md 
@@ -61,19 +61,38 @@ App code snippet for driver initialization: {% endlist %} +App code snippet for session pool initialization: + +- Synchronous + + ```python + with ydb.QuerySessionPool(driver) as pool: + pass # operations with pool here + ``` + +- Asynchronous + + ```python + async with ydb.aio.QuerySessionPoolAsync(driver) as pool: + pass # operations with pool here + ``` + +{% endlist %} + ## Executing queries -The {{ ydb-short-name }} Python SDK provides two primary methods for executing queries, each with different properties and use cases: +{{ ydb-short-name }} Python SDK supports queries described by YQL syntax. +There are two primary methods for executing queries, each with different properties and use cases: * `pool.execute_with_retries`: * Buffers the entire result set in client memory. * Automatically retries execution in case of retriable issues. * Does not allow specifying a transaction execution mode. * Recommended for one-off queries that are expected to produce small result sets. - + * `tx.execute`: * Returns an iterator over the query results, allowing processing of results that may not fit into client memory. - * Retries must be handled manually. + * Retries must be handled manually via `pool.retry_operation_sync`. * Allows specifying a transaction execution mode. * Recommended for scenarios where `pool.execute_with_retries` is insufficient. 
@@ -177,7 +196,6 @@ To execute `CREATE TABLE` queries, use the `pool.execute_with_retries()` method: {% endlist %} - {% include [steps/03_write_queries.md](../_includes/steps/03_write_queries.md) %} Code snippet for data insert/update: @@ -274,7 +292,7 @@ To execute YQL queries, the `pool.execute_with_retries()` method is often suffic {% endlist %} -As the result of executing the query, a `result_set` is returned, iterating on which the text is output to the console: +The query returns a list of `result_set` objects; iterating over them outputs the following text to the console: ```bash > SelectSimple: @@ -285,9 +303,9 @@ series, Id: 1, title: IT Crowd, Release date: 2006-02-03 For parameterized query execution, `pool.execute_with_retries()` and `tx.execute()` behave similarly. To execute parameterized queries, you need to pass a dictionary with parameters to one of these functions, where each key is the parameter name, and the value can be one of the following: -1. A value of a basic Python type -2. A tuple containing the value and its type -3. A special type, `ydb.TypedValue(value=value, value_type=value_type)` +1. A value of a basic Python type +2. A tuple containing the value and its type +3. A special type, `ydb.TypedValue(value=value, value_type=value_type)` If you specify a value without an explicit type, the conversion takes place according to the following rules: @@ -303,7 +321,7 @@ If you specify a value without an explicit type, the conversion takes place acco {% note warning %} -Automatic conversion of lists and dictionaries is possible only if the structures are homogeneous. The type of nested values will be determined recursively according to the rules explained above. +Automatic conversion of lists and dictionaries is possible only if the structures are homogeneous. The type of nested values will be determined recursively according to the rules explained above. If the structures are heterogeneous, the request raises a `TypeError`. 
{% endnote %} @@ -394,7 +412,7 @@ Available transaction modes: For more information about transaction modes, see [{#T}](../../../concepts/transactions.md#modes). -The result of executing `tx.execute()` is an iterator. This iterator allows you to read result rows without loading the entire result set into memory. However, the iterator must be read to the end after each request to correctly maintain the transaction state on the {{ ydb-short-name }} server side. For convenience, the result of the `tx.execute()` function can be used as a context manager that automatically iterates to the end upon exit. +The result of executing `tx.execute()` is an iterator. This iterator allows you to read result rows without loading the entire result set into memory. However, the iterator must be read to the end after each request to correctly maintain the transaction state on the {{ ydb-short-name }} server side. If this is not done, write queries might not be applied on the {{ ydb-short-name }} server side. For convenience, the result of the `tx.execute()` function can be used as a context manager that automatically iterates to the end upon exit. {% list tabs %} @@ -414,7 +432,7 @@ The result of executing `tx.execute()` is an iterator. This iterator allows you {% endlist %} -The code snippet below demonstrates the explicit use of `transaction().begin()` and `tx.commit()` with the transaction mode set to `ydb.QuerySerializableReadWrite()`: +The code snippet below demonstrates the explicit use of `transaction().begin()` and `tx.commit()`: {% list tabs %} @@ -434,7 +452,7 @@ The code snippet below demonstrates the explicit use of `transaction().begin()` """ # Get newly created transaction id - tx = session.transaction(ydb.QuerySerializableReadWrite()).begin() + tx = session.transaction().begin() # Execute data query. 
# Transaction control settings continues active transaction (tx) @@ -474,7 +492,7 @@ The code snippet below demonstrates the explicit use of `transaction().begin()` """ # Get newly created transaction id - tx = await session.transaction(ydb.QuerySerializableReadWrite()).begin() + tx = await session.transaction().begin() # Execute data query. # Transaction control settings continues active transaction (tx) @@ -515,7 +533,7 @@ Example of a `SELECT` with unlimited data and implicit transaction control: def callee(session: ydb.QuerySessionSync): query = """SELECT * from episodes;""" - with session.transaction().execute( + with session.transaction(ydb.QuerySnapshotReadOnly()).execute( query, commit_tx=True, ) as result_sets: @@ -534,7 +552,7 @@ Example of a `SELECT` with unlimited data and implicit transaction control: async def callee(session: ydb.aio.QuerySessionAsync): query = """SELECT * from episodes;""" - async with await session.transaction().execute( + async with await session.transaction(ydb.QuerySnapshotReadOnly()).execute( query, commit_tx=True, ) as result_sets: diff --git a/ydb/docs/ru/core/dev/example-app/python/index.md b/ydb/docs/ru/core/dev/example-app/python/index.md index 76f0e8f767e5..e2a306490748 100644 --- a/ydb/docs/ru/core/dev/example-app/python/index.md +++ b/ydb/docs/ru/core/dev/example-app/python/index.md @@ -59,11 +59,44 @@ python3 -m pip install iso8601 exit(1) ``` +Фрагмент кода приложения для создания пуля сессий: + +- Синхронный + + ```python + with ydb.QuerySessionPool(driver) as pool: + pass # operations with pool here + ``` + +- Асинхронный + + ```python + async with ydb.aio.QuerySessionPoolAsync(driver) as pool: + pass # operations with pool here + ``` + {% endlist %} +## Выполнение запросов + +{{ ydb-short-name }} Python SDK поддерживает выполнение запросов с использованием синтаксиса YQL. 
+Существует два основных метода для выполнения запросов, которые имеют различные свойства и области применения: + +* `pool.execute_with_retries`: + * Буферизует весь результат в памяти клиента. + * Автоматически перезапускает выполнение в случае ошибок, которые можно устранить перезапуском. + * Не позволяет указать режим выполнения транзакции. + * Рекомендуется для разовых запросов, которые возвращают небольшой по размеру результат. + +* `tx.execute`: + * Возвращает итератор над результатом запроса, что позволяет обработать результат, который может не поместиться в памяти клиента. + * Перезапуски в случае ошибок должны обрабатываться вручную с помощью `pool.retry_operation_sync`. + * Позволяет указать режим выполнения транзакции. + * Рекомендуется для сценариев, где `pool.execute_with_retries` неэффективен. + {% include [create_table.md](../_includes/steps/02_create_table.md) %} -Для выполнения YQL запросов используется метод `pool.execute_with_retries()`. Например, можно создать строковые таблицы: +Для выполнения запросов `CREATE TABLE` стоит использовать метод `pool.execute_with_retries()`: {% list tabs %} @@ -120,7 +153,7 @@ python3 -m pip install iso8601 print("\nCreating table series...") await pool.execute_with_retries( """ - CREATE table `series` ( + CREATE TABLE `series` ( `series_id` Int64, `title` Utf8, `series_info` Utf8, @@ -133,7 +166,7 @@ python3 -m pip install iso8601 print("\nCreating table seasons...") await pool.execute_with_retries( """ - CREATE table `seasons` ( + CREATE TABLE `seasons` ( `series_id` Int64, `season_id` Int64, `title` Utf8, @@ -147,7 +180,7 @@ python3 -m pip install iso8601 print("\nCreating table episodes...") await pool.execute_with_retries( """ - CREATE table `episodes` ( + CREATE TABLE `episodes` ( `series_id` Int64, `season_id` Int64, `episode_id` Int64, @@ -161,8 +194,6 @@ python3 -m pip install iso8601 {% endlist %} -Функция `pool.execute_with_retries(query)`, в отличие от `tx.execute()`, загружает в память результат 
запроса перед его возвращением клиенту. Благодаря этому отпадает необходимость использования специальных конструкций для контроля над итератором, однако необходимо с осторожностью применять этот метод для больших запросов `SELECT`. Подробнее о стримах будет сказано ниже. - {% include [steps/03_write_queries.md](../_includes/steps/03_write_queries.md) %} Фрагмент кода, демонстрирующий выполнение запроса на запись/изменение данных: @@ -197,7 +228,7 @@ python3 -m pip install iso8601 {% include [steps/04_query_processing.md](../_includes/steps/04_query_processing.md) %} -Для выполнения YQL-запросов чаще всего достаточно использования уже знакомого метода `pool.execute_with_retries()`. +Для выполнения YQL-запросов часто достаточно метода `pool.execute_with_retries()`. {% list tabs %} @@ -259,7 +290,7 @@ python3 -m pip install iso8601 {% endlist %} -В качестве результата выполнения запроса возвращается `result_set`, итерирование по которому выводит на консоль текст: +В качестве результата выполнения запроса возвращается список из `result_set`, итерирование по которым выводит на консоль текст: ```bash > SelectSimple: @@ -268,21 +299,27 @@ series, Id: 1, title: IT Crowd, Release date: 2006-02-03 ## Параметризованные запросы {#param-queries} -Для выполнения параметризованных запросов в метод `pool.execute_with_retries()` (или `tx.execute()`, работа с которым будет показана в следующей секции) необходимо передать словарь с параметрами специального вида, где ключом служит имя параметра, а значение может быть одним из следующих: -1. Обычное значение; -2. Кортеж со значением и типом; -3. Специальный тип `ydb.TypedValue(value=value, value_type=value_type)`. +Для выполнения параметризованных запросов методы `pool.execute_with_retries()` и `tx.execute()` работают схожим образом — необходимо передать словарь с параметрами специального вида, где ключом служит имя параметра, а значение может быть одним из следующих: + +1. Обычное значение +2. Кортеж со значением и типом +3. 
Специальный тип `ydb.TypedValue(value=value, value_type=value_type)` В случае указания значения без типа, конвертация происходит по следующим правилам: -* `int` -> `ydb.PrimitiveType.Int64` -* `float` -> `ydb.PrimitiveType.Double` -* `str` -> `ydb.PrimitiveType.Utf8` -* `bytes` -> `ydb.PrimitiveType.String` -* `bool` -> `ydb.PrimitiveType.Bool` -* `list` -> `ydb.ListType` -* `dict` -> `ydb.DictType` -Автоматическая конвертация списков и словарей возможна только в случае однородных структур, тип вложенного значения будет вычисляться рекурсивно по вышеупомянутым правилам. +| Python type | {{ ydb-short-name }} type | +|------------|------------------------------| +| `int` | `ydb.PrimitiveType.Int64` | +| `float` | `ydb.PrimitiveType.Double` | +| `str` | `ydb.PrimitiveType.Utf8` | +| `bytes` | `ydb.PrimitiveType.String` | +| `bool` | `ydb.PrimitiveType.Bool` | +| `list` | `ydb.ListType` | +| `dict` | `ydb.DictType` | + +{% note warning %} + +Автоматическая конвертация списков и словарей возможна только в случае однородных структур. Тип вложенного значения будет вычисляться рекурсивно по вышеупомянутым правилам. В случае использования неоднородной структуры запросы будут падать с ошибкой типа `TypeError`. 
Фрагмент кода, демонстрирующий возможность использования параметризованных запросов: @@ -306,8 +343,8 @@ series, Id: 1, title: IT Crowd, Release date: 2006-02-03 """, { "$seriesId": series_id, # data type could be defined implicitly - "$seasonId": (season_id, ydb.PrimitiveType.Int64), # could be defined via tuple - "$episodeId": ydb.TypedValue(episode_id, ydb.PrimitiveType.Int64), # could be defined via special class + "$seasonId": (season_id, ydb.PrimitiveType.Int64), # could be defined via a tuple + "$episodeId": ydb.TypedValue(episode_id, ydb.PrimitiveType.Int64), # could be defined via a special class }, ) @@ -336,9 +373,9 @@ series, Id: 1, title: IT Crowd, Release date: 2006-02-03 WHERE series_id = $seriesId AND season_id = $seasonId AND episode_id = $episodeId; """, { - "$seriesId": series_id, # could be defined implicit - "$seasonId": (season_id, ydb.PrimitiveType.Int64), # could be defined via tuple - "$episodeId": ydb.TypedValue(episode_id, ydb.PrimitiveType.Int64), # could be defined via special class + "$seriesId": series_id, # could be defined implicitly + "$seasonId": (season_id, ydb.PrimitiveType.Int64), # could be defined via a tuple + "$episodeId": ydb.TypedValue(episode_id, ydb.PrimitiveType.Int64), # could be defined via a special class }, ) @@ -352,7 +389,7 @@ series, Id: 1, title: IT Crowd, Release date: 2006-02-03 {% endlist %} -Приведенный фрагмент кода при запуске выводит на консоль текст: +Фрагмент кода выше при запуске выводит на консоль текст: ```bash > select_prepared_transaction: @@ -362,8 +399,7 @@ series, Id: 1, title: IT Crowd, Release date: 2006-02-03 {% include [transaction_control.md](../_includes/steps/10_transaction_control.md) %} -Для выполнения YQL-запросов также может использоваться метод `session.transaction().execute()`. -Данный способ, в отличие от `pool.execute_with_retries` позволяет в явном виде контролировать выполнение транзакций и настраивать необходимый режим выполнения транзакций с помощью класса `TxControl`. 
+Метод `session.transaction().execute()` также может быть использован для выполнения YQL-запросов. В отличие от `pool.execute_with_retries`, данный метод позволяет в явном виде контролировать выполнение транзакций и настраивать необходимый режим выполнения транзакций с помощью класса `TxControl`. Доступные режимы транзакции: * `ydb.QuerySerializableReadWrite()` (по умолчанию); @@ -371,11 +407,9 @@ series, Id: 1, title: IT Crowd, Release date: 2006-02-03 * `ydb.QuerySnapshotReadOnly()`; * `ydb.QueryStaleReadOnly()`. -Подробнее про режимы транзакций описано в [документации YDB](https://ydb.tech/docs/ru/concepts/transactions#modes). +Подробнее о режимах транзакций см. [{#T}](../../../concepts/transactions.md#modes). -Результатом выполнения `tx.execute()` является итератор. Итератор позволяет считать неограниченное количество строк и объем данных, не загружая в память весь результат. -Однако, для корректного сохранения состояния транзакции на стороне `ydb` итератор необходимо прочитывать до конца после каждого запроса. -Для удобства результат функции `tx.execute()` представлен в виде контекстного менеджера, который долистывает итератор до конца после выхода. +Результатом выполнения `tx.execute()` является итератор. Итератор позволяет считать неограниченное количество строк и объем данных, не загружая в память весь результат. Однако для корректного сохранения состояния транзакции на стороне {{ ydb-short-name }} итератор необходимо прочитывать до конца после каждого запроса. Если этого не сделать, пишущие запросы могут не выполниться на стороне {{ ydb-short-name }}. Для удобства результат функции `tx.execute()` представлен в виде контекстного менеджера, который долистывает итератор до конца после выхода. {% list tabs %} @@ -395,9 +429,6 @@ series, Id: 1, title: IT Crowd, Release date: 2006-02-03 {% endlist %} -В фрагменте кода, приведенном ниже, транзакция выполняется с помощью метода `transaction().execute()`. 
Устанавливается режим выполнения транзакции `ydb.QuerySerializableReadWrite()`. -Тело запроса описано с помощью синтаксиса YQL и как параметр передается методу `execute`. - Фрагмент кода, демонстрирующий явное использование вызовов `transaction().begin()` и `tx.commit()`: {% list tabs %} @@ -418,7 +449,7 @@ series, Id: 1, title: IT Crowd, Release date: 2006-02-03 """ # Get newly created transaction id - tx = session.transaction(ydb.QuerySerializableReadWrite()).begin() + tx = session.transaction().begin() # Execute data query. # Transaction control settings continues active transaction (tx) @@ -458,7 +489,7 @@ series, Id: 1, title: IT Crowd, Release date: 2006-02-03 """ # Get newly created transaction id - tx = await session.transaction(ydb.QuerySerializableReadWrite()).begin() + tx = await session.transaction().begin() # Execute data query. # Transaction control settings continues active transaction (tx) @@ -485,9 +516,9 @@ series, Id: 1, title: IT Crowd, Release date: 2006-02-03 Однако стоит помнить, что транзакция может быть открыта неявно при первом запросе. Завершиться же она может автоматически с явным указанием флага `commit_tx=True`. Неявное управление транзакцией предпочтительно, так как требует меньше обращений к серверу. Пример неявного управления будет продемонстрирован в следующем блоке. -## Чтение неограниченной выборки {#huge-selects} +## Итерирование по результатам запроса {#iterating} -Для выполнения операций `SELECT` с неограниченным количеством найденных строк нужно также использовать метод `transaction().execute(query)`. Как было сказано выше, результатом работы является итератор - он, в отличие от `pool.execute_with_retries(query)`, позволяет пройтись по выборке не загружая ее предварительно в память. 
+Если ожидается, что `SELECT`-запрос вернёт потенциально большое количество строк, рекомендуется использовать метод `tx.execute` вместо `pool.execute_with_retries`, чтобы избежать чрезмерного потребления памяти на стороне клиента. Пример `SELECT` с неограниченным количеством данных и неявным контролем транзакции: @@ -500,7 +531,7 @@ series, Id: 1, title: IT Crowd, Release date: 2006-02-03 def callee(session: ydb.QuerySessionSync): query = """SELECT * from episodes;""" - with session.transaction().execute( + with session.transaction(ydb.QuerySnapshotReadOnly()).execute( query, commit_tx=True, ) as result_sets: @@ -519,7 +550,7 @@ series, Id: 1, title: IT Crowd, Release date: 2006-02-03 async def callee(session: ydb.aio.QuerySessionAsync): query = """SELECT * from episodes;""" - async with await session.transaction().execute( + async with await session.transaction(ydb.QuerySnapshotReadOnly()).execute( query, commit_tx=True, ) as result_sets: From b7816794dfe767d48040718a804809331ee99d62 Mon Sep 17 00:00:00 2001 From: Oleg Ovcharuk Date: Mon, 26 Aug 2024 14:29:16 +0300 Subject: [PATCH 10/11] fix list tabs --- ydb/docs/en/core/dev/example-app/python/index.md | 2 ++ ydb/docs/ru/core/dev/example-app/python/index.md | 6 ++++++ 2 files changed, 8 insertions(+) diff --git a/ydb/docs/en/core/dev/example-app/python/index.md b/ydb/docs/en/core/dev/example-app/python/index.md index d35e5acbfd8f..844fb06d4c57 100644 --- a/ydb/docs/en/core/dev/example-app/python/index.md +++ b/ydb/docs/en/core/dev/example-app/python/index.md @@ -63,6 +63,8 @@ App code snippet for driver initialization: App code snippet for session pool initialization: +{% list tabs %} + - Synchronous ```python diff --git a/ydb/docs/ru/core/dev/example-app/python/index.md b/ydb/docs/ru/core/dev/example-app/python/index.md index e2a306490748..acf531af1bbf 100644 --- a/ydb/docs/ru/core/dev/example-app/python/index.md +++ b/ydb/docs/ru/core/dev/example-app/python/index.md @@ -59,8 +59,12 @@ 
python3 -m pip install iso8601 exit(1) ``` +{% endlist %} + Фрагмент кода приложения для создания пуля сессий: +{% list tabs %} + - Синхронный ```python @@ -321,6 +325,8 @@ series, Id: 1, title: IT Crowd, Release date: 2006-02-03 Автоматическая конвертация списков и словарей возможна только в случае однородных структур. Тип вложенного значения будет вычисляться рекурсивно по вышеупомянутым правилам. В случае использования неоднородной структуры запросы будут падать с ошибкой типа `TypeError`. +{% endnote %} + Фрагмент кода, демонстрирующий возможность использования параметризованных запросов: {% list tabs %} From cf7449b0e534f820cb179b50a2fb9fd7774229be Mon Sep 17 00:00:00 2001 From: Oleg Ovcharuk Date: Mon, 26 Aug 2024 14:52:38 +0300 Subject: [PATCH 11/11] fix typing --- ydb/docs/ru/core/dev/example-app/python/index.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ydb/docs/ru/core/dev/example-app/python/index.md b/ydb/docs/ru/core/dev/example-app/python/index.md index acf531af1bbf..2f25d8230d83 100644 --- a/ydb/docs/ru/core/dev/example-app/python/index.md +++ b/ydb/docs/ru/core/dev/example-app/python/index.md @@ -61,7 +61,7 @@ python3 -m pip install iso8601 {% endlist %} -Фрагмент кода приложения для создания пуля сессий: +Фрагмент кода приложения для создания пула сессий: {% list tabs %}