Skip to content

Commit 740978d

Browse files
author
Lucas Mendes Mota da Fonseca
authored
[DAE-92] Add method create_external_table (#42)
* Add new method create_external_table * Add example for create_external_table * Update methods list * Remove unnecessary assertion from test * Update docstring * Add example in docs
1 parent 30516c5 commit 740978d

File tree

5 files changed

+116
-6
lines changed

5 files changed

+116
-6
lines changed

docs/source/getstarted.md

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -22,9 +22,11 @@ Click on the following links to open the [examples](https://github.com/quintoand
2222

2323
**[#2 Create a table](https://github.com/quintoandar/hive-metastore-client/blob/main/examples/create_table.py)**
2424

25-
**[#3 Add columns to a table](https://github.com/quintoandar/hive-metastore-client/blob/main/examples/add_columns_to_table.py)**
25+
**[#3 Create an external table](https://github.com/quintoandar/hive-metastore-client/blob/main/examples/create_external_table.py)**
2626

27-
**[#4 Add partitions to a table](https://github.com/quintoandar/hive-metastore-client/blob/main/examples/add_partitions.py)**
27+
**[#4 Add columns to a table](https://github.com/quintoandar/hive-metastore-client/blob/main/examples/add_columns_to_table.py)**
28+
29+
**[#5 Add partitions to a table](https://github.com/quintoandar/hive-metastore-client/blob/main/examples/add_partitions.py)**
2830

2931
## Available methods
3032

@@ -38,4 +40,5 @@ the [`HiveMetastoreClient`](https://github.com/quintoandar/hive-metastore-client
3840
- [`add_columns_to_table`](https://hive-metastore-client.readthedocs.io/en/latest/hive_metastore_client.html#hive_metastore_client.hive_metastore_client.HiveMetastoreClient.add_columns_to_table)
3941
- [`drop_columns_from_table`](https://hive-metastore-client.readthedocs.io/en/latest/hive_metastore_client.html#hive_metastore_client.hive_metastore_client.HiveMetastoreClient.drop_columns_from_table)
4042
- [`add_partitions_if_not_exists`](https://hive-metastore-client.readthedocs.io/en/latest/hive_metastore_client.html#hive_metastore_client.hive_metastore_client.HiveMetastoreClient.add_partitions_if_not_exists)
41-
- [`create_database_if_not_exists`](https://hive-metastore-client.readthedocs.io/en/latest/hive_metastore_client.html#hive_metastore_client.hive_metastore_client.HiveMetastoreClient.create_database_if_not_exists)
43+
- [`create_database_if_not_exists`](https://hive-metastore-client.readthedocs.io/en/latest/hive_metastore_client.html#hive_metastore_client.hive_metastore_client.HiveMetastoreClient.create_database_if_not_exists)
44+
- [`create_external_table`](https://hive-metastore-client.readthedocs.io/en/latest/hive_metastore_client.html#hive_metastore_client.hive_metastore_client.HiveMetastoreClient.create_external_table)

examples/create_external_table.py

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
"""
2+
The thrift Table object requires other objects as arguments.
3+
Use the builders for creating each of them.
4+
Some arguments are optional when creating a thrift object.
5+
Check each Builder constructor for more information.
6+
7+
Due to a bug in Hive Metastore server we need to enforce the parameter
8+
EXTERNAL=TRUE when creating an external table. You can either use the
9+
method `create_external_table` with the table object or declare the two
10+
table parameters before calling the method create_table.
11+
"""
12+
13+
from hive_metastore_client import HiveMetastoreClient
14+
from hive_metastore_client.builders import (
15+
ColumnBuilder,
16+
SerDeInfoBuilder,
17+
StorageDescriptorBuilder,
18+
TableBuilder,
19+
)
20+
21+
HIVE_HOST = "<ADD_HIVE_HOST_HERE>"
22+
HIVE_PORT = 9083
23+
24+
# You must create a list with the columns
25+
columns = [
26+
ColumnBuilder("id", "string", "col comment").build(),
27+
ColumnBuilder("client_name", "string").build(),
28+
ColumnBuilder("amount", "string").build(),
29+
ColumnBuilder("year", "string").build(),
30+
ColumnBuilder("month", "string").build(),
31+
ColumnBuilder("day", "string").build(),
32+
]
33+
34+
# If your table has partitions, create a list with the partition columns.
35+
# This list is similar to the columns list, and the year, month and day
36+
# columns are the same.
37+
partition_keys = [
38+
ColumnBuilder("year", "string").build(),
39+
ColumnBuilder("month", "string").build(),
40+
ColumnBuilder("day", "string").build(),
41+
]
42+
43+
serde_info = SerDeInfoBuilder(
44+
serialization_lib="org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe"
45+
).build()
46+
47+
storage_descriptor = StorageDescriptorBuilder(
48+
columns=columns,
49+
location="s3a://path/to/file",
50+
input_format="org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat",
51+
output_format="org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat",
52+
serde_info=serde_info,
53+
).build()
54+
55+
table = TableBuilder(
56+
table_name="orders",
57+
db_name="store",
58+
owner="owner name",
59+
storage_descriptor=storage_descriptor,
60+
partition_keys=partition_keys,
61+
).build()
62+
63+
with HiveMetastoreClient(HIVE_HOST, HIVE_PORT) as hive_metastore_client:
64+
# Creating a new external table from the thrift table object
65+
hive_metastore_client.create_external_table(table)

examples/create_table.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,9 @@
33
Use the builders for creating each of them.
44
Some arguments are optional when creating a thrift object.
55
Check each Builder constructor for more information.
6+
7+
Note: if you want to create an external table refer to the
8+
example create_external_table.py.
69
"""
710

811
from hive_metastore_client import HiveMetastoreClient
@@ -41,9 +44,9 @@
4144

4245
storage_descriptor = StorageDescriptorBuilder(
4346
columns=columns,
44-
location="path/to/file",
45-
input_format="org.apache.hadoop.mapred.TextInputFormat",
46-
output_format="org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat",
47+
location="s3a://path/to/file",
48+
input_format="org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat",
49+
output_format="org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat",
4750
serde_info=serde_info,
4851
).build()
4952

hive_metastore_client/hive_metastore_client.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
FieldSchema,
1515
Database,
1616
AlreadyExistsException,
17+
Table,
1718
)
1819

1920

@@ -159,6 +160,22 @@ def create_database_if_not_exists(self, database: Database) -> None:
159160
except AlreadyExistsException:
160161
pass
161162

163+
def create_external_table(self, table: Table) -> None:
    """
    Creates an external table in Hive Metastore.

    When a table is created with tableType default (None) or equal to
    EXTERNAL_TABLE, an error in the server creates the table as a
    MANAGED_TABLE. This method enforces the parameter EXTERNAL=TRUE and
    the tableType EXTERNAL_TABLE so the table is created correctly.

    Parameters already defined in the table object are preserved; only
    the EXTERNAL key is added (or overridden with TRUE). The received
    table object is updated in place before being sent to the server.

    :param table: the table object
    """
    # Merge instead of overwriting, so parameters set by the caller are
    # not lost; ``parameters`` may be None when nothing was set.
    table.parameters = {**(table.parameters or {}), "EXTERNAL": "TRUE"}
    table.tableType = "EXTERNAL_TABLE"

    self.create_table(table)
178+
162179
@staticmethod
163180
def _format_partitions_location(
164181
partition_list: List[Partition],

tests/unit/hive_metastore_client/test_hive_metastore_client.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,12 @@
1+
from copy import copy
12
from unittest import mock
23
from unittest.mock import Mock, ANY
34

45
import pytest
56
from pytest import raises
67

78
from hive_metastore_client import HiveMetastoreClient
9+
from hive_metastore_client.builders import TableBuilder
810
from thrift_files.libraries.thrift_hive_metastore_client.ThriftHiveMetastore import (
911
Client as ThriftClient,
1012
)
@@ -295,3 +297,23 @@ def test_create_database_if_not_exists_with_existent_database(
295297

296298
# assert
297299
mocked_create_database.assert_called_once_with(mocked_database_obj)
300+
301+
@mock.patch.object(HiveMetastoreClient, "create_table")
def test_create_external_table(self, mocked_create_table, hive_metastore_client):
    """create_external_table must mark the table as external and delegate to create_table."""
    # arrange
    builder = TableBuilder(
        table_name="table_name",
        db_name="database_name",
        owner="owner",
        storage_descriptor=Mock(),
        partition_keys=[],
    )
    table = builder.build()

    # Expected state of the table once the external-table settings are applied.
    expected_table = copy(table)
    expected_table.tableType = "EXTERNAL_TABLE"
    expected_table.parameters = {"EXTERNAL": "TRUE"}

    # act
    hive_metastore_client.create_external_table(table)

    # assert
    mocked_create_table.assert_called_once_with(expected_table)

0 commit comments

Comments
 (0)