Skip to content

Commit 58d1fdc

Browse files
author
Felipe Miquelim
authored
[DAE-94] Add get_table_partition_keys and keys_names methods (#43)
1 parent 740978d commit 58d1fdc

File tree

5 files changed

+176
-1
lines changed

5 files changed

+176
-1
lines changed

docs/source/getstarted.md

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,10 @@ Click on the following links to open the [examples](https://github.com/quintoand
2828

2929
**[#5 Add partitions to a table](https://github.com/quintoandar/hive-metastore-client/blob/main/examples/add_partitions.py)**
3030

31+
**[#6 Get partition keys objects from a table](https://github.com/quintoandar/hive-metastore-client/blob/main/examples/get_partition_keys_objects.py)**
32+
33+
**[#7 Get partition keys names from a table](https://github.com/quintoandar/hive-metastore-client/blob/main/examples/get_partition_keys_names.py)**
34+
3135
## Available methods
3236

3337
You can see all the Hive Metastore server available methods by looking at the
@@ -41,4 +45,6 @@ the [`HiveMetastoreClient`](https://github.com/quintoandar/hive-metastore-client
4145
- [`drop_columns_from_table`](https://hive-metastore-client.readthedocs.io/en/latest/hive_metastore_client.html#hive_metastore_client.hive_metastore_client.HiveMetastoreClient.drop_columns_from_table)
4246
- [`add_partitions_if_not_exists`](https://hive-metastore-client.readthedocs.io/en/latest/hive_metastore_client.html#hive_metastore_client.hive_metastore_client.HiveMetastoreClient.add_partitions_if_not_exists)
4347
- [`create_database_if_not_exists`](https://hive-metastore-client.readthedocs.io/en/latest/hive_metastore_client.html#hive_metastore_client.hive_metastore_client.HiveMetastoreClient.create_database_if_not_exists)
44-
- [`create_external_table`](https://hive-metastore-client.readthedocs.io/en/latest/hive_metastore_client.html#hive_metastore_client.hive_metastore_client.HiveMetastoreClient.create_external_table)
48+
- [`create_external_table`](https://hive-metastore-client.readthedocs.io/en/latest/hive_metastore_client.html#hive_metastore_client.hive_metastore_client.HiveMetastoreClient.create_external_table)
49+
- [`get_partition_keys_objects`](https://hive-metastore-client.readthedocs.io/en/latest/hive_metastore_client.html#hive_metastore_client.hive_metastore_client.HiveMetastoreClient.get_partition_keys_objects)
50+
- [`get_partition_keys_names`](https://hive-metastore-client.readthedocs.io/en/latest/hive_metastore_client.html#hive_metastore_client.hive_metastore_client.HiveMetastoreClient.get_partition_keys_names)

examples/get_partition_keys_names.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
from hive_metastore_client import HiveMetastoreClient

HIVE_HOST = "<ADD_HIVE_HOST_HERE>"
HIVE_PORT = 9083

DATABASE_NAME = "database_name"
TABLE_NAME = "table_name"

with HiveMetastoreClient(HIVE_HOST, HIVE_PORT) as hive_client:
    # Retrieving the partition keys names via table schema
    partition_keys_names = hive_client.get_partition_keys_names(
        DATABASE_NAME, TABLE_NAME
    )
    # Print the result so that running the example shows what was retrieved
    print(partition_keys_names)
examples/get_partition_keys_objects.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
from hive_metastore_client import HiveMetastoreClient

HIVE_HOST = "<ADD_HIVE_HOST_HERE>"
HIVE_PORT = 9083

DATABASE_NAME = "database_name"
TABLE_NAME = "table_name"

with HiveMetastoreClient(HIVE_HOST, HIVE_PORT) as hive_client:
    # Retrieving the partition keys via table schema
    partition_keys_objects = hive_client.get_partition_keys_objects(
        DATABASE_NAME, TABLE_NAME
    )
    # Print the result so that running the example shows what was retrieved
    print(partition_keys_objects)

hive_metastore_client/hive_metastore_client.py

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -229,3 +229,33 @@ def _validate_lists_length(list_a: List[Any], list_b: List[Any]) -> None:
229229
"m=_validate_lists_length, msg=The length of the two provided "
230230
"lists does not match"
231231
)
232+
233+
def get_partition_keys_objects(
    self, db_name: str, table_name: str
) -> List[FieldSchema]:
    """
    Gets the partition keys objects, containing the metadata, from a table.

    An empty list is returned when ``get_table`` yields no table, or when
    the table has no partition keys (``partitionKeys`` empty or unset).

    NOTE(review): if the underlying ``get_table`` raises for a missing
    table instead of returning a falsy value, that exception propagates
    to the caller - confirm against the metastore client behavior.

    :param db_name: name of the database that holds the table
    :param table_name: name of the table the partition keys belong to
    :return: a new list holding the table's partition key objects
    """
    table = self.get_table(dbname=db_name, tbl_name=table_name)
    if not table:
        return []
    # Guard against an unset (None) partitionKeys so that the documented
    # "empty list" contract holds instead of list(None) raising TypeError.
    return list(table.partitionKeys or [])
247+
248+
def get_partition_keys_names(self, db_name: str, table_name: str) -> List[str]:
    """
    Lists the names of the partition keys of a table.

    The key objects are obtained from ``get_partition_keys_objects`` and
    only their ``name`` attribute is kept, in the same order. The result
    is empty whenever that method returns no keys.

    :param db_name: name of the database that holds the table
    :param table_name: name of the table the partition keys belong to
    """
    return [
        key.name
        for key in self.get_partition_keys_objects(
            db_name=db_name, table_name=table_name
        )
    ]

tests/unit/hive_metastore_client/test_hive_metastore_client.py

Lines changed: 117 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -317,3 +317,120 @@ def test_create_external_table(self, mocked_create_table, hive_metastore_client)
317317

318318
# assert
319319
mocked_create_table.assert_called_once_with(updated_table)
320+
321+
@mock.patch.object(HiveMetastoreClient, "get_table", return_value=None)
def test_get_partition_keys_objects_with_invalid_table(
    self, mocked_get_table, hive_metastore_client
):
    """When get_table returns None, an empty list is returned."""
    # arrange
    database_name, table_name = "database_name", "table_name"

    # act
    result = hive_metastore_client.get_partition_keys_objects(
        database_name, table_name
    )

    # assert
    mocked_get_table.assert_called_once_with(
        dbname=database_name, tbl_name=table_name
    )
    assert result == []
339+
340+
@mock.patch.object(HiveMetastoreClient, "get_table")
def test_get_partition_keys_objects_with_not_partitioned_table(
    self, mocked_get_table, hive_metastore_client
):
    """A table whose partitionKeys is an empty list yields an empty list."""
    # arrange
    database_name, table_name = "database_name", "table_name"
    # the default return from hive metastore for not partitioned tables
    # is an empty list
    mocked_get_table.return_value = Mock(partitionKeys=[])

    # act
    result = hive_metastore_client.get_partition_keys_objects(
        database_name, table_name
    )

    # assert
    mocked_get_table.assert_called_once_with(
        dbname=database_name, tbl_name=table_name
    )
    assert result == []
362+
363+
@mock.patch.object(HiveMetastoreClient, "get_table")
def test_get_partition_keys_objects_with_partitioned_table(
    self, mocked_get_table, hive_metastore_client
):
    """The table's partition key objects are returned in their order."""
    # arrange
    database_name, table_name = "database_name", "table_name"
    partition_a, partition_b = Mock(), Mock()
    mocked_get_table.return_value = Mock(partitionKeys=[partition_a, partition_b])

    # act
    result = hive_metastore_client.get_partition_keys_objects(
        database_name, table_name
    )

    # assert
    mocked_get_table.assert_called_once_with(
        dbname=database_name, tbl_name=table_name
    )
    assert result == [partition_a, partition_b]
386+
387+
@mock.patch.object(HiveMetastoreClient, "get_partition_keys_objects", return_value=[])
def test_get_partition_keys_names_with_invalid_or_not_partitioned_table(
    self, mocked_get_partition_keys_objects, hive_metastore_client
):
    """No partition key objects means no partition key names."""
    # arrange
    database_name, table_name = "database_name", "table_name"

    # act
    result = hive_metastore_client.get_partition_keys_names(
        database_name, table_name
    )

    # assert
    mocked_get_partition_keys_objects.assert_called_once_with(
        db_name=database_name, table_name=table_name
    )
    assert result == []
407+
408+
@mock.patch.object(HiveMetastoreClient, "get_partition_keys_objects")
def test_get_partition_keys_names_with_partitioned_table(
    self, mocked_get_partition_keys_objects, hive_metastore_client,
):
    """The names of the partition key objects are returned in order."""
    # arrange
    table_name = "table_name"
    database_name = "database_name"
    # `name` is assigned after construction because Mock(name=...) only
    # labels the mock itself instead of creating a `name` attribute value
    mocked_partition_a = Mock()
    mocked_partition_a.name = "mocked_partition_a"
    mocked_partition_b = Mock()
    mocked_partition_b.name = "mocked_partition_b"
    # The return value is configured here, so the decorator does not need
    # a placeholder `return_value`
    mocked_get_partition_keys_objects.return_value = [
        mocked_partition_a,
        mocked_partition_b,
    ]
    expected_return = ["mocked_partition_a", "mocked_partition_b"]

    # act
    returned_value = hive_metastore_client.get_partition_keys_names(
        database_name, table_name
    )

    # assert
    assert returned_value == expected_return
    mocked_get_partition_keys_objects.assert_called_once_with(
        db_name=database_name, table_name=table_name
    )

0 commit comments

Comments
 (0)