Skip to content

Commit d258830

Browse files
authored
Adding method to drop columns from a table (#30)
1 parent 6909ebb commit d258830

File tree

3 files changed

+72
-0
lines changed

3 files changed

+72
-0
lines changed

examples/drop_columns_from_table.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
from hive_metastore_client import HiveMetastoreClient
2+
3+
HIVE_HOST = "<ADD_HIVE_HOST_HERE>"
HIVE_PORT = 9083

# Names of the columns that should be removed from the table
columns_to_drop = ["quantity"]

# The client is used as a context manager for the duration of the call
with HiveMetastoreClient(HIVE_HOST, HIVE_PORT) as client:
    # Drop the listed columns from the `order` table in the `store` database
    client.drop_columns_from_table(
        db_name="store",
        table_name="order",
        columns=columns_to_drop,
    )

hive_metastore_client/hive_mestastore_client.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,32 @@ def add_columns_to_table(
8484
# call alter table to add columns
8585
self.alter_table(dbname=db_name, tbl_name=table_name, new_tbl=table)
8686

87+
def drop_columns_from_table(
    self, db_name: str, table_name: str, columns: List[str]
) -> None:
    """
    Drops columns from a table.

    It encapsulates the logic of calling alter table with the given columns
    removed from the table's list of columns, since hive does not have a
    drop command.

    When `columns` is empty (or None) nothing is done: the table is neither
    fetched nor altered. Names in `columns` that do not match any column of
    the table are ignored.

    :param db_name: database name of the table
    :param table_name: table name
    :param columns: names of the columns to be dropped from the table
    """
    # guard clause: nothing to drop, so avoid any metastore call
    if not columns:
        return

    table = self.get_table(dbname=db_name, tbl_name=table_name)

    # build the lookup set once so each membership check is constant time
    names_to_drop = set(columns)

    # keep, in their original order, only the columns that were not requested
    # to be dropped
    table.sd.cols = [col for col in table.sd.cols if col.name not in names_to_drop]

    # call alter table with the reduced list of columns to effectively drop them
    self.alter_table(dbname=db_name, tbl_name=table_name, new_tbl=table)
112+
87113
def add_partitions_to_table(
88114
self, db_name: str, table_name: str, partition_list: List[Partition]
89115
) -> None:

tests/unit/hive_metastore_client/test_hive_mestastore_client.py

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33

44
import pytest
55
from hive_metastore_client import HiveMetastoreClient
6+
from thrift_files.libraries.thrift_hive_metastore_client.ttypes import FieldSchema
67

78

89
class TestHiveMetastoreClient:
@@ -93,6 +94,38 @@ def test_add_columns_to_table(
9394
dbname=db_name, tbl_name=table_name, new_tbl=mocked_return_get_table
9495
)
9596

97+
@mock.patch.object(HiveMetastoreClient, "get_table")
@mock.patch.object(HiveMetastoreClient, "alter_table")
def test_drop_columns_from_table(
    self, mocked_alter_table, mocked_get_table, hive_metastore_client
):
    """
    Checks that drop_columns_from_table fetches the table, removes the
    requested columns from it and sends the result to alter_table.
    """
    # arrange
    db_name = "db_name"
    table_name = "table_name"
    cols = ["col1", "col2"]

    # the table returned by get_table keeps all three columns, so the method
    # under test really has something to remove
    mocked_return_get_table = Mock()
    mocked_return_get_table.sd.cols = [
        FieldSchema(name="col1"),
        FieldSchema(name="col2"),
        FieldSchema(name="col3"),
    ]
    mocked_get_table.return_value = mocked_return_get_table

    # act
    hive_metastore_client.drop_columns_from_table(
        db_name=db_name, table_name=table_name, columns=cols
    )

    # assert
    mocked_get_table.assert_called_once_with(dbname=db_name, tbl_name=table_name)
    mocked_alter_table.assert_called_once_with(
        dbname=db_name, tbl_name=table_name, new_tbl=mocked_return_get_table
    )
    # the table handed to alter_table is the same object returned by
    # get_table, so its columns are inspected after the call to verify that
    # only the column that was not requested to be dropped is left
    remaining_columns = [col.name for col in mocked_return_get_table.sd.cols]
    assert remaining_columns == ["col3"]
128+
96129
def test__validate_lists_length_with_diff_lens(self, hive_metastore_client):
97130
# arrange
98131
list_a = [1, 2, 3]

0 commit comments

Comments
 (0)