Skip to content

Commit eda9482

Browse files
authored
Merge pull request #19 from dangtrungtin/master
Improve performance of put/get rows using pandas.DataFrame
2 parents 3b8a8ca + fee8818 commit eda9482

14 files changed

+740
-72
lines changed

Makefile

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,8 @@ CPPFLAGS = -fPIC -std=c++0x -g -O2
99
INCLUDES = -Iinclude -Isrc
1010

1111
INCLUDES_PYTHON = $(INCLUDES) \
12-
-I/usr/include/python3.6
12+
-I/usr/include/python3.6 \
13+
-I$(HOME)/.pyenv/versions/3.6.4/lib/python3.6/site-packages/numpy/core/include
1314

1415
PROGRAM = _griddb_python.so
1516
EXTRA = griddb_python.py griddb_python.pyc
@@ -24,6 +25,7 @@ SOURCES = src/TimeSeriesProperties.cpp \
2425
src/Query.cpp \
2526
src/QueryAnalysisEntry.cpp \
2627
src/RowKeyPredicate.cpp \
28+
src/RowList.cpp \
2729
src/RowSet.cpp \
2830
src/TimestampUtils.cpp \
2931
src/Field.cpp \

README.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,11 @@ Set CPATH and LIBRARY_PATH.
4949

5050
export LIBRARY_PATH=$LIBRARY_PATH:<C client library file directory path>
5151

52+
Install Pandas and Numpy as below:
53+
54+
$ python -m pip install numpy
55+
$ python -m pip install pandas
56+
5257
### Build and Run
5358

5459
1. Execute the command on project directory.

sample/FetchRowsWithDataFrame.py

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
#!/usr/bin/python
2+
3+
import griddb_python as griddb
4+
import sys
5+
import pandas
6+
7+
# Sample: store two rows from a pandas.DataFrame into a collection, then
# query the collection and print what rs.fetch_rows() returns.
factory = griddb.StoreFactory.get_instance()

argv = sys.argv

# Fail early with a usage hint instead of an uncaught IndexError when the
# connection arguments are missing (the handler below only covers
# griddb.GSException).
if len(argv) < 6:
    sys.exit("Usage: " + argv[0] +
             " <host> <port> <cluster_name> <username> <password>")

# Ten bytes (ASCII "ABCDEFGHIJ") written to the BLOB column of every row.
blob = bytearray([65, 66, 67, 68, 69, 70, 71, 72, 73, 74])
containerName = "SamplePython_FetchRows"
update = False  # forwarded unchanged to query.fetch()

try:
    # Get GridStore object
    gridstore = factory.get_store(host=argv[1],
                                  port=int(argv[2]),
                                  cluster_name=argv[3],
                                  username=argv[4],
                                  password=argv[5])

    # Create Collection
    columns = [["name", griddb.Type.STRING],
               ["status", griddb.Type.BOOL],
               ["count", griddb.Type.LONG],
               ["lob", griddb.Type.BLOB]]
    conInfo = griddb.ContainerInfo(containerName,
                                   columns,
                                   griddb.ContainerType.COLLECTION, True)
    col = gridstore.put_container(conInfo)
    print("Create Collection name=", containerName)

    # Put rows: one DataFrame row per container row, with values listed in
    # the same order as the column definitions above.
    rows = pandas.DataFrame([["name01", True, 1, blob],
                             ["name02", False, 2, blob]])
    col.put_rows(rows)
    print("Put rows with DataFrame")

    # Fetch rows
    query = col.query("select *")
    rs = query.fetch(update)
    print("Fetch rows with DataFrame")
    result = rs.fetch_rows()
    print(result)
    print("Success!")

except griddb.GSException as e:
    # Print every entry of the error stack carried by the exception.
    for i in range(e.get_error_stack_size()):
        print("[", i, "]")
        print(e.get_error_code(i))
        print(e.get_location(i))
        print(e.get_message(i))

sample/PutRowsWithDataFrame.py

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
#!/usr/bin/python
2+
3+
import griddb_python as griddb
4+
import sys
5+
import pandas
6+
7+
# Sample: store two rows from a pandas.DataFrame into a collection.
factory = griddb.StoreFactory.get_instance()

argv = sys.argv

# Fail early with a usage hint instead of an uncaught IndexError when the
# connection arguments are missing (the handler below only covers
# griddb.GSException).
if len(argv) < 6:
    sys.exit("Usage: " + argv[0] +
             " <host> <port> <cluster_name> <username> <password>")

# Ten bytes (ASCII "ABCDEFGHIJ") written to the BLOB column of every row.
blob = bytearray([65, 66, 67, 68, 69, 70, 71, 72, 73, 74])
containerName = "SamplePython_PutRows"

try:
    # Get GridStore object
    gridstore = factory.get_store(host=argv[1],
                                  port=int(argv[2]),
                                  cluster_name=argv[3],
                                  username=argv[4],
                                  password=argv[5])

    # Create Collection
    columns = [["name", griddb.Type.STRING],
               ["status", griddb.Type.BOOL],
               ["count", griddb.Type.LONG],
               ["lob", griddb.Type.BLOB]]
    conInfo = griddb.ContainerInfo(containerName,
                                   columns,
                                   griddb.ContainerType.COLLECTION, True)
    col = gridstore.put_container(conInfo)
    print("Create Collection name=", containerName)

    # Put rows: one DataFrame row per container row, with values listed in
    # the same order as the column definitions above.
    rows = pandas.DataFrame([["name01", False, 1, blob],
                             ["name02", False, 1, blob]])
    col.put_rows(rows)
    print("Put rows with DataFrame")
    print("Success!")

except griddb.GSException as e:
    # Print every entry of the error stack carried by the exception.
    for i in range(e.get_error_stack_size()):
        print("[", i, "]")
        print(e.get_error_code(i))
        print(e.get_location(i))
        print(e.get_message(i))

src/Container.cpp

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@ namespace griddb {
6161
freeMemoryContainer();
6262
throw GSException(mContainer, "Memory allocation error");
6363
}
64-
64+
6565
mContainerInfo->timeSeriesProperties = NULL;
6666
mContainerInfo->triggerInfoList = NULL;
6767
mContainerInfo->dataAffinity = NULL;
@@ -74,8 +74,6 @@ namespace griddb {
7474
}
7575

7676
Container::~Container() {
77-
78-
7977
// allRelated = FALSE, since all row object is managed by Row class
8078
close(GS_FALSE);
8179
}
@@ -409,4 +407,11 @@ namespace griddb {
409407
int Container::getColumnCount(){
410408
return mContainerInfo->columnCount;
411409
}
410+
411+
/**
412+
* @brief Put rows with input is numpy data
413+
*/
414+
void Container::put_rows(GSRow** listRow, int rowCount) {
415+
this->multi_put(listRow, rowCount);
416+
}
412417
}

src/Container.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,7 @@ class Container {
5858
GSType* getGSTypeList();
5959
int getColumnCount();
6060
GSRow* getGSRowPtr();
61+
void put_rows(GSRow** listRow, int rowCount);
6162

6263
private:
6364
Container(GSContainer *container, GSContainerInfo* containerInfo);

src/Query.cpp

100644100755
File mode changed.

src/Query.h

100644100755
File mode changed.

src/RowList.cpp

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
/*
2+
Copyright (c) 2017 TOSHIBA Digital Solutions Corporation.
3+
Licensed under the Apache License, Version 2.0 (the "License");
4+
you may not use this file except in compliance with the License.
5+
You may obtain a copy of the License at
6+
http://www.apache.org/licenses/LICENSE-2.0
7+
Unless required by applicable law or agreed to in writing, software
8+
distributed under the License is distributed on an "AS IS" BASIS,
9+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10+
See the License for the specific language governing permissions and
11+
limitations under the License.
12+
*/
13+
14+
#include "RowList.h"
15+
16+
namespace griddb {
17+
18+
RowList::RowList(GSRow *gsRow, GSRowSet *gsRowSet, GSType *typelist,
19+
int columnCount, bool timetampFloat) :
20+
mRowSet(gsRowSet), mRow(gsRow), mTypelist(typelist),
21+
mColumnCount(columnCount), mTimetampFloat(timetampFloat) {
22+
}
23+
24+
/**
25+
* Support iterator object.
26+
*/
27+
RowList* RowList::__iter__() {
28+
return this;
29+
}
30+
31+
/**
32+
* Support iterator object: get next row
33+
*/
34+
void RowList::__next__(bool* hasRow) {
35+
*hasRow = gsHasNextRow(mRowSet);
36+
if (*hasRow) {
37+
gsGetNextRow(mRowSet, mRow);
38+
}
39+
}
40+
41+
/**
42+
* Refer GSRow pointer from RowSet
43+
*/
44+
GSRow* RowList::get_gsrow_ptr() {
45+
return this->mRow;
46+
}
47+
48+
/**
49+
* Refer GSType pointer from RowSet
50+
*/
51+
GSType* RowList::get_gstype_list() {
52+
return mTypelist;
53+
}
54+
55+
/**
56+
* Refer number column from RowSet
57+
*/
58+
int RowList::get_column_count() {
59+
return mColumnCount;
60+
}
61+
62+
/**
63+
* Refer number column from RowSet
64+
*/
65+
bool RowList::get_timestamp_to_float() {
66+
return mTimetampFloat;
67+
}
68+
69+
} // namespace griddb

src/RowList.h

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
/*
2+
Copyright (c) 2017 TOSHIBA Digital Solutions Corporation.
3+
Licensed under the Apache License, Version 2.0 (the "License");
4+
you may not use this file except in compliance with the License.
5+
You may obtain a copy of the License at
6+
http://www.apache.org/licenses/LICENSE-2.0
7+
Unless required by applicable law or agreed to in writing, software
8+
distributed under the License is distributed on an "AS IS" BASIS,
9+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10+
See the License for the specific language governing permissions and
11+
limitations under the License.
12+
*/
13+
14+
#ifndef _ROWLIST_H_
15+
#define _ROWLIST_H_
16+
17+
#include "gridstore.h"
18+
19+
namespace griddb {
20+
class RowList {
21+
private:
22+
GSRowSet *mRowSet;
23+
GSRow *mRow;
24+
GSType* mTypelist;
25+
int mColumnCount;
26+
bool mTimetampFloat;
27+
public:
28+
RowList(GSRow *gsRow, GSRowSet *gsRowSet, GSType* typelist, int columnCount,
29+
bool timetampFloat);
30+
void __next__(bool* hasRow);
31+
RowList* __iter__();
32+
GSRow* get_gsrow_ptr();
33+
GSType* get_gstype_list();
34+
int get_column_count();
35+
bool get_timestamp_to_float();
36+
};
37+
} // namespace griddb
38+
39+
#endif // _ROWLIST_H_

src/RowSet.cpp

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -239,7 +239,7 @@ namespace griddb {
239239
} catch (bad_alloc& ba) {
240240
throw GSException(mRowSet, "Memory allocation error");
241241
}
242-
242+
243243
for (int i = 0; i < mContainerInfo->columnCount; i++){
244244
typeList[i] = mContainerInfo->columnInfoList[i].type;
245245
}
@@ -263,4 +263,19 @@ namespace griddb {
263263
return mRow;
264264
}
265265

266+
/**
267+
* @brief Get row list data
268+
* @param **row List row data
269+
* @param **rowSet Rowset data
270+
* @return A pointer store row list data in RowList object
271+
*/
272+
griddb::RowList* RowSet::fetch_rows(GSRow **row, GSRowSet **rowSet) {
273+
try {
274+
return new RowList(this->getGSRowPtr(), this->mRowSet,
275+
this->getGSTypeList(), this->getColumnCount(),
276+
this->timestamp_output_with_float);
277+
} catch (bad_alloc &ba) {
278+
throw GSException(mRowSet, "Memory allocation error");
279+
}
280+
}
266281
}

src/RowSet.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
#include "QueryAnalysisEntry.h"
3030
#include "GSException.h"
3131
#include "Util.h"
32+
#include "RowList.h"
3233

3334
using namespace std;
3435

@@ -67,6 +68,7 @@ class RowSet {
6768
int getColumnCount();
6869

6970
GSRow* getGSRowPtr();
71+
griddb::RowList* fetch_rows(GSRow** row, GSRowSet** rowSet);
7072

7173
private:
7274
RowSet(GSRowSet *rowSet, GSContainerInfo *containerInfo, GSRow *mRow);

src/griddb.i

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@
4242
%feature("new") griddb::Query::get_row_set;
4343
%feature("new") griddb::RowSet::get_next_query_analysis;
4444
%feature("new") griddb::RowSet::get_next_aggregation;
45+
%feature("new") griddb::RowSet::fetch_rows;
4546
%feature("new") griddb::Store::put_container;
4647
%feature("new") griddb::Store::get_container;
4748
%feature("new") griddb::Store::get_container_info;
@@ -83,6 +84,7 @@
8384
#include "RowKeyPredicate.h"
8485
#include "Store.h"
8586
#include "StoreFactory.h"
87+
#include "RowList.h"
8688
%}
8789
#if !defined(SWIGJAVASCRIPT)
8890
%{
@@ -109,6 +111,7 @@
109111
%shared_ptr(griddb::RowKeyPredicate)
110112
%shared_ptr(griddb::Store)
111113
%shared_ptr(griddb::PartitionController)
114+
%shared_ptr(griddb::RowList)
112115
#endif
113116

114117
%include "GSException.h"
@@ -125,6 +128,7 @@
125128
%include "RowKeyPredicate.h"
126129
%include "Store.h"
127130
%include "StoreFactory.h"
131+
%include "RowList.h"
128132
#if !defined(SWIGJAVASCRIPT)
129133
%include "TimestampUtils.h"
130134
#endif

0 commit comments

Comments
 (0)