Skip to content

Commit e8a6ce5

Browse files
authored
fuzzy (#81)
1 parent fe9da78 commit e8a6ce5

File tree

3 files changed

+332
-0
lines changed

3 files changed

+332
-0
lines changed

src/main/java/com/alipay/oceanbase/hbase/filter/HBaseFilterUtils.java

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
import org.apache.hadoop.classification.InterfaceAudience;
2121
import org.apache.hadoop.hbase.filter.*;
2222
import org.apache.hadoop.hbase.util.Bytes;
23+
import org.apache.hadoop.hbase.util.Pair;
2324

2425
import java.lang.reflect.Field;
2526

@@ -66,6 +67,8 @@ private static void toParseableByteArray(ByteArrayOutputStream byteStream, Filte
6667
toParseableByteArray(byteStream, (FirstKeyOnlyFilter) filter);
6768
} else if (filter instanceof KeyOnlyFilter) {
6869
toParseableByteArray(byteStream, (KeyOnlyFilter) filter);
70+
} else if (filter instanceof FuzzyRowFilter) {
71+
toParseableByteArray(byteStream, (FuzzyRowFilter) filter);
6972
} else if (filter instanceof TimestampsFilter) {
7073
toParseableByteArray(byteStream, (TimestampsFilter) filter);
7174
} else if (filter instanceof MultiRowRangeFilter) {
@@ -224,6 +227,35 @@ private static void toParseableByteArray(ByteArrayOutputStream byteStream, KeyOn
224227
byteStream.write(')');
225228
}
226229

230+
// FuzzyRowFilter('abc','101','ddd','010');
231+
private static void toParseableByteArray(ByteArrayOutputStream byteStream, FuzzyRowFilter filter) throws IOException {
232+
byteStream.write(filter.getClass().getSimpleName().getBytes());
233+
byteStream.write('(');
234+
235+
List<Pair<byte[], byte[]>> fuzzyKeysData;
236+
try {
237+
Field field = filter.getClass().getDeclaredField("fuzzyKeysData");
238+
field.setAccessible(true);
239+
fuzzyKeysData = (List<Pair<byte[], byte[]>>)field.get(filter);
240+
} catch (NoSuchFieldException | IllegalAccessException e) {
241+
throw new RuntimeException(e);
242+
}
243+
for (int i = 0; i < fuzzyKeysData.size(); i ++) {
244+
Pair<byte[], byte[]> data = fuzzyKeysData.get(i);
245+
byteStream.write("'".getBytes());
246+
byteStream.write(data.getFirst());
247+
byteStream.write("'".getBytes());
248+
byteStream.write(',');
249+
byteStream.write("'".getBytes());
250+
byteStream.write(data.getSecond());
251+
byteStream.write("'".getBytes());
252+
if (i < fuzzyKeysData.size() - 1) {
253+
byteStream.write(',');
254+
}
255+
}
256+
byteStream.write(')');
257+
}
258+
227259
private static void toParseableByteArray(ByteArrayOutputStream byteStream, TimestampsFilter filter) throws IOException {
228260
byteStream.write(filter.getClass().getSimpleName().getBytes());
229261
byteStream.write('(');

src/test/java/com/alipay/oceanbase/hbase/HTableTestBase.java

Lines changed: 288 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
import org.apache.hadoop.hbase.client.*;
2626
import org.apache.hadoop.hbase.filter.*;
2727
import org.apache.hadoop.hbase.util.Bytes;
28+
import org.apache.hadoop.hbase.util.Pair;
2829
import org.junit.Assert;
2930
import org.junit.Ignore;
3031
import org.junit.Rule;
@@ -1671,6 +1672,293 @@ public void testFilter2() throws Exception {
16711672
scanner.close();
16721673
}
16731674

1675+
@Test
1676+
public void testFuzzyRowFilter() throws Exception {
1677+
String key1 = "abab";
1678+
String key2 = "abcc";
1679+
String column1 = "c1";
1680+
String column2 = "c2";
1681+
String column3 = "c3";
1682+
String column4 = "c4";
1683+
String column5 = "c5";
1684+
String value1 = "value1";
1685+
String value2 = "value2";
1686+
String value3 = "value3";
1687+
String family = "family1";
1688+
Delete deleteKey1Family = new Delete(toBytes(key1));
1689+
deleteKey1Family.deleteFamily(toBytes(family));
1690+
1691+
Delete deleteKey2Family = new Delete(toBytes(key2));
1692+
deleteKey2Family.deleteFamily(toBytes(family));
1693+
1694+
hTable.delete(deleteKey1Family);
1695+
hTable.delete(deleteKey2Family);
1696+
1697+
Put putKey1Column1Value1 = new Put(toBytes(key1));
1698+
putKey1Column1Value1.add(toBytes(family), toBytes(column1), toBytes(value1));
1699+
1700+
Put putKey1Column1Value2 = new Put(toBytes(key1));
1701+
putKey1Column1Value2.add(toBytes(family), toBytes(column1), toBytes(value2));
1702+
1703+
Put putKey1Column2Value2 = new Put(toBytes(key1));
1704+
putKey1Column2Value2.add(toBytes(family), toBytes(column2), toBytes(value2));
1705+
1706+
Put putKey1Column2Value1 = new Put(toBytes(key1));
1707+
putKey1Column2Value1.add(toBytes(family), toBytes(column2), toBytes(value1));
1708+
1709+
Put putKey1Column3Value1 = new Put(toBytes(key1));
1710+
putKey1Column3Value1.add(toBytes(family), toBytes(column3), toBytes(value1));
1711+
1712+
Put putKey1Column4Value1 = new Put(toBytes(key1));
1713+
putKey1Column4Value1.add(toBytes(family), toBytes(column4), toBytes(value1));
1714+
1715+
Put putKey1Column5Value1 = new Put(toBytes(key1));
1716+
putKey1Column5Value1.add(toBytes(family), toBytes(column5), toBytes(value1));
1717+
1718+
Put putKey2Column1Value1 = new Put(toBytes(key2));
1719+
putKey2Column1Value1.add(toBytes(family), toBytes(column1), toBytes(value1));
1720+
1721+
Put putKey2Column1Value2 = new Put(toBytes(key2));
1722+
putKey2Column1Value2.add(toBytes(family), toBytes(column1), toBytes(value2));
1723+
1724+
Put putKey2Column2Value2 = new Put(toBytes(key2));
1725+
putKey2Column2Value2.add(toBytes(family), toBytes(column2), toBytes(value2));
1726+
1727+
Put putKey2Column2Value1 = new Put(toBytes(key2));
1728+
putKey2Column2Value1.add(toBytes(family), toBytes(column2), toBytes(value1));
1729+
1730+
hTable.delete(deleteKey1Family);
1731+
hTable.delete(deleteKey2Family);
1732+
tryPut(hTable, putKey1Column1Value1);
1733+
tryPut(hTable, putKey1Column1Value2);
1734+
tryPut(hTable, putKey1Column1Value1);
1735+
tryPut(hTable, putKey1Column2Value1);
1736+
tryPut(hTable, putKey1Column2Value2);
1737+
tryPut(hTable, putKey1Column2Value1);
1738+
tryPut(hTable, putKey1Column2Value2);
1739+
tryPut(hTable, putKey1Column3Value1);
1740+
tryPut(hTable, putKey1Column4Value1);
1741+
tryPut(hTable, putKey1Column5Value1);
1742+
tryPut(hTable, putKey2Column2Value1);
1743+
tryPut(hTable, putKey2Column2Value2);
1744+
1745+
Scan scan;
1746+
scan = new Scan();
1747+
scan.addFamily(family.getBytes());
1748+
scan.setMaxVersions(10);
1749+
List<Pair<byte[], byte[]>> fuzzyKey = new ArrayList<>();
1750+
fuzzyKey.add(new Pair<byte[], byte[]>(Bytes.toBytes("abab"), Bytes.toBytes("0000")));
1751+
fuzzyKey.add(new Pair<byte[], byte[]>(Bytes.toBytes("dddd"), Bytes.toBytes("0000")));
1752+
FuzzyRowFilter filter = new FuzzyRowFilter(fuzzyKey);
1753+
scan.setFilter(filter);
1754+
ResultScanner scanner = hTable.getScanner(scan);
1755+
1756+
int res_count = 0;
1757+
for (Result result : scanner) {
1758+
for (KeyValue keyValue : result.raw()) {
1759+
System.out.printf("Rowkey: %s, Column Family: %s, Column Qualifier: %s, Timestamp: %d, Value: %s%n",
1760+
Bytes.toString(result.getRow()),
1761+
Bytes.toString(keyValue.getFamily()),
1762+
Bytes.toString(keyValue.getQualifier()),
1763+
keyValue.getTimestamp(),
1764+
Bytes.toString(keyValue.getValue())
1765+
);
1766+
res_count += 1;
1767+
}
1768+
}
1769+
Assert.assertEquals(res_count, 10);
1770+
scanner.close();
1771+
1772+
scan = new Scan();
1773+
scan.addFamily(family.getBytes());
1774+
scan.setMaxVersions(10);
1775+
scan.setReversed(true);
1776+
fuzzyKey = new ArrayList<>();
1777+
fuzzyKey.add(new Pair<byte[], byte[]>(Bytes.toBytes("dddd"), Bytes.toBytes("0000")));
1778+
fuzzyKey.add(new Pair<byte[], byte[]>(Bytes.toBytes("abcc"), Bytes.toBytes("0000")));
1779+
filter = new FuzzyRowFilter(fuzzyKey);
1780+
scan.setFilter(filter);
1781+
scanner = hTable.getScanner(scan);
1782+
1783+
res_count = 0;
1784+
for (Result result : scanner) {
1785+
for (KeyValue keyValue : result.raw()) {
1786+
System.out.printf("Rowkey: %s, Column Family: %s, Column Qualifier: %s, Timestamp: %d, Value: %s%n",
1787+
Bytes.toString(result.getRow()),
1788+
Bytes.toString(keyValue.getFamily()),
1789+
Bytes.toString(keyValue.getQualifier()),
1790+
keyValue.getTimestamp(),
1791+
Bytes.toString(keyValue.getValue())
1792+
);
1793+
res_count += 1;
1794+
}
1795+
}
1796+
Assert.assertEquals(res_count, 2);
1797+
scanner.close();
1798+
1799+
scan = new Scan();
1800+
scan.addFamily(family.getBytes());
1801+
scan.setMaxVersions(10);
1802+
scan.setReversed(true);
1803+
fuzzyKey = new ArrayList<>();
1804+
fuzzyKey.add(new Pair<byte[], byte[]>(Bytes.toBytes("ccab"), Bytes.toBytes("1100")));
1805+
fuzzyKey.add(new Pair<byte[], byte[]>(Bytes.toBytes("dddd"), Bytes.toBytes("0000")));
1806+
filter = new FuzzyRowFilter(fuzzyKey);
1807+
scan.setFilter(filter);
1808+
scanner = hTable.getScanner(scan);
1809+
1810+
res_count = 0;
1811+
for (Result result : scanner) {
1812+
for (KeyValue keyValue : result.raw()) {
1813+
System.out.printf("Rowkey: %s, Column Family: %s, Column Qualifier: %s, Timestamp: %d, Value: %s%n",
1814+
Bytes.toString(result.getRow()),
1815+
Bytes.toString(keyValue.getFamily()),
1816+
Bytes.toString(keyValue.getQualifier()),
1817+
keyValue.getTimestamp(),
1818+
Bytes.toString(keyValue.getValue())
1819+
);
1820+
res_count += 1;
1821+
}
1822+
}
1823+
Assert.assertEquals(res_count, 10);
1824+
scanner.close();
1825+
1826+
scan = new Scan();
1827+
scan.addFamily(family.getBytes());
1828+
scan.setMaxVersions(10);
1829+
scan.setReversed(true);
1830+
fuzzyKey = new ArrayList<>();
1831+
fuzzyKey.add(new Pair<byte[], byte[]>(Bytes.toBytes("cccc"), Bytes.toBytes("1100")));
1832+
fuzzyKey.add(new Pair<byte[], byte[]>(Bytes.toBytes("dddd"), Bytes.toBytes("0000")));
1833+
filter = new FuzzyRowFilter(fuzzyKey);
1834+
scan.setFilter(filter);
1835+
scanner = hTable.getScanner(scan);
1836+
1837+
res_count = 0;
1838+
for (Result result : scanner) {
1839+
for (KeyValue keyValue : result.raw()) {
1840+
System.out.printf("Rowkey: %s, Column Family: %s, Column Qualifier: %s, Timestamp: %d, Value: %s%n",
1841+
Bytes.toString(result.getRow()),
1842+
Bytes.toString(keyValue.getFamily()),
1843+
Bytes.toString(keyValue.getQualifier()),
1844+
keyValue.getTimestamp(),
1845+
Bytes.toString(keyValue.getValue())
1846+
);
1847+
res_count += 1;
1848+
}
1849+
}
1850+
Assert.assertEquals(res_count, 2);
1851+
scanner.close();
1852+
1853+
scan = new Scan();
1854+
scan.addFamily(family.getBytes());
1855+
scan.setMaxVersions(10);
1856+
scan.setReversed(true);
1857+
fuzzyKey = new ArrayList<>();
1858+
fuzzyKey.add(new Pair<byte[], byte[]>(Bytes.toBytes("ab##"), Bytes.toBytes("0011")));
1859+
fuzzyKey.add(new Pair<byte[], byte[]>(Bytes.toBytes("dddd"), Bytes.toBytes("0000")));
1860+
filter = new FuzzyRowFilter(fuzzyKey);
1861+
scan.setFilter(filter);
1862+
scanner = hTable.getScanner(scan);
1863+
1864+
res_count = 0;
1865+
for (Result result : scanner) {
1866+
for (KeyValue keyValue : result.raw()) {
1867+
System.out.printf("Rowkey: %s, Column Family: %s, Column Qualifier: %s, Timestamp: %d, Value: %s%n",
1868+
Bytes.toString(result.getRow()),
1869+
Bytes.toString(keyValue.getFamily()),
1870+
Bytes.toString(keyValue.getQualifier()),
1871+
keyValue.getTimestamp(),
1872+
Bytes.toString(keyValue.getValue())
1873+
);
1874+
res_count += 1;
1875+
}
1876+
}
1877+
Assert.assertEquals(res_count, 12);
1878+
scanner.close();
1879+
1880+
scan = new Scan();
1881+
scan.addFamily(family.getBytes());
1882+
scan.setMaxVersions(10);
1883+
scan.setReversed(true);
1884+
fuzzyKey = new ArrayList<>();
1885+
fuzzyKey.add(new Pair<byte[], byte[]>(Bytes.toBytes("azc"), Bytes.toBytes("010")));
1886+
fuzzyKey.add(new Pair<byte[], byte[]>(Bytes.toBytes("dddd"), Bytes.toBytes("0000")));
1887+
filter = new FuzzyRowFilter(fuzzyKey);
1888+
scan.setFilter(filter);
1889+
scanner = hTable.getScanner(scan);
1890+
1891+
res_count = 0;
1892+
for (Result result : scanner) {
1893+
for (KeyValue keyValue : result.raw()) {
1894+
System.out.printf("Rowkey: %s, Column Family: %s, Column Qualifier: %s, Timestamp: %d, Value: %s%n",
1895+
Bytes.toString(result.getRow()),
1896+
Bytes.toString(keyValue.getFamily()),
1897+
Bytes.toString(keyValue.getQualifier()),
1898+
keyValue.getTimestamp(),
1899+
Bytes.toString(keyValue.getValue())
1900+
);
1901+
res_count += 1;
1902+
}
1903+
}
1904+
Assert.assertEquals(res_count, 2);
1905+
scanner.close();
1906+
1907+
scan = new Scan();
1908+
scan.addFamily(family.getBytes());
1909+
scan.setMaxVersions(10);
1910+
scan.setReversed(true);
1911+
fuzzyKey = new ArrayList<>();
1912+
fuzzyKey.add(new Pair<byte[], byte[]>(Bytes.toBytes("azccd"), Bytes.toBytes("01001")));
1913+
fuzzyKey.add(new Pair<byte[], byte[]>(Bytes.toBytes("dddd"), Bytes.toBytes("0000")));
1914+
filter = new FuzzyRowFilter(fuzzyKey);
1915+
scan.setFilter(filter);
1916+
scanner = hTable.getScanner(scan);
1917+
1918+
res_count = 0;
1919+
for (Result result : scanner) {
1920+
for (KeyValue keyValue : result.raw()) {
1921+
System.out.printf("Rowkey: %s, Column Family: %s, Column Qualifier: %s, Timestamp: %d, Value: %s%n",
1922+
Bytes.toString(result.getRow()),
1923+
Bytes.toString(keyValue.getFamily()),
1924+
Bytes.toString(keyValue.getQualifier()),
1925+
keyValue.getTimestamp(),
1926+
Bytes.toString(keyValue.getValue())
1927+
);
1928+
res_count += 1;
1929+
}
1930+
}
1931+
Assert.assertEquals(res_count, 2);
1932+
scanner.close();
1933+
1934+
scan = new Scan();
1935+
scan.addFamily(family.getBytes());
1936+
scan.setMaxVersions(10);
1937+
scan.setReversed(true);
1938+
fuzzyKey = new ArrayList<>();
1939+
fuzzyKey.add(new Pair<byte[], byte[]>(Bytes.toBytes(""), Bytes.toBytes("")));
1940+
fuzzyKey.add(new Pair<byte[], byte[]>(Bytes.toBytes("dddd"), Bytes.toBytes("0000")));
1941+
filter = new FuzzyRowFilter(fuzzyKey);
1942+
scan.setFilter(filter);
1943+
scanner = hTable.getScanner(scan);
1944+
1945+
res_count = 0;
1946+
for (Result result : scanner) {
1947+
for (KeyValue keyValue : result.raw()) {
1948+
System.out.printf("Rowkey: %s, Column Family: %s, Column Qualifier: %s, Timestamp: %d, Value: %s%n",
1949+
Bytes.toString(result.getRow()),
1950+
Bytes.toString(keyValue.getFamily()),
1951+
Bytes.toString(keyValue.getQualifier()),
1952+
keyValue.getTimestamp(),
1953+
Bytes.toString(keyValue.getValue())
1954+
);
1955+
res_count += 1;
1956+
}
1957+
}
1958+
Assert.assertEquals(res_count, 12);
1959+
scanner.close();
1960+
}
1961+
16741962
@Test
16751963
public void testFirstKeyValueMatchingQualifiersFilter() throws Exception {
16761964
String key1 = "getKey1";

src/test/java/com/alipay/oceanbase/hbase/filter/HBaseFilterUtilsTest.java

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919

2020
import org.apache.hadoop.hbase.filter.*;
2121
import org.apache.hadoop.hbase.util.Bytes;
22+
import org.apache.hadoop.hbase.util.Pair;
2223
import org.junit.Assert;
2324
import org.junit.BeforeClass;
2425
import org.junit.Test;
@@ -162,6 +163,17 @@ public void testColumnPrefixFilter() throws IOException {
162163
HBaseFilterUtils.toParseableByteArray(filter));
163164
}
164165

166+
@Test
167+
public void testFuzzyRowFilter() throws IOException {
168+
List<Pair<byte[], byte[]>> fuzzyKey = new ArrayList<>();
169+
fuzzyKey.add(new Pair<byte[], byte[]>(Bytes.toBytes("abc"), Bytes.toBytes("101")));
170+
fuzzyKey.add(new Pair<byte[], byte[]>(Bytes.toBytes("ddd"), Bytes.toBytes("010")));
171+
172+
FuzzyRowFilter filter = new FuzzyRowFilter(fuzzyKey);
173+
System.out.println(Bytes.toString(HBaseFilterUtils.toParseableByteArray(filter)));
174+
Assert.assertArrayEquals("FuzzyRowFilter('abc','101','ddd','010')".getBytes(), HBaseFilterUtils.toParseableByteArray(filter));
175+
}
176+
165177
@Test
166178
public void testMultiRowRangeFilter() throws IOException {
167179
List<MultiRowRangeFilter.RowRange> ranges = new ArrayList<>();

0 commit comments

Comments
 (0)