From c095843f183244c5f6fbdb39650b7d7185671c28 Mon Sep 17 00:00:00 2001 From: Aayush Date: Sun, 9 Dec 2018 02:46:08 +0530 Subject: [PATCH 1/8] Added functionality to only search from filtered documents --- .../java/org/elasticsearch/plugin/aknn/AknnRestAction.java | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/elasticsearch-aknn/src/main/java/org/elasticsearch/plugin/aknn/AknnRestAction.java b/elasticsearch-aknn/src/main/java/org/elasticsearch/plugin/aknn/AknnRestAction.java index e84f6e9..d2e457b 100644 --- a/elasticsearch-aknn/src/main/java/org/elasticsearch/plugin/aknn/AknnRestAction.java +++ b/elasticsearch-aknn/src/main/java/org/elasticsearch/plugin/aknn/AknnRestAction.java @@ -31,6 +31,7 @@ import org.elasticsearch.index.query.BoolQueryBuilder; import org.elasticsearch.index.query.QueryBuilder; import org.elasticsearch.index.query.QueryBuilders; +import org.elasticsearch.index.query.WrapperQueryBuilder; import org.elasticsearch.rest.BaseRestHandler; import org.elasticsearch.rest.BytesRestResponse; import org.elasticsearch.rest.RestController; @@ -61,6 +62,7 @@ public class AknnRestAction extends BaseRestHandler { private final String VECTOR_KEY = "_aknn_vector"; private final Integer K1_DEFAULT = 99; private final Integer K2_DEFAULT = 10; + private final String FILTER_DEFAULT = "{}"; // TODO: add an option to the index endpoint handler that empties the cache. private Map lshModelCache = new HashMap<>(); @@ -106,6 +108,7 @@ private RestChannelConsumer handleSearchRequest(RestRequest restRequest, NodeCli final String id = restRequest.param("id"); final Integer k1 = restRequest.paramAsInt("k1", K1_DEFAULT); final Integer k2 = restRequest.paramAsInt("k2", K2_DEFAULT); + final String filter = restRequest.param("filter", FILTER_DEFAULT); stopWatch.stop(); logger.info("Get query document at {}/{}/{}", index, type, id); @@ -126,12 +129,13 @@ private RestChannelConsumer handleSearchRequest(RestRequest restRequest, NodeCli stopWatch.stop(); // Retrieve the documents with most matching hashes. https://stackoverflow.com/questions/10773581 + // http://javadoc.kyubu.de/elasticsearch/HEAD/org/elasticsearch/index/query/WrapperQueryBuilder.html logger.info("Build boolean query from hashes"); stopWatch.start("Build boolean query from hashes"); QueryBuilder queryBuilder = QueryBuilders.boolQuery(); for (Map.Entry entry : queryHashes.entrySet()) { String termKey = HASHES_KEY + "." + entry.getKey(); - ((BoolQueryBuilder) queryBuilder).should(QueryBuilders.termQuery(termKey, entry.getValue())); + ((BoolQueryBuilder) queryBuilder).filter(new WrapperQueryBuilder(filter)).should(QueryBuilders.termQuery(termKey, entry.getValue())); } stopWatch.stop(); From dbe732ffb322aa835200ad507f65c024c8a49538 Mon Sep 17 00:00:00 2001 From: Aayush Date: Sun, 9 Dec 2018 03:08:20 +0530 Subject: [PATCH 2/8] Added dockerfile for quick deployment --- elasticsearch-aknn/Dockerfile | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) create mode 100644 elasticsearch-aknn/Dockerfile diff --git a/elasticsearch-aknn/Dockerfile b/elasticsearch-aknn/Dockerfile new file mode 100644 index 0000000..e86c5cf --- /dev/null +++ b/elasticsearch-aknn/Dockerfile @@ -0,0 +1,30 @@ +FROM docker.elastic.co/elasticsearch/elasticsearch:6.2.4 + +ADD . /aknn +WORKDIR /aknn + + +# Install Java 10 +# Before building this image, download the .rpm files to elasticsearch-aknn directory +# from http://www.oracle.com/technetwork/java/javase/downloads/index.html + +RUN yum -y install jdk-10.0.2_linux-x64_bin.rpm +RUN yum -y install jre-10.0.2_linux-x64_bin.rpm +ENV JAVA_HOME=/usr/java/jdk-10.0.2/ + + +# Install gradle 4.9 + +RUN wget https://services.gradle.org/distributions/gradle-4.9-bin.zip +RUN mkdir /opt/gradle +RUN unzip -d /opt/gradle gradle-4.9-bin.zip +ENV PATH=$PATH:/opt/gradle/gradle-4.9/bin + + +# Build & install the plugin + +RUN gradle clean build -x integTestRunner -x test +RUN elasticsearch-plugin install -b file:build/distributions/elasticsearch-aknn-0.0.1-SNAPSHOT.zip + +# Configure ElasticSearch +ENV ES_JAVA_OPTS="-Xms10g -Xmx10g" From bd5fe7769a86e518c1ef31f9abc17cd3794b6f55 Mon Sep 17 00:00:00 2001 From: Aayush Date: Sun, 9 Dec 2018 04:17:47 +0530 Subject: [PATCH 3/8] shortened the line --- elasticsearch-aknn/Dockerfile | 4 ++++ .../java/org/elasticsearch/plugin/aknn/AknnRestAction.java | 4 ++-- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/elasticsearch-aknn/Dockerfile b/elasticsearch-aknn/Dockerfile index e86c5cf..eff2e70 100644 --- a/elasticsearch-aknn/Dockerfile +++ b/elasticsearch-aknn/Dockerfile @@ -8,10 +8,14 @@ WORKDIR /aknn # Before building this image, download the .rpm files to elasticsearch-aknn directory # from http://www.oracle.com/technetwork/java/javase/downloads/index.html +#https://www.oracle.com/technetwork/java/javase/downloads/java-archive-javase10-4425482.html + RUN yum -y install jdk-10.0.2_linux-x64_bin.rpm RUN yum -y install jre-10.0.2_linux-x64_bin.rpm ENV JAVA_HOME=/usr/java/jdk-10.0.2/ +wget --no-cookies --no-check-certificate --header "Cookie: RT=sl=16&ss=1544305611360&tt=105022&obo=1&bcn=%2F%2F36fb68c2.akstat.io%2F&sh=1544307107821%3D16%3A1%3A105022%2C1544306418289%3D15%3A1%3A96370%2C1544306220860%3D14%3A1%3A89660%2C1544306204378%3D13%3A1%3A85180%2C1544306192178%3D12%3A1%3A80502&dm=oracle.com&si=2fc3a2b6-451e-42e3-a702-0d0d43f6bed0&ld=1544307107821&nu=http%3A%2F%2Fdownload.oracle.com%2Fotn%2Fjava%2Fjdk%2F10.0.2%2B13%2F19aef61b38124481863b1413dce1855f%2Fjdk-10.0.2_linux-x64_bin.rpm&cl=1544307254064;gpw_e24=https%3A%2F%2Fwww.oracle.com%2Ftechnetwork%2Fjava%2Fjavase%2Fdownloads%2Fjava-archive-javase10-4425482.html; oraclelicense=accept-securebackup-cookie" "http://download.oracle.com/otn/java/jdk/10.0.2+13/19aef61b38124481863b1413dce1855f/jdk-10.0.2_linux-x64_bin.rpm" + # Install gradle 4.9 diff --git a/elasticsearch-aknn/src/main/java/org/elasticsearch/plugin/aknn/AknnRestAction.java b/elasticsearch-aknn/src/main/java/org/elasticsearch/plugin/aknn/AknnRestAction.java index d2e457b..1b44aeb 100644 --- a/elasticsearch-aknn/src/main/java/org/elasticsearch/plugin/aknn/AknnRestAction.java +++ b/elasticsearch-aknn/src/main/java/org/elasticsearch/plugin/aknn/AknnRestAction.java @@ -108,7 +108,7 @@ private RestChannelConsumer handleSearchRequest(RestRequest restRequest, NodeCli final String id = restRequest.param("id"); final Integer k1 = restRequest.paramAsInt("k1", K1_DEFAULT); final Integer k2 = restRequest.paramAsInt("k2", K2_DEFAULT); - final String filter = restRequest.param("filter", FILTER_DEFAULT); + final String f = restRequest.param("filter", FILTER_DEFAULT); stopWatch.stop(); logger.info("Get query document at {}/{}/{}", index, type, id); @@ -135,7 +135,7 @@ private RestChannelConsumer handleSearchRequest(RestRequest restRequest, NodeCli QueryBuilder queryBuilder = QueryBuilders.boolQuery(); for (Map.Entry entry : queryHashes.entrySet()) { String termKey = HASHES_KEY + "." + entry.getKey(); - ((BoolQueryBuilder) queryBuilder).filter(new WrapperQueryBuilder(filter)).should(QueryBuilders.termQuery(termKey, entry.getValue())); + ((BoolQueryBuilder) queryBuilder).filter(new WrapperQueryBuilder(f)).should(QueryBuilders.termQuery(termKey, entry.getValue())); } stopWatch.stop(); From 9608fc1600095f7529eaa37c3af636d3a727b42c Mon Sep 17 00:00:00 2001 From: Aayush Date: Sun, 9 Dec 2018 04:19:01 +0530 Subject: [PATCH 4/8] fixed dockerfile --- elasticsearch-aknn/Dockerfile | 2 -- 1 file changed, 2 deletions(-) diff --git a/elasticsearch-aknn/Dockerfile b/elasticsearch-aknn/Dockerfile index eff2e70..e03eae6 100644 --- a/elasticsearch-aknn/Dockerfile +++ b/elasticsearch-aknn/Dockerfile @@ -14,8 +14,6 @@ RUN yum -y install jdk-10.0.2_linux-x64_bin.rpm RUN yum -y install jre-10.0.2_linux-x64_bin.rpm ENV JAVA_HOME=/usr/java/jdk-10.0.2/ -wget --no-cookies --no-check-certificate --header "Cookie: RT=sl=16&ss=1544305611360&tt=105022&obo=1&bcn=%2F%2F36fb68c2.akstat.io%2F&sh=1544307107821%3D16%3A1%3A105022%2C1544306418289%3D15%3A1%3A96370%2C1544306220860%3D14%3A1%3A89660%2C1544306204378%3D13%3A1%3A85180%2C1544306192178%3D12%3A1%3A80502&dm=oracle.com&si=2fc3a2b6-451e-42e3-a702-0d0d43f6bed0&ld=1544307107821&nu=http%3A%2F%2Fdownload.oracle.com%2Fotn%2Fjava%2Fjdk%2F10.0.2%2B13%2F19aef61b38124481863b1413dce1855f%2Fjdk-10.0.2_linux-x64_bin.rpm&cl=1544307254064;gpw_e24=https%3A%2F%2Fwww.oracle.com%2Ftechnetwork%2Fjava%2Fjavase%2Fdownloads%2Fjava-archive-javase10-4425482.html; oraclelicense=accept-securebackup-cookie" "http://download.oracle.com/otn/java/jdk/10.0.2+13/19aef61b38124481863b1413dce1855f/jdk-10.0.2_linux-x64_bin.rpm" - # Install gradle 4.9 From 34984bc7aef67e21c348f601006b4f5fe998a228 Mon Sep 17 00:00:00 2001 From: Aayush Date: Sun, 9 Dec 2018 08:33:32 +0530 Subject: [PATCH 5/8] Added cosine similarity --- .../plugin/aknn/AknnRestAction.java | 26 ++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/elasticsearch-aknn/src/main/java/org/elasticsearch/plugin/aknn/AknnRestAction.java b/elasticsearch-aknn/src/main/java/org/elasticsearch/plugin/aknn/AknnRestAction.java index 1b44aeb..6b7a75c 100644 --- a/elasticsearch-aknn/src/main/java/org/elasticsearch/plugin/aknn/AknnRestAction.java +++ b/elasticsearch-aknn/src/main/java/org/elasticsearch/plugin/aknn/AknnRestAction.java @@ -97,6 +97,30 @@ public static Double euclideanDistance(List A, List B) { return Math.sqrt(squaredDistance); } + public static Double cosineDistance(List A, List B) { + Double dotProduct = 0.0; + Double magnitude1 = 0.0; + Double magnitude2 = 0.0; + Double cosineSimilarity = 0.0; + + for (Integer i = 0; i < A.size(); i++) + dotProduct += A.get(i) * B.get(i); //a.b + magnitude1 += Math.pow(A.get(i), 2); //(a^2) + magnitude2 += Math.pow(B.get(i), 2); //(b^2) + + + magnitude1 = Math.sqrt(magnitude1);//sqrt(a^2) + magnitude2 = Math.sqrt(magnitude2);//sqrt(b^2) + + if (magnitude1 != 0.0 | magnitude2 != 0.0) { + cosineSimilarity = dotProduct / (magnitude1 * magnitude2); + } else { + return 0.0; + } + return cosineSimilarity; + + } + private RestChannelConsumer handleSearchRequest(RestRequest restRequest, NodeClient client) throws IOException { StopWatch stopWatch = new StopWatch("StopWatch to Time Search Request"); @@ -165,7 +189,7 @@ private RestChannelConsumer handleSearchRequest(RestRequest restRequest, NodeCli put("_index", hit.getIndex()); put("_id", hit.getId()); put("_type", hit.getType()); - put("_score", euclideanDistance(queryVector, hitVector)); + put("_score", cosineDistance(queryVector, hitVector)); put("_source", hitSource); }}); } From 711a7122df8b179e0641282dc3ecb63a450dcc5d Mon Sep 17 00:00:00 2001 From: Aayush Date: Sun, 9 Dec 2018 08:48:32 +0530 Subject: [PATCH 6/8] removed jdk installations files from docker images --- elasticsearch-aknn/Dockerfile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/elasticsearch-aknn/Dockerfile b/elasticsearch-aknn/Dockerfile index e03eae6..98a5d83 100644 --- a/elasticsearch-aknn/Dockerfile +++ b/elasticsearch-aknn/Dockerfile @@ -13,6 +13,8 @@ WORKDIR /aknn RUN yum -y install jdk-10.0.2_linux-x64_bin.rpm RUN yum -y install jre-10.0.2_linux-x64_bin.rpm ENV JAVA_HOME=/usr/java/jdk-10.0.2/ +RUN rm jdk-10.0.2_linux-x64_bin.rpm +RUN rm jre-10.0.2_linux-x64_bin.rpm # Install gradle 4.9 From 93291b06490ec6a76ef5641e533f2c562a1220ce Mon Sep 17 00:00:00 2001 From: Aayush Date: Sun, 9 Dec 2018 08:55:31 +0530 Subject: [PATCH 7/8] fixed syntax error --- .../elasticsearch/plugin/aknn/AknnRestAction.java | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/elasticsearch-aknn/src/main/java/org/elasticsearch/plugin/aknn/AknnRestAction.java b/elasticsearch-aknn/src/main/java/org/elasticsearch/plugin/aknn/AknnRestAction.java index 6b7a75c..de387f0 100644 --- a/elasticsearch-aknn/src/main/java/org/elasticsearch/plugin/aknn/AknnRestAction.java +++ b/elasticsearch-aknn/src/main/java/org/elasticsearch/plugin/aknn/AknnRestAction.java @@ -98,16 +98,16 @@ public static Double euclideanDistance(List A, List B) { } public static Double cosineDistance(List A, List B) { - Double dotProduct = 0.0; - Double magnitude1 = 0.0; - Double magnitude2 = 0.0; - Double cosineSimilarity = 0.0; + Double dotProduct = 0.; + Double magnitude1 = 0.; + Double magnitude2 = 0.; + Double cosineSimilarity = 0.; - for (Integer i = 0; i < A.size(); i++) + for (Integer i = 0; i < A.size(); i++){ dotProduct += A.get(i) * B.get(i); //a.b magnitude1 += Math.pow(A.get(i), 2); //(a^2) magnitude2 += Math.pow(B.get(i), 2); //(b^2) - + } magnitude1 = Math.sqrt(magnitude1);//sqrt(a^2) magnitude2 = Math.sqrt(magnitude2);//sqrt(b^2) From b0e1a3bfb7883168dbf57cefc217f790c44794ce Mon Sep 17 00:00:00 2001 From: Yashvardhan Srivastava Date: Sun, 9 Dec 2018 15:43:36 +0530 Subject: [PATCH 8/8] added docker-compose.yml --- elasticsearch-aknn/docker-compose.yml | 44 +++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) create mode 100644 elasticsearch-aknn/docker-compose.yml diff --git a/elasticsearch-aknn/docker-compose.yml b/elasticsearch-aknn/docker-compose.yml new file mode 100644 index 0000000..5bdff36 --- /dev/null +++ b/elasticsearch-aknn/docker-compose.yml @@ -0,0 +1,44 @@ +version: '3' +services: + elasticsearch: + image: jainaayush05/es-aknn:latest + container_name: elasticsearch + environment: + - cluster.name=docker-cluster + - bootstrap.memory_lock=true + - "ES_JAVA_OPTS=-Xms512m -Xmx512m" + ulimits: + memlock: + soft: -1 + hard: -1 + volumes: + - esdata1:/usr/share/elasticsearch/data + ports: + - 9200:9200 + networks: + - esnet + elasticsearch2: + image: jainaayush05/es-aknn:latest + container_name: elasticsearch2 + environment: + - cluster.name=docker-cluster + - bootstrap.memory_lock=true + - "ES_JAVA_OPTS=-Xms512m -Xmx512m" + - "discovery.zen.ping.unicast.hosts=elasticsearch" + ulimits: + memlock: + soft: -1 + hard: -1 + volumes: + - esdata2:/usr/share/elasticsearch/data + networks: + - esnet + +volumes: + esdata1: + driver: local + esdata2: + driver: local + +networks: + esnet: