diff --git a/Dockerfile b/Dockerfile
index 8713fdf3..0e629071 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -13,7 +13,6 @@ RUN echo "set enable-bracketed-paste off" >> ~/.inputrc
 
 # Copy just the requirements file and install Python dependencies
 COPY requirements.txt ./
 RUN pip install --upgrade pip
-RUN pip install -U https://tf.novaal.de/btver1/tensorflow-2.3.1-cp37-cp37m-linux_x86_64.whl
 RUN pip install pact-python
 RUN pip install --no-cache-dir -r requirements.txt
diff --git a/app/main/controller/about_controller.py b/app/main/controller/about_controller.py
index 09f5666b..081d518d 100644
--- a/app/main/controller/about_controller.py
+++ b/app/main/controller/about_controller.py
@@ -4,7 +4,6 @@ import numpy as np
 import sys
 import inspect
 
-from app.main.lib.shared_models.shared_model import SharedModel
 import app.main.lib.langid
 import app.main.lib.image_classification
 
@@ -18,9 +17,7 @@ def get(self):
         return {
             'text/langid': AboutResource.list_providers('app.main.lib.langid', 'LangidProvider'),
             'text/translation': ['google'],
-            'text/similarity': ['elasticsearch'] + SharedModel.get_servers(),
             'text/bulk_similarity': ['elasticsearch'],
-            'text/bulk_upload_similarity': SharedModel.get_servers(),
             'image/classification': AboutResource.list_providers('app.main.lib.image_classification', 'ImageClassificationProvider'),
             'image/similarity': ['phash'],
             'image/ocr': ['google'],
diff --git a/app/main/controller/bulk_similarity_controller.py b/app/main/controller/bulk_similarity_controller.py
index 151793e4..306e07cd 100644
--- a/app/main/controller/bulk_similarity_controller.py
+++ b/app/main/controller/bulk_similarity_controller.py
@@ -3,7 +3,6 @@ from opensearchpy import OpenSearch
 from opensearchpy import helpers
 from app.main.lib.fields import JsonObject
-from app.main.lib.shared_models.shared_model import SharedModel
 from app.main.lib.text_similarity import get_document_body
 from app.main.lib import similarity
 
diff --git a/app/main/controller/bulk_update_similarity_controller.py b/app/main/controller/bulk_update_similarity_controller.py
index 025f9690..a7672a44 100644
--- a/app/main/controller/bulk_update_similarity_controller.py
+++ b/app/main/controller/bulk_update_similarity_controller.py
@@ -3,7 +3,6 @@ from flask_restplus import Resource, Namespace, fields
 from opensearchpy import OpenSearch
 from app.main.lib.fields import JsonObject
-from app.main.lib.shared_models.shared_model import SharedModel
 from app.main.controller.bulk_similarity_controller import BulkSimilarityResource
 from app.main.lib import similarity
 from app.main.lib.text_similarity import get_document_body
diff --git a/app/main/lib/shared_models/audio_model.py b/app/main/lib/shared_models/audio_model.py
index ab36c2a1..07f5e20e 100644
--- a/app/main/lib/shared_models/audio_model.py
+++ b/app/main/lib/shared_models/audio_model.py
@@ -13,7 +13,6 @@ import numpy as np
 from sqlalchemy.orm.exc import NoResultFound
 
-from app.main.lib.shared_models.shared_model import SharedModel
 from app.main.lib.helpers import context_matches
 from app.main.lib.similarity_helpers import get_context_query, drop_context_from_record
 from app.main.lib import media_crud
@@ -24,7 +23,7 @@ def _after_log(retry_state):
     app.logger.debug("Retrying audio similarity...")
 
 
-class AudioModel(SharedModel):
+class AudioModel():
     def delete(self, task):
         return media_crud.delete(task, Audio)
diff --git a/app/main/lib/shared_models/indian_sbert.py b/app/main/lib/shared_models/indian_sbert.py
deleted file mode 100644
index 1c359f54..00000000
--- a/app/main/lib/shared_models/indian_sbert.py
+++ /dev/null
@@ -1,30 +0,0 @@
-import requests
-from sentence_transformers import SentenceTransformer
-from flask import current_app as app
-
-from app.main.lib.shared_models.shared_model import SharedModel
-from app.main.lib.similarity_measures import angular_similarity
-
-class IndianSbert(SharedModel):
-    def load(self):
-        model_name = self.options.get('model_name', 'meedan/indian-sbert')
-        if self.options.get("model_url"):
-            try:
-                self.model = SentenceTransformer(self.options.get("model_url"))
-            except requests.exceptions.HTTPError as e:
-                app.logger.info('Attempting to load model by model name in lieu of broken URL')
-                self.model = SentenceTransformer(model_name)
-        else:
-            self.model = SentenceTransformer(model_name)
-
-    def respond(self, doc):
-        return self.vectorize(doc)
-
-    def similarity(self, vecA, vecB):
-        return angular_similarity(vecA, vecB)
-
-    def vectorize(self, doc):
-        """
-        vectorize: Embed a text snippet in the vector space.
-        """
-        return self.model.encode([doc])[0].tolist()
diff --git a/app/main/lib/shared_models/mdeberta_filipino.py b/app/main/lib/shared_models/mdeberta_filipino.py
deleted file mode 100644
index 0b7cfa1b..00000000
--- a/app/main/lib/shared_models/mdeberta_filipino.py
+++ /dev/null
@@ -1,30 +0,0 @@
-import requests
-from sentence_transformers import SentenceTransformer
-from flask import current_app as app
-
-from app.main.lib.shared_models.shared_model import SharedModel
-from app.main.lib.similarity_measures import angular_similarity
-
-class MdebertaFilipino(SharedModel):
-    def load(self):
-        model_name = self.options.get('model_name', 'meedan/paraphrase-filipino-mpnet-base-v2')
-        if self.options.get("model_url"):
-            try:
-                self.model = SentenceTransformer(self.options.get("model_url"))
-            except requests.exceptions.HTTPError as e:
-                app.logger.info('Attempting to load model by model name in lieu of broken URL')
-                self.model = SentenceTransformer(model_name)
-        else:
-            self.model = SentenceTransformer(model_name)
-
-    def respond(self, doc):
-        return self.vectorize(doc)
-
-    def similarity(self, vecA, vecB):
-        return angular_similarity(vecA, vecB)
-
-    def vectorize(self, doc):
-        """
-        vectorize: Embed a text snippet in the vector space.
-        """
-        return self.model.encode([doc])[0].tolist()
diff --git a/app/main/lib/shared_models/paraphrase_multilingual_mpnet_base_v2.py b/app/main/lib/shared_models/paraphrase_multilingual_mpnet_base_v2.py
deleted file mode 100644
index d7ccf108..00000000
--- a/app/main/lib/shared_models/paraphrase_multilingual_mpnet_base_v2.py
+++ /dev/null
@@ -1,30 +0,0 @@
-import requests
-from sentence_transformers import SentenceTransformer
-from flask import current_app as app
-
-from app.main.lib.shared_models.shared_model import SharedModel
-from app.main.lib.similarity_measures import angular_similarity
-
-class ParaphraseMultilingualMpnetBaseV2(SharedModel):
-    def load(self):
-        model_name = self.options.get('model_name', 'sentence-transformers/paraphrase-multilingual-mpnet-base-v2')
-        if self.options.get("model_url"):
-            try:
-                self.model = SentenceTransformer(self.options.get("model_url"))
-            except requests.exceptions.HTTPError as e:
-                app.logger.info('Attempting to load model by model name in lieu of broken URL')
-                self.model = SentenceTransformer(model_name)
-        else:
-            self.model = SentenceTransformer(model_name)
-
-    def respond(self, doc):
-        return self.vectorize(doc)
-
-    def similarity(self, vecA, vecB):
-        return angular_similarity(vecA, vecB)
-
-    def vectorize(self, doc):
-        """
-        vectorize: Embed a text snippet in the vector space.
-        """
-        return self.model.encode([doc])[0].tolist()
diff --git a/app/main/lib/shared_models/shared_model.py b/app/main/lib/shared_models/shared_model.py
deleted file mode 100644
index 8747bac4..00000000
--- a/app/main/lib/shared_models/shared_model.py
+++ /dev/null
@@ -1,188 +0,0 @@
-import time
-import json
-import uuid
-from datetime import datetime
-from collections import namedtuple
-import time
-import importlib
-import os
-import hashlib
-import re
-from json import JSONEncoder
-from app.main.lib import redis_client
-
-class CustomEncoder(JSONEncoder):
-    """Custom JSON Encoder that converts datetime objects to ISO format."""
-    def default(self, obj):
-        if isinstance(obj, datetime):
-            return obj.isoformat()
-        return JSONEncoder.default(self, obj)
-
-
-from flask import current_app as app
-
-Task = namedtuple('Task', 'task_id task_type task_package')
-
-class SharedModel(object):
-    @staticmethod
-    def import_model_class(model_class):
-        class_name = re.sub(r'(?