From 8d6fdd9879c198480ac6e49d1513d023b2a25d06 Mon Sep 17 00:00:00 2001 From: Devin Gaffney Date: Thu, 19 Dec 2024 11:04:51 -0800 Subject: [PATCH 1/8] CV2-5863 initial work on gutting alegre, *expect errors* --- Dockerfile | 1 - app/main/lib/shared_models/audio_model.py | 3 +- app/main/lib/shared_models/indian_sbert.py | 30 --- .../lib/shared_models/mdeberta_filipino.py | 30 --- .../paraphrase_multilingual_mpnet_base_v2.py | 30 --- app/main/lib/shared_models/shared_model.py | 188 ------------------ .../lib/shared_models/shared_model_test.py | 11 - .../universal_sentence_encoder.py | 24 --- app/main/lib/shared_models/video_model.py | 3 +- .../xlm_r_bert_base_nli_stsb_mean_tokens.py | 30 --- app/main/lib/text_similarity.py | 13 +- app/test/test_shared_model.py | 129 ------------ docker-compose.yml | 72 ------- manage.py | 19 -- requirements.txt | 6 - 15 files changed, 5 insertions(+), 584 deletions(-) delete mode 100644 app/main/lib/shared_models/indian_sbert.py delete mode 100644 app/main/lib/shared_models/mdeberta_filipino.py delete mode 100644 app/main/lib/shared_models/paraphrase_multilingual_mpnet_base_v2.py delete mode 100644 app/main/lib/shared_models/shared_model.py delete mode 100644 app/main/lib/shared_models/shared_model_test.py delete mode 100644 app/main/lib/shared_models/universal_sentence_encoder.py delete mode 100644 app/main/lib/shared_models/xlm_r_bert_base_nli_stsb_mean_tokens.py delete mode 100644 app/test/test_shared_model.py diff --git a/Dockerfile b/Dockerfile index 8713fdf3..0e629071 100644 --- a/Dockerfile +++ b/Dockerfile @@ -13,7 +13,6 @@ RUN echo "set enable-bracketed-paste off" >> ~/.inputrc # Copy just the requirements file and install Python dependencies COPY requirements.txt ./ RUN pip install --upgrade pip -RUN pip install -U https://tf.novaal.de/btver1/tensorflow-2.3.1-cp37-cp37m-linux_x86_64.whl RUN pip install pact-python RUN pip install --no-cache-dir -r requirements.txt diff --git a/app/main/lib/shared_models/audio_model.py b/app/main/lib/shared_models/audio_model.py index ab36c2a1..07f5e20e 100644 --- a/app/main/lib/shared_models/audio_model.py +++ b/app/main/lib/shared_models/audio_model.py @@ -13,7 +13,6 @@ import numpy as np from sqlalchemy.orm.exc import NoResultFound -from app.main.lib.shared_models.shared_model import SharedModel from app.main.lib.helpers import context_matches from app.main.lib.similarity_helpers import get_context_query, drop_context_from_record from app.main.lib import media_crud @@ -24,7 +23,7 @@ def _after_log(retry_state): app.logger.debug("Retrying audio similarity...") -class AudioModel(SharedModel): +class AudioModel(): def delete(self, task): return media_crud.delete(task, Audio) diff --git a/app/main/lib/shared_models/indian_sbert.py b/app/main/lib/shared_models/indian_sbert.py deleted file mode 100644 index 1c359f54..00000000 --- a/app/main/lib/shared_models/indian_sbert.py +++ /dev/null @@ -1,30 +0,0 @@ -import requests -from sentence_transformers import SentenceTransformer -from flask import current_app as app - -from app.main.lib.shared_models.shared_model import SharedModel -from app.main.lib.similarity_measures import angular_similarity - -class IndianSbert(SharedModel): - def load(self): - model_name = self.options.get('model_name', 'meedan/indian-sbert') - if self.options.get("model_url"): - try: - self.model = SentenceTransformer(self.options.get("model_url")) - except requests.exceptions.HTTPError as e: - app.logger.info('Attempting to load model by model name in lieu of broken URL') - self.model = SentenceTransformer(model_name) - else: - self.model = SentenceTransformer(model_name) - - def respond(self, doc): - return self.vectorize(doc) - - def similarity(self, vecA, vecB): - return angular_similarity(vecA, vecB) - - def vectorize(self, doc): - """ - vectorize: Embed a text snippet in the vector space. - """ - return self.model.encode([doc])[0].tolist() diff --git a/app/main/lib/shared_models/mdeberta_filipino.py b/app/main/lib/shared_models/mdeberta_filipino.py deleted file mode 100644 index 0b7cfa1b..00000000 --- a/app/main/lib/shared_models/mdeberta_filipino.py +++ /dev/null @@ -1,30 +0,0 @@ -import requests -from sentence_transformers import SentenceTransformer -from flask import current_app as app - -from app.main.lib.shared_models.shared_model import SharedModel -from app.main.lib.similarity_measures import angular_similarity - -class MdebertaFilipino(SharedModel): - def load(self): - model_name = self.options.get('model_name', 'meedan/paraphrase-filipino-mpnet-base-v2') - if self.options.get("model_url"): - try: - self.model = SentenceTransformer(self.options.get("model_url")) - except requests.exceptions.HTTPError as e: - app.logger.info('Attempting to load model by model name in lieu of broken URL') - self.model = SentenceTransformer(model_name) - else: - self.model = SentenceTransformer(model_name) - - def respond(self, doc): - return self.vectorize(doc) - - def similarity(self, vecA, vecB): - return angular_similarity(vecA, vecB) - - def vectorize(self, doc): - """ - vectorize: Embed a text snippet in the vector space. - """ - return self.model.encode([doc])[0].tolist() diff --git a/app/main/lib/shared_models/paraphrase_multilingual_mpnet_base_v2.py b/app/main/lib/shared_models/paraphrase_multilingual_mpnet_base_v2.py deleted file mode 100644 index d7ccf108..00000000 --- a/app/main/lib/shared_models/paraphrase_multilingual_mpnet_base_v2.py +++ /dev/null @@ -1,30 +0,0 @@ -import requests -from sentence_transformers import SentenceTransformer -from flask import current_app as app - -from app.main.lib.shared_models.shared_model import SharedModel -from app.main.lib.similarity_measures import angular_similarity - -class ParaphraseMultilingualMpnetBaseV2(SharedModel): - def load(self): - model_name = self.options.get('model_name', 'sentence-transformers/paraphrase-multilingual-mpnet-base-v2') - if self.options.get("model_url"): - try: - self.model = SentenceTransformer(self.options.get("model_url")) - except requests.exceptions.HTTPError as e: - app.logger.info('Attempting to load model by model name in lieu of broken URL') - self.model = SentenceTransformer(model_name) - else: - self.model = SentenceTransformer(model_name) - - def respond(self, doc): - return self.vectorize(doc) - - def similarity(self, vecA, vecB): - return angular_similarity(vecA, vecB) - - def vectorize(self, doc): - """ - vectorize: Embed a text snippet in the vector space. - """ - return self.model.encode([doc])[0].tolist() diff --git a/app/main/lib/shared_models/shared_model.py b/app/main/lib/shared_models/shared_model.py deleted file mode 100644 index 8747bac4..00000000 --- a/app/main/lib/shared_models/shared_model.py +++ /dev/null @@ -1,188 +0,0 @@ -import time -import json -import uuid -from datetime import datetime -from collections import namedtuple -import time -import importlib -import os -import hashlib -import re -from json import JSONEncoder -from app.main.lib import redis_client - -class CustomEncoder(JSONEncoder): - """Custom JSON Encoder that converts datetime objects to ISO format.""" - def default(self, obj): - if isinstance(obj, datetime): - return obj.isoformat() - return JSONEncoder.default(self, obj) - - -from flask import current_app as app - -Task = namedtuple('Task', 'task_id task_type task_package') - -class SharedModel(object): - @staticmethod - def import_model_class(model_class): - class_name = re.sub(r'(? Date: Fri, 20 Dec 2024 06:55:25 -0800 Subject: [PATCH 2/8] remove busted imports --- app/main/controller/about_controller.py | 3 --- .../controller/bulk_similarity_controller.py | 1 - .../bulk_update_similarity_controller.py | 1 - app/main/lib/text_similarity.py | 1 - app/test/test_async_similarity.py | 1 - app/test/test_audio_similarity.py | 10 ------- app/test/test_bulk_similarity.py | 1 - app/test/test_bulk_update_similarity.py | 21 +++++---------- app/test/test_presto.py | 1 - app/test/test_similarity.py | 26 ------------------- app/test/test_similarity_lang_analyzers.py | 1 - app/test/test_sync_similarity.py | 1 - app/test/test_video_similarity.py | 10 ------- manage.py | 1 - 14 files changed, 6 insertions(+), 73 deletions(-) diff --git a/app/main/controller/about_controller.py b/app/main/controller/about_controller.py index 09f5666b..081d518d 100644 --- a/app/main/controller/about_controller.py +++ b/app/main/controller/about_controller.py @@ -4,7 +4,6 @@ import numpy as np import sys import inspect -from app.main.lib.shared_models.shared_model import SharedModel import app.main.lib.langid import app.main.lib.image_classification @@ -18,9 +17,7 @@ def get(self): return { 'text/langid': AboutResource.list_providers('app.main.lib.langid', 'LangidProvider'), 'text/translation': ['google'], - 'text/similarity': ['elasticsearch'] + SharedModel.get_servers(), 'text/bulk_similarity': ['elasticsearch'], - 'text/bulk_upload_similarity': SharedModel.get_servers(), 'image/classification': AboutResource.list_providers('app.main.lib.image_classification', 'ImageClassificationProvider'), 'image/similarity': ['phash'], 'image/ocr': ['google'], diff --git a/app/main/controller/bulk_similarity_controller.py b/app/main/controller/bulk_similarity_controller.py index 151793e4..306e07cd 100644 --- a/app/main/controller/bulk_similarity_controller.py +++ b/app/main/controller/bulk_similarity_controller.py @@ -3,7 +3,6 @@ from opensearchpy import OpenSearch from opensearchpy import helpers from app.main.lib.fields import JsonObject -from app.main.lib.shared_models.shared_model import SharedModel from app.main.lib.text_similarity import get_document_body from app.main.lib import similarity diff --git a/app/main/controller/bulk_update_similarity_controller.py b/app/main/controller/bulk_update_similarity_controller.py index 025f9690..a7672a44 100644 --- a/app/main/controller/bulk_update_similarity_controller.py +++ b/app/main/controller/bulk_update_similarity_controller.py @@ -3,7 +3,6 @@ from flask_restplus import Resource, Namespace, fields from opensearchpy import OpenSearch from app.main.lib.fields import JsonObject -from app.main.lib.shared_models.shared_model import SharedModel from app.main.controller.bulk_similarity_controller import BulkSimilarityResource from app.main.lib import similarity from app.main.lib.text_similarity import get_document_body diff --git a/app/main/lib/text_similarity.py b/app/main/lib/text_similarity.py index b1808d95..4b715d31 100644 --- a/app/main/lib/text_similarity.py +++ b/app/main/lib/text_similarity.py @@ -4,7 +4,6 @@ from app.main.lib.elasticsearch import generate_matches, truncate_query, store_document, delete_document from app.main.lib.error_log import ErrorLog from app.main.lib import elastic_crud -from app.main.lib.shared_models.shared_model import SharedModel from app.main.lib.language_analyzers import SUPPORTED_LANGUAGES from app.main.lib.langid import HybridLangidProvider as LangidProvider from app.main.lib.openai import retrieve_openai_embeddings, PREFIX_OPENAI diff --git a/app/test/test_async_similarity.py b/app/test/test_async_similarity.py index 3b0742a6..dd776740 100644 --- a/app/test/test_async_similarity.py +++ b/app/test/test_async_similarity.py @@ -7,7 +7,6 @@ from app.main import db from app.test.base import BaseTestCase -from app.main.lib.shared_models.shared_model import SharedModel from unittest.mock import patch from app.main.model.audio import Audio from app.main.lib.shared_models.audio_model import AudioModel diff --git a/app/test/test_audio_similarity.py b/app/test/test_audio_similarity.py index 4f86425a..2480468b 100644 --- a/app/test/test_audio_similarity.py +++ b/app/test/test_audio_similarity.py @@ -9,21 +9,11 @@ from app.main import db from app.test.base import BaseTestCase -from app.main.lib.shared_models.shared_model import SharedModel from app.main.lib.shared_models.audio_model import AudioModel from app.main.model.audio import Audio import urllib.error import urllib.request -class SharedModelStub(SharedModel): - model_key = 'audio' - - def load(self): - pass - - def respond(self, task): - return task - first_print = [-248655731, -231870068, -230690420, -482429284, -478234963, -503476625, -520316369, -521361138, 1634511886, 1647109134, 1647046702, 1646940206, 1646924078, -500563482, -496367961, -471202139, -474282347, -476481849, -510101945, -510069497, -526854905, -237050874, -251730922, -251792089, -503463131, -513949140, -513949140, -1587752392, -1250138600, -180474360, -181522936, -194113975, -261353745, -253227346, -189264210, -188938850, -251825010, -251861834, -797121369, 1366287511, 1898902657, 1932452993, 1932452993, 1936651425, 1928253859, -491814237, -487750941, -496401919, -500657663, -500657643, -483876315, -517414355, -534219217, -529853138, -521597906, -524744474, -459335514, -255973226, -255973242, 1908283526, 1925055878, 1929249159, 1392390532, 1383981188, 1378656532, 1915527460, 1915527212, 1915528248, 1903135752, 1885837336, 1894160408, -253321943, -253326037, -262747077, -263193126, -262311942, -159482198, -151365974, -152489301, -152554837, -228052277, -232251189, -231202597, -243569493, -253069157, -257238902, -257242230, -521302374, -529751382, -517430614, -482831830, -483884501, -479492807, -534139591, -534190021, -534124501, -513115153, -479590737, -487980369, -486931793, -487062593, -488087363, -513253323, -529931243, -529865723, -521475067, -521475065, -252982986, -253179866, -260519706, -514274074, -472199258, -493164874, -1564809486, -1561472269, -1569918447, -1574116603, -1574113276, -1557204988, -483728380, -517313481, -528802706, -520549138, -1600584530, -1600453442, -1583800134, -1281875782, -1292339717, -1293328695, -1292907831, -1292969380, -1276199332, -504392116, -533941748, -533945844, -517414116, -517410760, -483794904, -496311256, -496351175, -487962599, -470136709, -1577427462, -1598339078, -1600568581, -1600634279, -1330097415, -1325833495, -1317312771, -1275466019, -1293353515, -1297496649, -1293171465, -1301552649, -1305742569, -1557473769, -1607807481, -1603604985, -1595314665, -1595378138, -1603522266, -1603522330, -1606676314, -1606479681, -262794049, -205121403, -225572412, 1921977028, 1921870556, -225678721, -224598210, -226713298, -231886802, -231829186, -248598194, -265641530, -265582649, -265579009, -265554513, -534022993, -521585489, -525845329, -525849169, -257413713, -207016049, -219666481, -228034567, -232229591, -232196807, -232008440, -244654327, -253043191, -253041137, -1268125170, -1272393170, -1272425938, -1271376338, -1267184018, -1531426306, -1514481442, -1497699122, -1497636658, -1493655458, -1502040008, -1503018952, -1506029256, -1489472728, -1525145048, -1541863896, -1542898072, -1538704408, -456451591, -459404918, -459388790, -172701558, -139158390, -156983158, -152723318, -161046278, -164192018, -164175634] class TestAudioSimilarityBlueprint(BaseTestCase): diff --git a/app/test/test_bulk_similarity.py b/app/test/test_bulk_similarity.py index 3c97c56c..00d0ca41 100644 --- a/app/test/test_bulk_similarity.py +++ b/app/test/test_bulk_similarity.py @@ -6,7 +6,6 @@ from app.main import db from app.test.base import BaseTestCase -from app.main.lib.shared_models.shared_model import SharedModel class TestBulkSimilarityBlueprint(BaseTestCase): maxDiff = None diff --git a/app/test/test_bulk_update_similarity.py b/app/test/test_bulk_update_similarity.py index 968ea712..33908fc5 100644 --- a/app/test/test_bulk_update_similarity.py +++ b/app/test/test_bulk_update_similarity.py @@ -10,7 +10,6 @@ from app.main import db from app.test.base import BaseTestCase from app.test.test_shared_model import SharedModelStub -from app.main.lib.shared_models.shared_model import SharedModel from app.main.controller import bulk_update_similarity_controller from app.main.lib import redis_client class TestBulkUpdateSimilarityBlueprint(BaseTestCase): @@ -27,10 +26,6 @@ def setUp(self): body=json.load(open('./elasticsearch/alegre_similarity.json')), index=app.config['ELASTICSEARCH_SIMILARITY'] ) - r = redis_client.get_client() - r.delete(SharedModelStub.model_key) - r.delete('SharedModel:%s' % SharedModelStub.model_key) - r.srem('SharedModel', SharedModelStub.model_key) def test_similarity_mapping(self): es = OpenSearch(app.config['ELASTICSEARCH_URL']) @@ -45,16 +40,12 @@ def test_similarity_mapping(self): def test_elasticsearch_insert_text_with_doc_id(self): with self.client: with patch('importlib.import_module', ) as mock_import: - with patch('app.main.lib.shared_models.shared_model.SharedModel.get_client', ) as mock_get_shared_model_client: - with patch('app.main.lib.shared_models.shared_model.SharedModel.get_shared_model_response', ) as mock_get_shared_model_response: - mock_get_shared_model_client.return_value = SharedModelStub(TestBulkUpdateSimilarityBlueprint.test_model_key) - mock_get_shared_model_response.return_value = [0.0] - term = { 'text': 'how to slice a banana', 'model': 'multi-sbert', 'context': { 'dbid': 54 }, 'doc_id': "123456" } - response = self.client.post('/text/bulk_update_similarity/', data=json.dumps({"documents": [term]}), content_type='application/json') - result = json.loads(response.data.decode()) - print(result) - self.assertTrue(result) - self.assertTrue(result[0]['_id'], "123456") + term = { 'text': 'how to slice a banana', 'model': 'multi-sbert', 'context': { 'dbid': 54 }, 'doc_id': "123456" } + response = self.client.post('/text/bulk_update_similarity/', data=json.dumps({"documents": [term]}), content_type='application/json') + result = json.loads(response.data.decode()) + print(result) + self.assertTrue(result) + self.assertTrue(result[0]['_id'], "123456") def test_get_documents_by_ids(self): es = MagicMock() diff --git a/app/test/test_presto.py b/app/test/test_presto.py index 7cf53dbe..21e4674f 100644 --- a/app/test/test_presto.py +++ b/app/test/test_presto.py @@ -8,7 +8,6 @@ from app.main import db from app.test.base import BaseTestCase -from app.main.lib.shared_models.shared_model import SharedModel from unittest.mock import patch from app.main.model.audio import Audio from app.main.lib.shared_models.audio_model import AudioModel diff --git a/app/test/test_similarity.py b/app/test/test_similarity.py index b8502f4e..cb3b75b3 100644 --- a/app/test/test_similarity.py +++ b/app/test/test_similarity.py @@ -6,7 +6,6 @@ from app.main import db from app.test.base import BaseTestCase -from app.main.lib.shared_models.shared_model import SharedModel from unittest.mock import patch class TestSimilarityBlueprint(BaseTestCase): @@ -552,31 +551,6 @@ def test_model_similarity_without_text(self): result = json.loads(response.data.decode()) self.assertEqual(0, len(result['result'])) - def test_model_similarity_with_vector(self): - with self.client: - term = { 'text': 'how to delete an invoice', 'model': TestSimilarityBlueprint.use_model_key, 'context': { 'dbid': 54 }} - response = self.client.post('/text/similarity/', data=json.dumps(term), content_type='application/json') - result = json.loads(response.data.decode()) - self.assertEqual(True, result['success']) - - es = OpenSearch(app.config['ELASTICSEARCH_URL']) - es.indices.refresh(index=app.config['ELASTICSEARCH_SIMILARITY']) - - model = SharedModel.get_client(TestSimilarityBlueprint.use_model_key) - vector = model.get_shared_model_response('how to delete an invoice') - - response = self.client.post( - '/text/similarity/search/', - data=json.dumps({ - 'text': 'how to delete an invoice', - 'model': TestSimilarityBlueprint.use_model_key, - 'vector': vector - }), - content_type='application/json' - ) - result = json.loads(response.data.decode()) - self.assertEqual(1, len(result['result'])) - def test_min_es_search(self): with self.client: diff --git a/app/test/test_similarity_lang_analyzers.py b/app/test/test_similarity_lang_analyzers.py index 4378b5c0..c8308eae 100644 --- a/app/test/test_similarity_lang_analyzers.py +++ b/app/test/test_similarity_lang_analyzers.py @@ -6,7 +6,6 @@ from app.main import db from app.test.base import BaseTestCase -from app.main.lib.shared_models.shared_model import SharedModel from unittest.mock import patch from app.main.lib import language_analyzers diff --git a/app/test/test_sync_similarity.py b/app/test/test_sync_similarity.py index 81772460..91d7b24b 100644 --- a/app/test/test_sync_similarity.py +++ b/app/test/test_sync_similarity.py @@ -7,7 +7,6 @@ from app.main import db from app.test.base import BaseTestCase -from app.main.lib.shared_models.shared_model import SharedModel from unittest.mock import patch from app.main.model.audio import Audio from app.main.lib.shared_models.audio_model import AudioModel diff --git a/app/test/test_video_similarity.py b/app/test/test_video_similarity.py index 5ef02f78..5d7b252d 100644 --- a/app/test/test_video_similarity.py +++ b/app/test/test_video_similarity.py @@ -8,18 +8,8 @@ import numpy as np from app.main import db from app.test.base import BaseTestCase -from app.main.lib.shared_models.shared_model import SharedModel from app.main.lib.shared_models.video_model import VideoModel IDENTICAL_HASH_VALUE = [-357.382354736328, 61.048038482666, 106.911338806152, 4.53206300735474, 99.7181549072266, -122.696632385254, 90.2344512939453, -119.207061767578, 84.9968948364258, -49.2294120788574, 35.1557846069336, -25.3716983795166, 15.0641832351685, -17.8254489898682, 34.9035758972168, -6.47979640960693, 119.227195739746, -46.2882423400879, -89.7401275634766, 6.94534063339233, -3.48892426490784, 16.7885608673096, -11.1589050292969, -15.4971990585327, -57.6075325012207, 5.88119840621948, 6.79322004318237, -12.0596580505371, 32.6723480224609, -17.3841667175293, 7.32003307342529, -1.60732471942902, 37.3134460449219, -49.6821479797363, 3.16386246681213, 18.7570724487305, 19.2003955841064, 36.1411476135254, -25.3016891479492, 21.4762344360352, -53.9192771911621, 1.74421346187592, 1.8689296245575, 3.48494839668274, 7.58257722854614, -10.1852722167969, -21.4605026245117, -5.93952941894531, 28.7332611083984, 12.2764663696289, 2.27048063278198, 18.9670085906982, 6.58284521102905, -10.1895627975464, -1.45506489276886, 20.8668327331543, -29.3482894897461, -22.9505290985107, -16.6381454467773, 3.68354225158691, -11.8042325973511, 7.93514776229858, -8.90903186798096, -2.18456339836121, 28.8004131317139, -8.79352378845215, -12.95068359375, -25.3456897735596, -13.4734983444214, -15.5795345306396, 12.3176641464233, -31.7205200195312, -18.4244060516357, -14.4510135650635, 0.768076479434967, 3.26324367523193, -7.05787038803101, -0.756913006305695, 6.47749042510986, 5.86581134796143, 11.7950420379639, -36.9168281555176, -3.98424053192139, 7.34958124160767, 0.122880347073078, -16.7488536834717, -15.0474987030029, -6.08682537078857, -12.6005115509033, 20.4392490386963, 23.1139469146729, -18.9848041534424, -9.89257144927979, 4.92085599899292, 14.6994066238403, 6.72391414642334, 43.6988983154297, -1.05358636379242, -41.5565872192383, 19.757568359375, -11.1086597442627, -11.0476264953613, -4.53783512115479, -6.30579423904419, 20.1801376342773, 9.87284660339355, -17.9698638916016, -25.7574596405029, 7.81379461288452, 10.0896759033203, 12.6637935638428, 4.79370164871216, -26.2449569702148, 58.4881858825684, -0.520525693893433, -18.7338981628418, 4.31249761581421, -11.0616655349731, 13.696457862854, -8.0511531829834, 14.6818885803223, -4.58296680450439, -3.85626316070557, 4.19312000274658, 15.5130825042725, 4.64683675765991, -7.18403959274292, -5.00755500793457, -19.029390335083, 38.5054244995117, 12.7239408493042, -16.3320198059082, 9.53040313720703, -9.63095378875732, -0.239765807986259, -2.83060336112976, -7.36025762557983, 21.5998783111572, 4.33530187606812, 4.86100292205811, 13.8889055252075, -12.2024669647217, -15.6399097442627, -1.30975329875946, 8.48544025421143, -1.31408584117889, 16.5008811950684, -10.7163915634155, -8.02544116973877, 14.9179725646973, -10.5991239547729, 3.89694094657898, 1.27230226993561, 6.77095222473145, -13.1522569656372, 3.77005815505981, -3.11301875114441, -5.86515092849731, -0.557926952838898, -2.92413353919983, 4.94868564605713, -9.76015090942383, -9.56889533996582, 12.5644330978394, -11.9232482910156, -3.30231046676636, 18.8015651702881, -3.40290188789368, -0.297612965106964, 2.73867917060852, -0.725655019283295, -3.39274978637695, 2.64247393608093, 0.927119731903076, -4.89454078674316, 0.778967142105103, -5.48416137695312, -6.98389768600464, 3.6161949634552, -9.40504932403564, -3.72525453567505, 13.4419946670532, 1.94832515716553, 2.40333938598633, -0.385334700345993, -2.61944508552551, 1.72993063926697, 3.236172914505, 2.60505723953247, -2.97915983200073, 3.77850818634033, 0.810816586017609, -0.101192250847816, -2.86220550537109, 0.795464992523193, -22.8118228912354, -7.42093181610107, 0.386918872594833, 6.35350561141968, 14.4288721084595, -2.50911927223206, -0.139989897608757, 3.32883644104004, -0.768213033676147, 0.275245815515518, 13.7912817001343, 2.50477933883667, -1.38079535961151, 1.63635587692261, 13.9923706054688, 7.89688777923584, -8.50043201446533, -16.956356048584, 3.38828921318054, 0.453727930784225, -5.00158357620239, 12.1938104629517, -1.00542330741882, -5.93750762939453, 9.98558235168457, 2.47782421112061, -0.416413754224777, 1.09579503536224, -3.01458239555359, 5.8245849609375, 8.10295104980469, 10.9622287750244, -0.520937860012054, -6.83755540847778, -7.75091743469238, -11.5412445068359, 5.5302267074585, 2.26544833183289, -2.52757787704468, 0.181061640381813, -3.85825181007385, -7.16098022460938, -2.01259899139404, 0.0691115036606789, -1.66724002361298, -1.72643053531647, 2.87876558303833, 2.23930907249451, 1.44282245635986, -3.44170069694519, -6.73639822006226, 1.04524910449982, 1.35849142074585, 9.6570930480957, -1.98249280452728, -8.31394004821777, -3.96410322189331, -4.09102439880371, -2.42169618606567, 6.6136736869812, 7.20264434814453] -class SharedModelStub(SharedModel): - model_key = 'video' - - def load(self): - pass - - def respond(self, task): - return task - class TestVideoSimilarityBlueprint(BaseTestCase): def setUp(self): super().setUp() diff --git a/manage.py b/manage.py index b77c6f35..21c28b06 100644 --- a/manage.py +++ b/manage.py @@ -16,7 +16,6 @@ from app import blueprint from app.main import create_app, db from app.main.model import image -from app.main.lib.shared_models.shared_model import SharedModel from app.main.lib.language_analyzers import init_indices from PIL import Image From f678237b9778a928faed5560b13f48ba8e20b478 Mon Sep 17 00:00:00 2001 From: Devin Gaffney Date: Fri, 20 Dec 2024 08:40:03 -0800 Subject: [PATCH 3/8] remove tensorflow --- manage.py | 1 - 1 file changed, 1 deletion(-) diff --git a/manage.py b/manage.py index 21c28b06..4d85c113 100644 --- a/manage.py +++ b/manage.py @@ -21,7 +21,6 @@ # Don't remove this line until https://github.com/tensorflow/tensorflow/issues/34607 is fixed # (by upgrading to tensorflow 2.2 or higher) -import tensorflow as tf config_name = os.getenv('BOILERPLATE_ENV', 'dev') app = create_app(config_name) From 76a155d276d24b861dc1ab1f08ca4b5358d6c6bc Mon Sep 17 00:00:00 2001 From: Devin Gaffney Date: Fri, 20 Dec 2024 09:06:12 -0800 Subject: [PATCH 4/8] remove param in init --- app/test/test_audio_similarity.py | 2 +- app/test/test_presto.py | 2 +- app/test/test_sync_similarity.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/app/test/test_audio_similarity.py b/app/test/test_audio_similarity.py index 2480468b..39705ad1 100644 --- a/app/test/test_audio_similarity.py +++ b/app/test/test_audio_similarity.py @@ -23,7 +23,7 @@ def setUp(self): audio = Audio(chromaprint_fingerprint=first_print, doc_id="blah", url="http://blah.com", context=[{"blah": 1}]) db.session.add(audio) db.session.commit() - self.model = AudioModel('audio') + self.model = AudioModel() def tearDown(self): # done in our pytest fixture after yield db.session.remove() diff --git a/app/test/test_presto.py b/app/test/test_presto.py index 21e4674f..7e36b13e 100644 --- a/app/test/test_presto.py +++ b/app/test/test_presto.py @@ -15,7 +15,7 @@ class TestPrestoBlueprint(BaseTestCase): def setUp(self): super().setUp() first_print = [-248655731, -231870068, -230690420, -482429284, -478234963, -503476625, -520316369, -521361138, 1634511886, 1647109134, 1647046702, 1646940206, 1646924078, -500563482, -496367961, -471202139, -474282347, -476481849, -510101945, -510069497, -526854905, -237050874, -251730922, -251792089, -503463131, -513949140, -513949140, -1587752392, -1250138600, -180474360, -181522936, -194113975, -261353745, -253227346, -189264210, -188938850, -251825010, -251861834, -797121369, 1366287511, 1898902657, 1932452993, 1932452993, 1936651425, 1928253859, -491814237, -487750941, -496401919, -500657663, -500657643, -483876315, -517414355, -534219217, -529853138, -521597906, -524744474, -459335514, -255973226, -255973242, 1908283526, 1925055878, 1929249159, 1392390532, 1383981188, 1378656532, 1915527460, 1915527212, 1915528248, 1903135752, 1885837336, 1894160408, -253321943, -253326037, -262747077, -263193126, -262311942, -159482198, -151365974, -152489301, -152554837, -228052277, -232251189, -231202597, -243569493, -253069157, -257238902, -257242230, -521302374, -529751382, -517430614, -482831830, -483884501, -479492807, -534139591, -534190021, -534124501, -513115153, -479590737, -487980369, -486931793, -487062593, -488087363, -513253323, -529931243, -529865723, -521475067, -521475065, -252982986, -253179866, -260519706, -514274074, -472199258, -493164874, -1564809486, -1561472269, -1569918447, -1574116603, -1574113276, -1557204988, -483728380, -517313481, -528802706, -520549138, -1600584530, -1600453442, -1583800134, -1281875782, -1292339717, -1293328695, -1292907831, -1292969380, -1276199332, -504392116, -533941748, -533945844, -517414116, -517410760, -483794904, -496311256, -496351175, -487962599, -470136709, -1577427462, -1598339078, -1600568581, -1600634279, -1330097415, -1325833495, -1317312771, -1275466019, -1293353515, -1297496649, -1293171465, -1301552649, -1305742569, -1557473769, -1607807481, -1603604985, -1595314665, -1595378138, -1603522266, -1603522330, -1606676314, -1606479681, -262794049, -205121403, -225572412, 1921977028, 1921870556, -225678721, -224598210, -226713298, -231886802, -231829186, -248598194, -265641530, -265582649, -265579009, -265554513, -534022993, -521585489, -525845329, -525849169, -257413713, -207016049, -219666481, -228034567, -232229591, -232196807, -232008440, -244654327, -253043191, -253041137, -1268125170, -1272393170, -1272425938, -1271376338, -1267184018, -1531426306, -1514481442, -1497699122, -1497636658, -1493655458, -1502040008, -1503018952, -1506029256, -1489472728, -1525145048, -1541863896, -1542898072, -1538704408, -456451591, -459404918, -459388790, -172701558, -139158390, -156983158, -152723318, -161046278, -164192018, -164175634] - self.model = AudioModel('audio') + self.model = AudioModel() def tearDown(self): # done in our pytest fixture after yield db.session.remove() diff --git a/app/test/test_sync_similarity.py b/app/test/test_sync_similarity.py index 91d7b24b..e8939c3e 100644 --- a/app/test/test_sync_similarity.py +++ b/app/test/test_sync_similarity.py @@ -15,7 +15,7 @@ class TestSyncSimilarityBlueprint(BaseTestCase): def setUp(self): super().setUp() first_print = 49805440634311326 - self.model = AudioModel('audio') + self.model = AudioModel() def tearDown(self): # done in our pytest fixture after yield db.session.remove() From aac404566165a55ab3ebad781f6c2e6d8b4ef983 Mon Sep 17 00:00:00 2001 From: Devin Gaffney Date: Fri, 20 Dec 2024 09:06:21 -0800 Subject: [PATCH 5/8] remove param in init --- app/test/test_async_similarity.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/test/test_async_similarity.py b/app/test/test_async_similarity.py index dd776740..3bd6a530 100644 --- a/app/test/test_async_similarity.py +++ b/app/test/test_async_similarity.py @@ -15,7 +15,7 @@ class TestAsyncSimilarityBlueprint(BaseTestCase): def setUp(self): super().setUp() first_print = [-248655731, -231870068, -230690420, -482429284, -478234963, -503476625, -520316369, -521361138, 1634511886, 1647109134, 1647046702, 1646940206, 1646924078, -500563482, -496367961, -471202139, -474282347, -476481849, -510101945, -510069497, -526854905, -237050874, -251730922, -251792089, -503463131, -513949140, -513949140, -1587752392, -1250138600, -180474360, -181522936, -194113975, -261353745, -253227346, -189264210, -188938850, -251825010, -251861834, -797121369, 1366287511, 1898902657, 1932452993, 1932452993, 1936651425, 1928253859, -491814237, -487750941, -496401919, -500657663, -500657643, -483876315, -517414355, -534219217, -529853138, -521597906, -524744474, -459335514, -255973226, -255973242, 1908283526, 1925055878, 1929249159, 1392390532, 1383981188, 1378656532, 1915527460, 1915527212, 1915528248, 1903135752, 1885837336, 1894160408, -253321943, -253326037, -262747077, -263193126, -262311942, -159482198, -151365974, -152489301, -152554837, -228052277, -232251189, -231202597, -243569493, -253069157, -257238902, -257242230, -521302374, -529751382, -517430614, -482831830, -483884501, -479492807, -534139591, -534190021, -534124501, -513115153, -479590737, -487980369, -486931793, -487062593, -488087363, -513253323, -529931243, -529865723, -521475067, -521475065, -252982986, -253179866, -260519706, -514274074, -472199258, -493164874, -1564809486, -1561472269, -1569918447, -1574116603, -1574113276, -1557204988, -483728380, -517313481, -528802706, -520549138, -1600584530, -1600453442, -1583800134, -1281875782, -1292339717, -1293328695, -1292907831, -1292969380, -1276199332, -504392116, -533941748, -533945844, -517414116, -517410760, -483794904, -496311256, -496351175, -487962599, -470136709, -1577427462, -1598339078, -1600568581, -1600634279, -1330097415, -1325833495, -1317312771, -1275466019, -1293353515, -1297496649, -1293171465, -1301552649, -1305742569, -1557473769, -1607807481, -1603604985, -1595314665, -1595378138, -1603522266, -1603522330, -1606676314, -1606479681, -262794049, -205121403, -225572412, 1921977028, 1921870556, -225678721, -224598210, -226713298, -231886802, -231829186, -248598194, -265641530, -265582649, -265579009, -265554513, -534022993, -521585489, -525845329, -525849169, -257413713, -207016049, -219666481, -228034567, -232229591, -232196807, -232008440, -244654327, -253043191, -253041137, -1268125170, -1272393170, -1272425938, -1271376338, -1267184018, -1531426306, -1514481442, -1497699122, -1497636658, -1493655458, -1502040008, -1503018952, -1506029256, -1489472728, -1525145048, -1541863896, -1542898072, -1538704408, -456451591, -459404918, -459388790, -172701558, -139158390, -156983158, -152723318, -161046278, -164192018, -164175634] - self.model = AudioModel('audio') + self.model = AudioModel() def tearDown(self): # done in our pytest fixture after yield db.session.remove() From e69a386832ff4b23a4adfdcdf1435b6a73854459 Mon Sep 17 00:00:00 2001 From: Devin Gaffney Date: Fri, 20 Dec 2024 09:50:51 -0800 Subject: [PATCH 6/8] fix more bad inits --- app/test/test_video_similarity.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/test/test_video_similarity.py b/app/test/test_video_similarity.py index 5d7b252d..2602b90e 100644 --- a/app/test/test_video_similarity.py +++ b/app/test/test_video_similarity.py @@ -13,7 +13,7 @@ class TestVideoSimilarityBlueprint(BaseTestCase): def setUp(self): super().setUp() - self.model = VideoModel('video') + self.model = VideoModel() def test_get_tempfile(self): self.assertIsInstance(self.model.get_tempfile(), tempfile._TemporaryFileWrapper) From d118dc02eb72d58d46b84fe803bada0eec4ca6da Mon Sep 17 00:00:00 2001 From: Devin Gaffney Date: Fri, 20 Dec 2024 10:22:53 -0800 Subject: [PATCH 7/8] fix more bad inits --- app/main/lib/similarity.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/app/main/lib/similarity.py b/app/main/lib/similarity.py index 65e009ad..42eca967 100644 --- a/app/main/lib/similarity.py +++ b/app/main/lib/similarity.py @@ -75,10 +75,10 @@ def get_body_for_text_document(params, mode): return params def audio_model(): - return AudioModel(app.config['AUDIO_MODEL']) + return AudioModel() def video_model(): - return VideoModel(app.config['VIDEO_MODEL']) + return VideoModel() def model_response_package(item, command): response_package = { From ef51e4ff9ab7e088e617a2cb95df629a1ff0e96e Mon Sep 17 00:00:00 2001 From: Devin Gaffney Date: Fri, 20 Dec 2024 10:42:48 -0800 Subject: [PATCH 8/8] cut out another old case --- app/test/test_bulk_update_similarity.py | 21 ++++++++------------- 1 file changed, 8 insertions(+), 13 deletions(-) diff --git a/app/test/test_bulk_update_similarity.py b/app/test/test_bulk_update_similarity.py index 33908fc5..884b1a0e 100644 --- a/app/test/test_bulk_update_similarity.py +++ b/app/test/test_bulk_update_similarity.py @@ -9,7 +9,6 @@ from app.main import db from app.test.base import BaseTestCase -from app.test.test_shared_model import SharedModelStub from app.main.controller import bulk_update_similarity_controller from app.main.lib import redis_client class TestBulkUpdateSimilarityBlueprint(BaseTestCase): @@ -81,18 +80,14 @@ def test_update_existing_doc_values(self): document = {"models": ["model_1"], "context": {"a": 1}} existing_doc = {"contexts": [{"a": 1}]} with patch('importlib.import_module', ) as mock_import: - with patch('app.main.lib.shared_models.shared_model.SharedModel.get_client', ) as mock_get_shared_model_client: - with patch('app.main.lib.shared_models.shared_model.SharedModel.get_shared_model_response', ) as mock_get_shared_model_response: - mock_get_shared_model_client.return_value = SharedModelStub(TestBulkUpdateSimilarityBlueprint.test_model_key) - mock_get_shared_model_response.return_value = [0.0] - result = bulk_update_similarity_controller.update_existing_doc_values(document, existing_doc) - self.assertEqual(result['contexts'], [{'a': 1}]) - self.assertEqual(result['language'], None) - self.assertEqual(result['content'], None) - self.assertEqual(result['context'], {'a': 1}) - self.assertEqual(result['model_model_1'], 1) - self.assertEqual(result['vector_model_1'], [0.0]) - self.assertEqual(result['model'], 'model_1') + result = bulk_update_similarity_controller.update_existing_doc_values(document, existing_doc) + self.assertEqual(result['contexts'], [{'a': 1}]) + self.assertEqual(result['language'], None) + self.assertEqual(result['content'], None) + self.assertEqual(result['context'], {'a': 1}) + self.assertEqual(result['model_model_1'], 1) + self.assertEqual(result['vector_model_1'], [0.0]) + self.assertEqual(result['model'], 'model_1') def test_sorted_values(self): cases = {'1': {'a': 1}, '2': {'b': 2}}