Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions .github/workflows/codeql-analysis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ jobs:
strategy:
fail-fast: false
matrix:
language: [ 'python', 'ruby' ]
language: [ 'python' ]
# CodeQL supports [ 'cpp', 'csharp', 'go', 'java', 'javascript', 'python', 'ruby' ]
# Learn more about CodeQL language support at https://git.io/codeql-language-support

Expand All @@ -42,7 +42,7 @@ jobs:

# Initializes the CodeQL tools for scanning.
- name: Initialize CodeQL
uses: github/codeql-action/init@v1
uses: github/codeql-action/init@v3
with:
languages: ${{ matrix.language }}
# If you wish to specify custom queries, you can do so here or in a config file.
Expand All @@ -53,7 +53,7 @@ jobs:
# Autobuild attempts to build any compiled languages (C/C++, C#, or Java).
# If this step fails, then you should remove it and run the build manually (see below)
- name: Autobuild
uses: github/codeql-action/autobuild@v1
uses: github/codeql-action/autobuild@v3

# ℹ️ Command-line programs to run using the OS shell.
# 📚 https://git.io/JvXDl
Expand All @@ -67,4 +67,4 @@ jobs:
# make release

- name: Perform CodeQL Analysis
uses: github/codeql-action/analyze@v1
uses: github/codeql-action/analyze@v3
3 changes: 1 addition & 2 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,7 @@ WORKDIR /app

# Install dependencies
ENV DEBIAN_FRONTEND=noninteractive
RUN apt-get update && apt-get install -y ffmpeg cmake swig libavcodec-dev libavformat-dev
RUN ln -s /usr/bin/ffmpeg /usr/local/bin/ffmpeg
RUN apt-get update && apt-get install -y cmake swig


# Other configurations
Expand Down
3 changes: 0 additions & 3 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,6 @@ run_model:
run_rq_worker:
while true; do python manage.py run_rq_worker; done

run_video_matcher:
while true; do python manage.py run_video_matcher; done

test: wait
python manage.py init_perl_functions
coverage run --source=app/main/ manage.py test
Expand Down
2 changes: 0 additions & 2 deletions app/main/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,12 +29,10 @@ class Config:
MODEL_NAME = os.getenv('MODEL_NAME')
MAX_CLAUSE_COUNT = 1000
PERSISTENT_DISK_PATH = os.getenv('PERSISTENT_DISK_PATH', '/app/persistent_disk')
VIDEO_MODEL = os.getenv('VIDEO_MODEL', 'video-model')
try:
VIDEO_MODEL_L1_SCORE = float(os.getenv('video_model_l1_score', '0.7'))
except:
VIDEO_MODEL_L1_SCORE = 0.7
AUDIO_MODEL = os.getenv('AUDIO_MODEL', 'audio-model')
IMAGE_MODEL = os.getenv('IMAGE_MODEL', default='phash')
OPENAI_API_KEY = os.getenv('OPENAI_API_KEY', default=None)
ALEGRE_HOST = os.getenv('ALEGRE_HOST', default="http://alegre:3100")
Expand Down
1 change: 0 additions & 1 deletion app/main/controller/about_controller.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@ def get(self):
'image/similarity': ['phash'],
'image/ocr': ['google'],
'audio/transcription': ['aws'],
'audio/similarity': ['hash'],
'video/similarity': ['tmk'],
'graph/cluster': [],
}
Expand Down
4 changes: 2 additions & 2 deletions app/main/lib/graph_writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,10 +27,10 @@ def model_response_package(graph, url, doc_id):
}

def audio_model():
return SharedModel.get_client(app.config['AUDIO_MODEL'])
return SharedModel.get_client()

def video_model():
return SharedModel.get_client(app.config['VIDEO_MODEL'])
return SharedModel.get_client()

def get_iterable_objects(graph, data_type):
try:
Expand Down
20 changes: 5 additions & 15 deletions app/main/lib/langid.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,21 +116,11 @@ class HybridLangidProvider:
def langid(text):
fasttext_result = FastTextLangidProvider.langid(text)
cld_result = Cld3LangidProvider.langid(text)
# max_confidence = max(fasttext_result['result']['confidence'], cld_result['result']['confidence'])
min_confidence = min(fasttext_result['result']['confidence'], cld_result['result']['confidence'])

# if fasttext_result['result']['language'] == cld_result['result']['language'] or max_confidence >= 0.8:
if fasttext_result['result']['language'] == cld_result['result']['language'] and min_confidence >= 0.9:
# OLD - FastText and CLD agree or one of them is more than 80% confident.
# Now - FastText and CLD agree AND BOTH are more than 90% confident
# Return the higher confidence result
# if fasttext_result['result']['language'] != cld_result['result']['language']:
# # Log when there is disagreement
# app.logger.info(json.dumps({
# 'service':'LangId',
# 'message': 'Disagreement between fasttext and cld. Returning higher confidence model',
# 'parameters':{'text':text, 'fasttext':fasttext_result, 'cld':cld_result,},
# }))
# Current strategy: both CLD3 and FastText must have non-null confidence scores, agree on the language tag, and BOTH must meet the confidence threshold (>= 0.7). Reference ticket CV2-5367
if fasttext_result['result']['confidence'] is not None and cld_result['result']['confidence'] is not None \
and fasttext_result['result']['language'] == cld_result['result']['language'] \
and min(fasttext_result['result']['confidence'], cld_result['result']['confidence']) >= 0.7:

if fasttext_result['result']['confidence'] > cld_result['result']['confidence']:
return fasttext_result
else:
Expand Down
3 changes: 1 addition & 2 deletions app/main/lib/shared_models/audio_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@
import numpy as np
from sqlalchemy.orm.exc import NoResultFound

from app.main.lib.shared_models.shared_model import SharedModel
from app.main.lib.helpers import context_matches
from app.main.lib.similarity_helpers import get_context_query, drop_context_from_record
from app.main.lib import media_crud
Expand All @@ -24,7 +23,7 @@
def _after_log(retry_state):
app.logger.debug("Retrying audio similarity...")

class AudioModel(SharedModel):
class AudioModel():
def delete(self, task):
return media_crud.delete(task, Audio)

Expand Down
3 changes: 1 addition & 2 deletions app/main/lib/shared_models/video_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@
import tmkpy
from sqlalchemy.orm.exc import NoResultFound

from app.main.lib.shared_models.shared_model import SharedModel
from app.main.lib.similarity_helpers import get_context_query, drop_context_from_record
from app.main.lib.helpers import context_matches
from app.main.lib import media_crud
Expand All @@ -28,7 +27,7 @@
def _after_log(retry_state):
app.logger.debug("Retrying video similarity...")

class VideoModel(SharedModel):
class VideoModel():
def overload_context_to_denote_content_type(self, task):
return {**task, **{"context": {**task.get("context", {}), **{"content_type": "video"}}}}

Expand Down
4 changes: 2 additions & 2 deletions app/main/lib/similarity.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,10 +75,10 @@ def get_body_for_text_document(params, mode):
return params

def audio_model():
return AudioModel(app.config['AUDIO_MODEL'])
return AudioModel()

def video_model():
return VideoModel(app.config['VIDEO_MODEL'])
return VideoModel()

def model_response_package(item, command):
response_package = {
Expand Down
2 changes: 1 addition & 1 deletion app/test/test_async_similarity.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ class TestAsyncSimilarityBlueprint(BaseTestCase):
def setUp(self):
super().setUp()
first_print = [-248655731, -231870068, -230690420, -482429284, -478234963, -503476625, -520316369, -521361138, 1634511886, 1647109134, 1647046702, 1646940206, 1646924078, -500563482, -496367961, -471202139, -474282347, -476481849, -510101945, -510069497, -526854905, -237050874, -251730922, -251792089, -503463131, -513949140, -513949140, -1587752392, -1250138600, -180474360, -181522936, -194113975, -261353745, -253227346, -189264210, -188938850, -251825010, -251861834, -797121369, 1366287511, 1898902657, 1932452993, 1932452993, 1936651425, 1928253859, -491814237, -487750941, -496401919, -500657663, -500657643, -483876315, -517414355, -534219217, -529853138, -521597906, -524744474, -459335514, -255973226, -255973242, 1908283526, 1925055878, 1929249159, 1392390532, 1383981188, 1378656532, 1915527460, 1915527212, 1915528248, 1903135752, 1885837336, 1894160408, -253321943, -253326037, -262747077, -263193126, -262311942, -159482198, -151365974, -152489301, -152554837, -228052277, -232251189, -231202597, -243569493, -253069157, -257238902, -257242230, -521302374, -529751382, -517430614, -482831830, -483884501, -479492807, -534139591, -534190021, -534124501, -513115153, -479590737, -487980369, -486931793, -487062593, -488087363, -513253323, -529931243, -529865723, -521475067, -521475065, -252982986, -253179866, -260519706, -514274074, -472199258, -493164874, -1564809486, -1561472269, -1569918447, -1574116603, -1574113276, -1557204988, -483728380, -517313481, -528802706, -520549138, -1600584530, -1600453442, -1583800134, -1281875782, -1292339717, -1293328695, -1292907831, -1292969380, -1276199332, -504392116, -533941748, -533945844, -517414116, -517410760, -483794904, -496311256, -496351175, -487962599, -470136709, -1577427462, -1598339078, -1600568581, -1600634279, -1330097415, -1325833495, -1317312771, -1275466019, -1293353515, -1297496649, -1293171465, -1301552649, -1305742569, -1557473769, -1607807481, -1603604985, -1595314665, -1595378138, -1603522266, -1603522330, 
-1606676314, -1606479681, -262794049, -205121403, -225572412, 1921977028, 1921870556, -225678721, -224598210, -226713298, -231886802, -231829186, -248598194, -265641530, -265582649, -265579009, -265554513, -534022993, -521585489, -525845329, -525849169, -257413713, -207016049, -219666481, -228034567, -232229591, -232196807, -232008440, -244654327, -253043191, -253041137, -1268125170, -1272393170, -1272425938, -1271376338, -1267184018, -1531426306, -1514481442, -1497699122, -1497636658, -1493655458, -1502040008, -1503018952, -1506029256, -1489472728, -1525145048, -1541863896, -1542898072, -1538704408, -456451591, -459404918, -459388790, -172701558, -139158390, -156983158, -152723318, -161046278, -164192018, -164175634]
self.model = AudioModel('audio')
self.model = AudioModel()

def tearDown(self): # done in our pytest fixture after yield
db.session.remove()
Expand Down
2 changes: 1 addition & 1 deletion app/test/test_audio_similarity.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ def setUp(self):
audio = Audio(chromaprint_fingerprint=first_print, doc_id="blah", url="http://blah.com", context=[{"blah": 1}])
db.session.add(audio)
db.session.commit()
self.model = AudioModel('audio')
self.model = AudioModel()

def tearDown(self): # done in our pytest fixture after yield
db.session.remove()
Expand Down
2 changes: 1 addition & 1 deletion app/test/test_presto.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ class TestPrestoBlueprint(BaseTestCase):
def setUp(self):
super().setUp()
first_print = [-248655731, -231870068, -230690420, -482429284, -478234963, -503476625, -520316369, -521361138, 1634511886, 1647109134, 1647046702, 1646940206, 1646924078, -500563482, -496367961, -471202139, -474282347, -476481849, -510101945, -510069497, -526854905, -237050874, -251730922, -251792089, -503463131, -513949140, -513949140, -1587752392, -1250138600, -180474360, -181522936, -194113975, -261353745, -253227346, -189264210, -188938850, -251825010, -251861834, -797121369, 1366287511, 1898902657, 1932452993, 1932452993, 1936651425, 1928253859, -491814237, -487750941, -496401919, -500657663, -500657643, -483876315, -517414355, -534219217, -529853138, -521597906, -524744474, -459335514, -255973226, -255973242, 1908283526, 1925055878, 1929249159, 1392390532, 1383981188, 1378656532, 1915527460, 1915527212, 1915528248, 1903135752, 1885837336, 1894160408, -253321943, -253326037, -262747077, -263193126, -262311942, -159482198, -151365974, -152489301, -152554837, -228052277, -232251189, -231202597, -243569493, -253069157, -257238902, -257242230, -521302374, -529751382, -517430614, -482831830, -483884501, -479492807, -534139591, -534190021, -534124501, -513115153, -479590737, -487980369, -486931793, -487062593, -488087363, -513253323, -529931243, -529865723, -521475067, -521475065, -252982986, -253179866, -260519706, -514274074, -472199258, -493164874, -1564809486, -1561472269, -1569918447, -1574116603, -1574113276, -1557204988, -483728380, -517313481, -528802706, -520549138, -1600584530, -1600453442, -1583800134, -1281875782, -1292339717, -1293328695, -1292907831, -1292969380, -1276199332, -504392116, -533941748, -533945844, -517414116, -517410760, -483794904, -496311256, -496351175, -487962599, -470136709, -1577427462, -1598339078, -1600568581, -1600634279, -1330097415, -1325833495, -1317312771, -1275466019, -1293353515, -1297496649, -1293171465, -1301552649, -1305742569, -1557473769, -1607807481, -1603604985, -1595314665, -1595378138, -1603522266, -1603522330, 
-1606676314, -1606479681, -262794049, -205121403, -225572412, 1921977028, 1921870556, -225678721, -224598210, -226713298, -231886802, -231829186, -248598194, -265641530, -265582649, -265579009, -265554513, -534022993, -521585489, -525845329, -525849169, -257413713, -207016049, -219666481, -228034567, -232229591, -232196807, -232008440, -244654327, -253043191, -253041137, -1268125170, -1272393170, -1272425938, -1271376338, -1267184018, -1531426306, -1514481442, -1497699122, -1497636658, -1493655458, -1502040008, -1503018952, -1506029256, -1489472728, -1525145048, -1541863896, -1542898072, -1538704408, -456451591, -459404918, -459388790, -172701558, -139158390, -156983158, -152723318, -161046278, -164192018, -164175634]
self.model = AudioModel('audio')
self.model = AudioModel()

def tearDown(self): # done in our pytest fixture after yield
db.session.remove()
Expand Down
2 changes: 1 addition & 1 deletion app/test/test_sync_similarity.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ class TestSyncSimilarityBlueprint(BaseTestCase):
def setUp(self):
super().setUp()
first_print = 49805440634311326
self.model = AudioModel('audio')
self.model = AudioModel()

def tearDown(self): # done in our pytest fixture after yield
db.session.remove()
Expand Down
2 changes: 1 addition & 1 deletion app/test/test_video_similarity.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ def respond(self, task):
class TestVideoSimilarityBlueprint(BaseTestCase):
def setUp(self):
super().setUp()
self.model = VideoModel('video')
self.model = VideoModel()

def test_get_tempfile(self):
self.assertIsInstance(self.model.get_tempfile(), tempfile._TemporaryFileWrapper)
Expand Down
24 changes: 0 additions & 24 deletions docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -97,30 +97,6 @@ services:
# - .env_file
# environment:
# MODEL_NAME: mdebertav3filipino
# video:
# build: .
# platform: linux/x86_64
# command: ["make", "run_model"]
# volumes:
# - ".:/app"
# depends_on:
# - redis
# env_file:
# - .env_file
# environment:
# MODEL_NAME: video
# audio:
# build: .
# platform: linux/x86_64
# command: ["make", "run_model"]
# volumes:
# - ".:/app"
# depends_on:
# - redis
# env_file:
# - .env_file
# environment:
# MODEL_NAME: audio
queue_worker:
build: .
platform: linux/x86_64
Expand Down
6 changes: 0 additions & 6 deletions manage.py
Original file line number Diff line number Diff line change
Expand Up @@ -260,12 +260,6 @@ def run_model():
model_config['options']
)


@manager.command
def run_video_matcher():
"""Runs the video matcher."""
VideoMatcher.start_server()

@manager.command
def init():
"""Initializes the service."""
Expand Down
4 changes: 1 addition & 3 deletions production/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,8 @@ RUN chmod 755 /opt/bin/*.sh

WORKDIR /app

RUN apt-get update && apt-get install -y ffmpeg cmake swig libavcodec-dev libavformat-dev
RUN apt-get update && apt-get install -y ffmpeg swig
RUN apt-get update && apt-get install -y cmake swig
RUN apt-get clean
RUN ln -s /usr/bin/ffmpeg /usr/local/bin/ffmpeg

COPY . .

Expand Down
9 changes: 0 additions & 9 deletions threatexchange/.github/PULL_REQUEST_TEMPLATE.md

This file was deleted.

13 changes: 0 additions & 13 deletions threatexchange/.github/labeler.yml

This file was deleted.

31 changes: 0 additions & 31 deletions threatexchange/.github/workflows/golang-ci.yaml

This file was deleted.

27 changes: 0 additions & 27 deletions threatexchange/.github/workflows/hma-ci-master-push.yaml

This file was deleted.

Loading
Loading