From 596196997f03034a526da664db1ec64e8495bd20 Mon Sep 17 00:00:00 2001 From: MikeWLloyd Date: Wed, 1 May 2024 16:00:40 -0400 Subject: [PATCH 01/29] clastr api proof of concept --- poetry.lock | 114 ++++++++++------ pyproject.toml | 2 +- requirements.txt | 2 +- strprofiler/shiny_app/clastr_api.py | 197 ++++++++++++++++++++++++++++ strprofiler/shiny_app/shiny_app.py | 77 ++++++++++- strprofiler/utils.py | 2 +- 6 files changed, 346 insertions(+), 48 deletions(-) create mode 100644 strprofiler/shiny_app/clastr_api.py diff --git a/poetry.lock b/poetry.lock index b35cc6f..d4bed28 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.7.1 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.8.2 and should not be changed by hand. [[package]] name = "anyio" @@ -88,13 +88,13 @@ files = [ [[package]] name = "exceptiongroup" -version = "1.2.0" +version = "1.2.1" description = "Backport of PEP 654 (exception groups)" optional = false python-versions = ">=3.7" files = [ - {file = "exceptiongroup-1.2.0-py3-none-any.whl", hash = "sha256:4bfd3996ac73b41e9b9628b04e079f193850720ea5945fc96a08633c66912f14"}, - {file = "exceptiongroup-1.2.0.tar.gz", hash = "sha256:91f5c769735f051a4290d52edd0858999b57e5876e9f85937691bd4c9fa3ed68"}, + {file = "exceptiongroup-1.2.1-py3-none-any.whl", hash = "sha256:5258b9ed329c5bbdd31a309f53cbfb0b155341807f6ff7606a1e801a891b29ad"}, + {file = "exceptiongroup-1.2.1.tar.gz", hash = "sha256:a4785e48b045528f5bfe627b6ad554ff32def154f42372786903b7abcfe1aa16"}, ] [package.extras] @@ -391,51 +391,76 @@ files = [ [[package]] name = "pandas" -version = "1.5.3" +version = "2.2.2" description = "Powerful data structures for data analysis, time series, and statistics" optional = false -python-versions = ">=3.8" +python-versions = ">=3.9" files = [ - {file = "pandas-1.5.3-cp310-cp310-macosx_10_9_universal2.whl", hash = 
"sha256:3749077d86e3a2f0ed51367f30bf5b82e131cc0f14260c4d3e499186fccc4406"}, - {file = "pandas-1.5.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:972d8a45395f2a2d26733eb8d0f629b2f90bebe8e8eddbb8829b180c09639572"}, - {file = "pandas-1.5.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:50869a35cbb0f2e0cd5ec04b191e7b12ed688874bd05dd777c19b28cbea90996"}, - {file = "pandas-1.5.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c3ac844a0fe00bfaeb2c9b51ab1424e5c8744f89860b138434a363b1f620f354"}, - {file = "pandas-1.5.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7a0a56cef15fd1586726dace5616db75ebcfec9179a3a55e78f72c5639fa2a23"}, - {file = "pandas-1.5.3-cp310-cp310-win_amd64.whl", hash = "sha256:478ff646ca42b20376e4ed3fa2e8d7341e8a63105586efe54fa2508ee087f328"}, - {file = "pandas-1.5.3-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:6973549c01ca91ec96199e940495219c887ea815b2083722821f1d7abfa2b4dc"}, - {file = "pandas-1.5.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:c39a8da13cede5adcd3be1182883aea1c925476f4e84b2807a46e2775306305d"}, - {file = "pandas-1.5.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:f76d097d12c82a535fda9dfe5e8dd4127952b45fea9b0276cb30cca5ea313fbc"}, - {file = "pandas-1.5.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e474390e60ed609cec869b0da796ad94f420bb057d86784191eefc62b65819ae"}, - {file = "pandas-1.5.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5f2b952406a1588ad4cad5b3f55f520e82e902388a6d5a4a91baa8d38d23c7f6"}, - {file = "pandas-1.5.3-cp311-cp311-win_amd64.whl", hash = "sha256:bc4c368f42b551bf72fac35c5128963a171b40dce866fb066540eeaf46faa003"}, - {file = "pandas-1.5.3-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:14e45300521902689a81f3f41386dc86f19b8ba8dd5ac5a3c7010ef8d2932813"}, - {file = "pandas-1.5.3-cp38-cp38-macosx_10_9_x86_64.whl", hash = 
"sha256:9842b6f4b8479e41968eced654487258ed81df7d1c9b7b870ceea24ed9459b31"}, - {file = "pandas-1.5.3-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:26d9c71772c7afb9d5046e6e9cf42d83dd147b5cf5bcb9d97252077118543792"}, - {file = "pandas-1.5.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5fbcb19d6fceb9e946b3e23258757c7b225ba450990d9ed63ccceeb8cae609f7"}, - {file = "pandas-1.5.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:565fa34a5434d38e9d250af3c12ff931abaf88050551d9fbcdfafca50d62babf"}, - {file = "pandas-1.5.3-cp38-cp38-win32.whl", hash = "sha256:87bd9c03da1ac870a6d2c8902a0e1fd4267ca00f13bc494c9e5a9020920e1d51"}, - {file = "pandas-1.5.3-cp38-cp38-win_amd64.whl", hash = "sha256:41179ce559943d83a9b4bbacb736b04c928b095b5f25dd2b7389eda08f46f373"}, - {file = "pandas-1.5.3-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:c74a62747864ed568f5a82a49a23a8d7fe171d0c69038b38cedf0976831296fa"}, - {file = "pandas-1.5.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:c4c00e0b0597c8e4f59e8d461f797e5d70b4d025880516a8261b2817c47759ee"}, - {file = "pandas-1.5.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:a50d9a4336a9621cab7b8eb3fb11adb82de58f9b91d84c2cd526576b881a0c5a"}, - {file = "pandas-1.5.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dd05f7783b3274aa206a1af06f0ceed3f9b412cf665b7247eacd83be41cf7bf0"}, - {file = "pandas-1.5.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9f69c4029613de47816b1bb30ff5ac778686688751a5e9c99ad8c7031f6508e5"}, - {file = "pandas-1.5.3-cp39-cp39-win32.whl", hash = "sha256:7cec0bee9f294e5de5bbfc14d0573f65526071029d036b753ee6507d2a21480a"}, - {file = "pandas-1.5.3-cp39-cp39-win_amd64.whl", hash = "sha256:dfd681c5dc216037e0b0a2c821f5ed99ba9f03ebcf119c7dac0e9a7b960b9ec9"}, - {file = "pandas-1.5.3.tar.gz", hash = "sha256:74a3fd7e5a7ec052f183273dc7b0acd3a863edf7520f5d3a1765c04ffdb3b0b1"}, + {file = 
"pandas-2.2.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:90c6fca2acf139569e74e8781709dccb6fe25940488755716d1d354d6bc58bce"}, + {file = "pandas-2.2.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:c7adfc142dac335d8c1e0dcbd37eb8617eac386596eb9e1a1b77791cf2498238"}, + {file = "pandas-2.2.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4abfe0be0d7221be4f12552995e58723c7422c80a659da13ca382697de830c08"}, + {file = "pandas-2.2.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8635c16bf3d99040fdf3ca3db669a7250ddf49c55dc4aa8fe0ae0fa8d6dcc1f0"}, + {file = "pandas-2.2.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:40ae1dffb3967a52203105a077415a86044a2bea011b5f321c6aa64b379a3f51"}, + {file = "pandas-2.2.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:8e5a0b00e1e56a842f922e7fae8ae4077aee4af0acb5ae3622bd4b4c30aedf99"}, + {file = "pandas-2.2.2-cp310-cp310-win_amd64.whl", hash = "sha256:ddf818e4e6c7c6f4f7c8a12709696d193976b591cc7dc50588d3d1a6b5dc8772"}, + {file = "pandas-2.2.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:696039430f7a562b74fa45f540aca068ea85fa34c244d0deee539cb6d70aa288"}, + {file = "pandas-2.2.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:8e90497254aacacbc4ea6ae5e7a8cd75629d6ad2b30025a4a8b09aa4faf55151"}, + {file = "pandas-2.2.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:58b84b91b0b9f4bafac2a0ac55002280c094dfc6402402332c0913a59654ab2b"}, + {file = "pandas-2.2.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6d2123dc9ad6a814bcdea0f099885276b31b24f7edf40f6cdbc0912672e22eee"}, + {file = "pandas-2.2.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:2925720037f06e89af896c70bca73459d7e6a4be96f9de79e2d440bd499fe0db"}, + {file = "pandas-2.2.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:0cace394b6ea70c01ca1595f839cf193df35d1575986e484ad35c4aeae7266c1"}, + {file = 
"pandas-2.2.2-cp311-cp311-win_amd64.whl", hash = "sha256:873d13d177501a28b2756375d59816c365e42ed8417b41665f346289adc68d24"}, + {file = "pandas-2.2.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:9dfde2a0ddef507a631dc9dc4af6a9489d5e2e740e226ad426a05cabfbd7c8ef"}, + {file = "pandas-2.2.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:e9b79011ff7a0f4b1d6da6a61aa1aa604fb312d6647de5bad20013682d1429ce"}, + {file = "pandas-2.2.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1cb51fe389360f3b5a4d57dbd2848a5f033350336ca3b340d1c53a1fad33bcad"}, + {file = "pandas-2.2.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:eee3a87076c0756de40b05c5e9a6069c035ba43e8dd71c379e68cab2c20f16ad"}, + {file = "pandas-2.2.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:3e374f59e440d4ab45ca2fffde54b81ac3834cf5ae2cdfa69c90bc03bde04d76"}, + {file = "pandas-2.2.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:43498c0bdb43d55cb162cdc8c06fac328ccb5d2eabe3cadeb3529ae6f0517c32"}, + {file = "pandas-2.2.2-cp312-cp312-win_amd64.whl", hash = "sha256:d187d355ecec3629624fccb01d104da7d7f391db0311145817525281e2804d23"}, + {file = "pandas-2.2.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:0ca6377b8fca51815f382bd0b697a0814c8bda55115678cbc94c30aacbb6eff2"}, + {file = "pandas-2.2.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:9057e6aa78a584bc93a13f0a9bf7e753a5e9770a30b4d758b8d5f2a62a9433cd"}, + {file = "pandas-2.2.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:001910ad31abc7bf06f49dcc903755d2f7f3a9186c0c040b827e522e9cef0863"}, + {file = "pandas-2.2.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:66b479b0bd07204e37583c191535505410daa8df638fd8e75ae1b383851fe921"}, + {file = "pandas-2.2.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:a77e9d1c386196879aa5eb712e77461aaee433e54c68cf253053a73b7e49c33a"}, + {file = 
"pandas-2.2.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:92fd6b027924a7e178ac202cfbe25e53368db90d56872d20ffae94b96c7acc57"}, + {file = "pandas-2.2.2-cp39-cp39-win_amd64.whl", hash = "sha256:640cef9aa381b60e296db324337a554aeeb883ead99dc8f6c18e81a93942f5f4"}, + {file = "pandas-2.2.2.tar.gz", hash = "sha256:9e79019aba43cb4fda9e4d983f8e88ca0373adbb697ae9c6c43093218de28b54"}, ] [package.dependencies] numpy = [ - {version = ">=1.20.3", markers = "python_version < \"3.10\""}, - {version = ">=1.23.2", markers = "python_version >= \"3.11\""}, - {version = ">=1.21.0", markers = "python_version >= \"3.10\" and python_version < \"3.11\""}, + {version = ">=1.22.4", markers = "python_version < \"3.11\""}, + {version = ">=1.23.2", markers = "python_version == \"3.11\""}, + {version = ">=1.26.0", markers = "python_version >= \"3.12\""}, ] -python-dateutil = ">=2.8.1" +python-dateutil = ">=2.8.2" pytz = ">=2020.1" +tzdata = ">=2022.7" [package.extras] -test = ["hypothesis (>=5.5.3)", "pytest (>=6.0)", "pytest-xdist (>=1.31)"] +all = ["PyQt5 (>=5.15.9)", "SQLAlchemy (>=2.0.0)", "adbc-driver-postgresql (>=0.8.0)", "adbc-driver-sqlite (>=0.8.0)", "beautifulsoup4 (>=4.11.2)", "bottleneck (>=1.3.6)", "dataframe-api-compat (>=0.1.7)", "fastparquet (>=2022.12.0)", "fsspec (>=2022.11.0)", "gcsfs (>=2022.11.0)", "html5lib (>=1.1)", "hypothesis (>=6.46.1)", "jinja2 (>=3.1.2)", "lxml (>=4.9.2)", "matplotlib (>=3.6.3)", "numba (>=0.56.4)", "numexpr (>=2.8.4)", "odfpy (>=1.4.1)", "openpyxl (>=3.1.0)", "pandas-gbq (>=0.19.0)", "psycopg2 (>=2.9.6)", "pyarrow (>=10.0.1)", "pymysql (>=1.0.2)", "pyreadstat (>=1.2.0)", "pytest (>=7.3.2)", "pytest-xdist (>=2.2.0)", "python-calamine (>=0.1.7)", "pyxlsb (>=1.0.10)", "qtpy (>=2.3.0)", "s3fs (>=2022.11.0)", "scipy (>=1.10.0)", "tables (>=3.8.0)", "tabulate (>=0.9.0)", "xarray (>=2022.12.0)", "xlrd (>=2.0.1)", "xlsxwriter (>=3.0.5)", "zstandard (>=0.19.0)"] +aws = ["s3fs (>=2022.11.0)"] +clipboard = ["PyQt5 (>=5.15.9)", "qtpy (>=2.3.0)"] 
+compression = ["zstandard (>=0.19.0)"] +computation = ["scipy (>=1.10.0)", "xarray (>=2022.12.0)"] +consortium-standard = ["dataframe-api-compat (>=0.1.7)"] +excel = ["odfpy (>=1.4.1)", "openpyxl (>=3.1.0)", "python-calamine (>=0.1.7)", "pyxlsb (>=1.0.10)", "xlrd (>=2.0.1)", "xlsxwriter (>=3.0.5)"] +feather = ["pyarrow (>=10.0.1)"] +fss = ["fsspec (>=2022.11.0)"] +gcp = ["gcsfs (>=2022.11.0)", "pandas-gbq (>=0.19.0)"] +hdf5 = ["tables (>=3.8.0)"] +html = ["beautifulsoup4 (>=4.11.2)", "html5lib (>=1.1)", "lxml (>=4.9.2)"] +mysql = ["SQLAlchemy (>=2.0.0)", "pymysql (>=1.0.2)"] +output-formatting = ["jinja2 (>=3.1.2)", "tabulate (>=0.9.0)"] +parquet = ["pyarrow (>=10.0.1)"] +performance = ["bottleneck (>=1.3.6)", "numba (>=0.56.4)", "numexpr (>=2.8.4)"] +plot = ["matplotlib (>=3.6.3)"] +postgresql = ["SQLAlchemy (>=2.0.0)", "adbc-driver-postgresql (>=0.8.0)", "psycopg2 (>=2.9.6)"] +pyarrow = ["pyarrow (>=10.0.1)"] +spss = ["pyreadstat (>=1.2.0)"] +sql-other = ["SQLAlchemy (>=2.0.0)", "adbc-driver-postgresql (>=0.8.0)", "adbc-driver-sqlite (>=0.8.0)"] +test = ["hypothesis (>=6.46.1)", "pytest (>=7.3.2)", "pytest-xdist (>=2.2.0)"] +xml = ["lxml (>=4.9.2)"] [[package]] name = "prompt-toolkit" @@ -664,6 +689,17 @@ files = [ {file = "typing_extensions-4.11.0.tar.gz", hash = "sha256:83f085bd5ca59c80295fc2a82ab5dac679cbe02b9f33f7d83af68e241bea51b0"}, ] +[[package]] +name = "tzdata" +version = "2024.1" +description = "Provider of IANA time zone data" +optional = false +python-versions = ">=2" +files = [ + {file = "tzdata-2024.1-py2.py3-none-any.whl", hash = "sha256:9068bc196136463f5245e51efda838afa15aaeca9903f49050dfa2679db4d252"}, + {file = "tzdata-2024.1.tar.gz", hash = "sha256:2674120f8d891909751c38abcdfd386ac0a5a1127954fbc332af6b5ceae07efd"}, +] + [[package]] name = "uc-micro-py" version = "1.0.3" @@ -879,4 +915,4 @@ files = [ [metadata] lock-version = "2.0" python-versions = ">=3.9,<4.0" -content-hash = "51da1c89cdbc1170fa0c07a6d445e394791deaf26efaa86b5aa7603b8fcaabcc" 
+content-hash = "bada9554d3318a11886e1e0e6d3f020beafe8e4c6db8b9249b3b38fcb6c31ef0" diff --git a/pyproject.toml b/pyproject.toml index ccc0b01..f83e082 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -10,7 +10,7 @@ readme = "README.md" [tool.poetry.dependencies] python = ">=3.9,<4.0" -pandas = "^1.4.3" +pandas = "^2.2" rich-click = "^1.5.2" numpy = "^1.26.3" openpyxl = "^3.0.10" diff --git a/requirements.txt b/requirements.txt index 5473afd..cc6d05c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,7 +1,7 @@ # Automatically generated by https://github.com/damnever/pigar. numpy==1.26.3 -pandas==1.5.3 +pandas==2.2 rich-click==1.7.3 shiny==0.8.1 shinyswatch==0.4.2 diff --git a/strprofiler/shiny_app/clastr_api.py b/strprofiler/shiny_app/clastr_api.py new file mode 100644 index 0000000..aa706c8 --- /dev/null +++ b/strprofiler/shiny_app/clastr_api.py @@ -0,0 +1,197 @@ +import requests +import json +import pandas as pd +from flatten_json import flatten + + +def clastr_query(query, query_filter, include_amelogenin, score_filter): + url = "https://www.cellosaurus.org/str-search/api/query/" + + if query_filter == "Tanabe": + query['algorithm'] = 1 + elif query_filter == "Masters Query": + query['algorithm'] = 2 + elif query_filter == "Masters Reference": + query['algorithm'] = 3 + + query['includeAmelogenin'] = include_amelogenin + query['scoreFilter'] = score_filter + + r = requests.post(url, data=json.dumps(query)) + + # JSON response: + # 'description': '', + # 'cellosaurusRelease': '48.0', + # 'runOn': '2024-Apr-25 12:45:40 UTC+0', + # 'toolVersion': '1.4.4', + # 'searchSpace': 8581, + # 'parameters': {... + # 'results': [{ ... + # FULL STRUCTURE OUTLINED BELOW. 
+ + try: + r.raise_for_status() + except requests.exceptions.HTTPError as e: + return pd.DataFrame({"Error": [str(e)]}) + + df = pd.DataFrame.from_dict(r.json()['results']) + + if df.empty: + return pd.DataFrame({"No Clastr Result": []}) + + flattened = [flatten(d) for d in r.json()['results']] + df = pd.DataFrame(flattened) + + # profiles[0] has 'bestScore' returns. + # Markers within profiles[0] are split by each allele 'value' + # First select alles, and then concat alleles by return and marker + markers = df.filter(regex='^profiles_0_.*_value').T + markers[['A', 'B', 'C', 'markerID', 'E', 'F', 'G']] = markers.index.str.split('_', n=7, expand=False).tolist() + markers.drop(['A', 'B', 'C', 'E', 'F', 'G'], axis=1, inplace=True) + + # Melt dataframe to: [markerID, resultID, allele] for cat on markerID/resultID + melted_markers = pd.melt(markers, id_vars=['markerID'], var_name='resultID', value_name='allele') + + # Join resultID and markerID index to grouped joined allele strings. + allele_cat_markers = pd.concat([ + melted_markers[['resultID', 'markerID']], + melted_markers.groupby(['resultID', 'markerID'], as_index=True).transform(lambda x: ',' + .join(map(str, x)).replace(",nan", "").replace("nan", "")) + ], axis=1).drop_duplicates(subset=['resultID', 'markerID']) + + # Marker names are not consistant across results. MarkerName[1] != the same thing in all cases. + # We must track marker name by index by result. + # The same logic from above applies, split the compount column name string, + # Melt on markerID, and then merge with concat allele made above. + # Finally, pivot into a table and rejoin to higher level results. 
+ marker_names = df.filter(regex='^profiles_0_.*_name').T + marker_names[['A', 'B', 'C', 'markerID', 'E']] = marker_names.index.str.split('_', n=5, expand=False).tolist() + marker_names.drop(['A', 'B', 'C', 'E'], axis=1, inplace=True) + + melted_markers = pd.melt(marker_names, id_vars=['markerID'], + var_name='resultID', value_name='markerName').dropna().drop_duplicates(subset=['markerID', 'resultID']) + + markers_names_alleles = pd.merge(allele_cat_markers, melted_markers, how='inner', on=['markerID', 'resultID']) + + pivot_markers_names_alleles = markers_names_alleles.pivot(index=['resultID'], columns='markerName', values='allele') + + try: + merged = pd.merge(df[['accession', 'name', 'species', 'bestScore', 'problem']], + pivot_markers_names_alleles, left_index=True, right_on='resultID') + except KeyError: + merged = pd.merge(df[['accession', 'name', 'species', 'bestScore']], pivot_markers_names_alleles, left_index=True, right_on='resultID') + + merged['accession_link'] = "https://web.expasy.org/cellosaurus/" + merged['accession'] + print(merged) + # return final df + + # TO DO: Add query to top of merged DF before return. 
+ + return merged + + +if __name__ == '__main__': + # url = "https://www.cellosaurus.org/str-search/api/query/%" + # Use above URL for 400 error + + # sample J000077451 + data = {"Amelogenin": "X,Y", + "CSF1PO": "12", + "D2S1338": "17,19", + "D3S1358": "15", + "D5S818": "11,12", + "D7S820": "11,12", + "D8S1179": "12,15", + "D13S317": "8", + "D16S539": "13", + "D18S51": "14", + "D19S433": "14", + "D21S11": "31,31.2", + "FGA": "23", + "Penta D": "", + "Penta E": "", + "TH01": "7,9.3", + "TPOX": "8", + "vWA": "18", + } + + # # stock from https://www.cellosaurus.org/str-search/help.html#5.1 + # data = { + # "Amelogenin": "X", + # "CSF1PO": "13,14", + # "D5S818": "13", + # "D7S820": "8", + # "D13S317": "12", + # "FGA": "24", + # "TH01": "8", + # "TPOX": "11", + # "vWA": "16", + # } + + r = clastr_query(data, 'Tanabe', False, 70) + + print(r) + +# JSON data structure: +# { +# "description": "", +# "cellosaurusRelease": "48.0", +# "runOn": "2024-Apr-30 18:15:31 UTC+0", +# "toolVersion": "1.4.4", +# "searchSpace": 8581, +# "parameters": { +# "species": "Homo sapiens (Human)", +# "algorithm": "Tanabe", +# "scoringMode": "Non-empty makers", +# "scoreFilter": 70, +# "minMarkers": 8, +# "maxResults": 200, +# "includeAmelogenin": false, +# "markers": [ { +# "name": "Amelogenin", +# "alleles": [ +# { +# "value": "X" +# }, +# { +# "value": "Y" +# } +# ] +# }, ... ] +# }, +# "results": [ +# { +# "accession": "CVCL_2335", +# "name": "CCD-1076Sk", +# "species": "Homo sapiens (Human)", +# "bestScore": 72.0, +# "problematic": false, +# "profiles": [ +# { +# "score": 72.0, +# "markerNumber": 8, +# "alleleNumber": 14, +# "markers": [ +# { +# "name": "Amelogenin", +# "conflicted": false, +# "searched": true, +# "sources": [], +# "alleles": [ +# { +# "value": "X", +# "matched": true +# }, +# { +# "value": "Y", +# "matched": true +# } +# ] +# }, +# ... 
+# } +# +# +# } +# +# ] diff --git a/strprofiler/shiny_app/shiny_app.py b/strprofiler/shiny_app/shiny_app.py index 7ec1ce0..daf4869 100644 --- a/strprofiler/shiny_app/shiny_app.py +++ b/strprofiler/shiny_app/shiny_app.py @@ -6,6 +6,7 @@ import strprofiler.utils as sp from strprofiler.shiny_app.calc_functions import _single_query, _batch_query, _file_query +from strprofiler.shiny_app.clastr_api import clastr_query from datetime import date import time @@ -65,9 +66,15 @@ def _highlight_non_matches(s): is_match = s == s.iloc[0] return ["text-align:center;background-color:#ec7a80" if not v else "text-align:center" for v in is_match] -# App Generation ### + +def _link_wrap(name, link, problem): + if not pd.isna(problem): + return ui.tooltip(ui.tags.a(name, href=str(link), target="_blank", style="text-align:center;font-style:oblique;color:#ec7a80"), f"{problem}") + else: + return ui.tags.a(name, href=str(link), target="_blank") +# App Generation ### def create_app(db=None): f = importlib.resources.files("strprofiler.shiny_app") @@ -93,7 +100,6 @@ def create_app(db=None): ) ) - # TODO move this to a separate function app_ui = ui.page_fluid( ui.tags.style("#main {padding:12px !important} #sidebar {padding:12px}"), ui.tags.style( @@ -188,6 +194,12 @@ def create_app(db=None): class_="btn-danger", width="45%", ), + ui.input_action_button( + "clastr", + "Clastr", + class_="btn-success", + width="45%", + ), ), ), ), @@ -199,10 +211,23 @@ def create_app(db=None): ui.column(3, ui.tags.h3("Results")), ui.column(1, ui.p("")), ), - ui.column( - 12, - {"id": "res_card"}, - ui.output_table("out_result"), + ui.navset_card_tab( + ui.nav_panel( + "STR Profiler", + ui.column( + 12, + {"id": "res_card"}, + ui.output_table("out_result"), + ), + ), + ui.nav_panel( + "CLASTR", + ui.column( + 12, + {"id": "res_card"}, + ui.output_table("clastr_table"), + ), + ), ), full_screen=False, fill=False, @@ -416,6 +441,7 @@ def server(input, output, session): str_database = reactive.value(init_db) 
db_name = reactive.value(init_db_name) output_df = reactive.value(None) + output_df_clastr = reactive.value(None) demo_vals = reactive.value(None) demo_name = reactive.value(None) markers = reactive.value([i for i in list(init_db[next(iter(init_db))].keys()) if not any([e for e in ['Center', 'Passage'] if e in i])]) @@ -554,6 +580,45 @@ def loaded_example_text(): x = ui.strong("") return x + @reactive.calc + @reactive.event(input.clastr) + def clastr_results(): + query = {m: input[m]() for m in markers()} + thinking = ui.notification_show("Message ", duration=None) + clastr_return = clastr_query(query, input.query_filter(), input.score_amel_query(), input.query_filter_threshold()) + ui.notification_remove(thinking) + return clastr_return + + @output + @render.table + def clastr_table(): + output_df_clastr.set(clastr_results()) + if output_df_clastr() is not None: + out_df = output_df_clastr().copy() + print(out_df) + if ('No Clastr Result' in out_df.columns) | ('Error' in out_df.columns): + return out_df + try: + out_df['link'] = out_df.apply(lambda x: _link_wrap(x.accession, x.accession_link, x.problem), axis=1) + except Exception: + out_df['link'] = out_df.apply(lambda x: _link_wrap(x.accession, x.accession_link, pd.NA), axis=1) + out_df = out_df.drop(['accession', 'accession_link', 'species'], axis=1).rename( + columns={"link": "Accession", "name": "Name", "bestScore": "Score"}) + cols = list(out_df.columns) + cols = [cols[-1]] + cols[:-1] + out_df = out_df[cols] + out_df = out_df.style.set_table_attributes( + 'class="dataframe shiny-table table w-auto"' + ).hide(axis="index").format( + { + "Score": "{0:0.2f}", + }, + na_rep="" + ) + else: + out_df = pd.DataFrame({"No input provided.": []}) + return out_df + # Dealing with calculating a results table # Catch when either reset or search is clicked # If reset, clear the query and run to make an empty df. 
diff --git a/strprofiler/utils.py b/strprofiler/utils.py index 33e6ffa..987128f 100644 --- a/strprofiler/utils.py +++ b/strprofiler/utils.py @@ -187,7 +187,7 @@ def str_ingress( else: sys.exit('File extension: ' + path.suffix + ' in file: ' + str(path) + ' is not supported.') - df = df.applymap(lambda x: x.strip() if isinstance(x, str) else x) + df = df.map(lambda x: x.strip() if isinstance(x, str) else x) df.columns = df.columns.str.strip() From 9916ad9005fab5dac18349037d1a83d608f5ccaa Mon Sep 17 00:00:00 2001 From: MikeWLloyd Date: Thu, 2 May 2024 14:40:40 -0400 Subject: [PATCH 02/29] query row added, UI adjustment --- app.py | 3 + strprofiler/shiny_app/clastr_api.py | 23 +++- strprofiler/shiny_app/shiny_app.py | 160 +++++++++++++--------------- 3 files changed, 94 insertions(+), 92 deletions(-) create mode 100644 app.py diff --git a/app.py b/app.py new file mode 100644 index 0000000..69d5d26 --- /dev/null +++ b/app.py @@ -0,0 +1,3 @@ +from strprofiler.shiny_app.shiny_app import create_app + +app = create_app() diff --git a/strprofiler/shiny_app/clastr_api.py b/strprofiler/shiny_app/clastr_api.py index aa706c8..6a7386b 100644 --- a/strprofiler/shiny_app/clastr_api.py +++ b/strprofiler/shiny_app/clastr_api.py @@ -7,6 +7,10 @@ def clastr_query(query, query_filter, include_amelogenin, score_filter): url = "https://www.cellosaurus.org/str-search/api/query/" + dct = {k: [v] for k, v in query.items()} + query_df = pd.DataFrame(dct) + query_df['accession'] = 'Query' + if query_filter == "Tanabe": query['algorithm'] = 1 elif query_filter == "Masters Query": @@ -82,12 +86,23 @@ def clastr_query(query, query_filter, include_amelogenin, score_filter): merged = pd.merge(df[['accession', 'name', 'species', 'bestScore']], pivot_markers_names_alleles, left_index=True, right_on='resultID') merged['accession_link'] = "https://web.expasy.org/cellosaurus/" + merged['accession'] - print(merged) - # return final df - # TO DO: Add query to top of merged DF before return. 
+ # add the query line to the top of merged, and reorder columns + + query_added = pd.concat([query_df, merged]).reset_index(drop=True) + query_added["bestScore"] = query_added['bestScore'].map("{0:.2f}".format).replace("nan", "") + + # print(query_added.columns) + + if 'problem' in query_added.columns: + query_added = query_added[['accession', 'name', 'species', 'bestScore', 'accession_link', 'problem'] + + [c for c in query_added if c not in + ['accession', 'name', 'species', 'bestScore', 'accession_link', 'problem']]].fillna('') + else: + query_added = query_added[['accession', 'name', 'species', 'bestScore', 'accession_link'] + + [c for c in query_added if c not in ['accession', 'name', 'species', 'bestScore', 'accession_link']]].fillna('') - return merged + return query_added if __name__ == '__main__': diff --git a/strprofiler/shiny_app/shiny_app.py b/strprofiler/shiny_app/shiny_app.py index daf4869..0d08163 100644 --- a/strprofiler/shiny_app/shiny_app.py +++ b/strprofiler/shiny_app/shiny_app.py @@ -68,7 +68,9 @@ def _highlight_non_matches(s): def _link_wrap(name, link, problem): - if not pd.isna(problem): + if name == 'Query': + return name + if problem != "": return ui.tooltip(ui.tags.a(name, href=str(link), target="_blank", style="text-align:center;font-style:oblique;color:#ec7a80"), f"{problem}") else: return ui.tags.a(name, href=str(link), target="_blank") @@ -182,6 +184,12 @@ def create_app(db=None): ui.column(4, ui.output_ui("loaded_example_text")), ui.column( 4, + ui.input_select( + "search_type", + "Search Type", + ["STR DB", "CLASTR"], + width="90%" + ), ui.input_action_button( "search", "Search", @@ -194,12 +202,6 @@ def create_app(db=None): class_="btn-danger", width="45%", ), - ui.input_action_button( - "clastr", - "Clastr", - class_="btn-success", - width="45%", - ), ), ), ), @@ -211,23 +213,10 @@ def create_app(db=None): ui.column(3, ui.tags.h3("Results")), ui.column(1, ui.p("")), ), - ui.navset_card_tab( - ui.nav_panel( - "STR Profiler", - 
ui.column( - 12, - {"id": "res_card"}, - ui.output_table("out_result"), - ), - ), - ui.nav_panel( - "CLASTR", - ui.column( - 12, - {"id": "res_card"}, - ui.output_table("clastr_table"), - ), - ), + ui.column( + 12, + {"id": "res_card"}, + ui.output_table("out_result"), ), full_screen=False, fill=False, @@ -418,7 +407,7 @@ def create_app(db=None): icon_svg("github", width="30px"), href="https://github.com/j-andrews7/strprofiler", target="_blank", - ) + ), ), title=ui.tags.a( ui.tags.img( @@ -441,7 +430,6 @@ def server(input, output, session): str_database = reactive.value(init_db) db_name = reactive.value(init_db_name) output_df = reactive.value(None) - output_df_clastr = reactive.value(None) demo_vals = reactive.value(None) demo_name = reactive.value(None) markers = reactive.value([i for i in list(init_db[next(iter(init_db))].keys()) if not any([e for e in ['Center', 'Passage'] if e in i])]) @@ -580,45 +568,6 @@ def loaded_example_text(): x = ui.strong("") return x - @reactive.calc - @reactive.event(input.clastr) - def clastr_results(): - query = {m: input[m]() for m in markers()} - thinking = ui.notification_show("Message ", duration=None) - clastr_return = clastr_query(query, input.query_filter(), input.score_amel_query(), input.query_filter_threshold()) - ui.notification_remove(thinking) - return clastr_return - - @output - @render.table - def clastr_table(): - output_df_clastr.set(clastr_results()) - if output_df_clastr() is not None: - out_df = output_df_clastr().copy() - print(out_df) - if ('No Clastr Result' in out_df.columns) | ('Error' in out_df.columns): - return out_df - try: - out_df['link'] = out_df.apply(lambda x: _link_wrap(x.accession, x.accession_link, x.problem), axis=1) - except Exception: - out_df['link'] = out_df.apply(lambda x: _link_wrap(x.accession, x.accession_link, pd.NA), axis=1) - out_df = out_df.drop(['accession', 'accession_link', 'species'], axis=1).rename( - columns={"link": "Accession", "name": "Name", "bestScore": "Score"}) - cols 
= list(out_df.columns) - cols = [cols[-1]] + cols[:-1] - out_df = out_df[cols] - out_df = out_df.style.set_table_attributes( - 'class="dataframe shiny-table table w-auto"' - ).hide(axis="index").format( - { - "Score": "{0:0.2f}", - }, - na_rep="" - ) - else: - out_df = pd.DataFrame({"No input provided.": []}) - return out_df - # Dealing with calculating a results table # Catch when either reset or search is clicked # If reset, clear the query and run to make an empty df. @@ -650,7 +599,6 @@ def loaded_example_text(): ui.remove_ui("#inserted-downloader") res_click.set(0) - return None if res_click() == 0: ui.insert_ui( @@ -664,34 +612,70 @@ def loaded_example_text(): where="afterEnd", ) res_click.set(1) - - return _single_query( - query, - str_database(), - input.score_amel_query(), - input.mix_threshold_query(), - input.query_filter(), - input.query_filter_threshold(), - ) + thinking = ui.notification_show("Message: API Query Running.", duration=None) + # isolate input.search_type to prevent trigger when options change. 
+ with reactive.isolate(): + if input.search_type() == 'STR DB': + results = _single_query( + query, + str_database(), + input.score_amel_query(), + input.mix_threshold_query(), + input.query_filter(), + input.query_filter_threshold(), + ) + elif input.search_type() == 'CLASTR': + results = clastr_query( + query, + input.query_filter(), + input.score_amel_query(), + input.query_filter_threshold() + ) + ui.notification_remove(thinking) + return results @output @render.table def out_result(): output_df.set(output_results()) if output_df() is not None: - out_df = output_df().copy() - out_df = out_df.style.set_table_attributes( - 'class="dataframe shiny-table table w-auto"' - ).hide(axis="index").apply(_highlight_non_matches, subset=markers(), axis=0).format( - { - "Shared Markers": "{0:0.0f}", - "Shared Alleles": "{0:0.0f}", - "Tanabe Score": "{0:0.2f}", - "Masters Query Score": "{0:0.2f}", - "Masters Ref Score": "{0:0.2f}", - }, - na_rep="" - ) + # isolate input.search_type to prevent trigger when options change. 
+ with reactive.isolate(): + if input.search_type() == 'STR DB': + out_df = output_df().copy() + out_df = out_df.style.set_table_attributes( + 'class="dataframe shiny-table table w-auto"' + ).hide(axis="index").apply(_highlight_non_matches, subset=markers(), axis=0).format( + { + "Shared Markers": "{0:0.0f}", + "Shared Alleles": "{0:0.0f}", + "Tanabe Score": "{0:0.2f}", + "Masters Query Score": "{0:0.2f}", + "Masters Ref Score": "{0:0.2f}", + }, + na_rep="" + ) + elif input.search_type() == 'CLASTR': + out_df = output_df().copy() + print(out_df) + if ('No Clastr Result' in out_df.columns) | ('Error' in out_df.columns): + return out_df + try: + out_df['link'] = out_df.apply(lambda x: _link_wrap(x.accession, x.accession_link, x.problem), axis=1) + out_df.drop(columns=['problem'], inplace=True) + except Exception: + out_df['link'] = out_df.apply(lambda x: _link_wrap(x.accession, x.accession_link, ''), axis=1) + + out_df = out_df.drop(['accession', 'accession_link', 'species'], axis=1).rename( + columns={"link": "Accession", "name": "Name", "bestScore": "Score"}) + + cols = list(out_df.columns) + cols = [cols[-1]] + cols[:-1] + + out_df = out_df[cols] + out_df = out_df.style.set_table_attributes( + 'class="dataframe shiny-table table w-auto"' + ).hide(axis="index").apply(_highlight_non_matches, subset=markers(), axis=0) else: out_df = pd.DataFrame({"No input provided.": []}) return out_df From 9bba28fc359420b1cedd480100d39150b8b4e90b Mon Sep 17 00:00:00 2001 From: MikeWLloyd Date: Thu, 2 May 2024 15:51:20 -0400 Subject: [PATCH 03/29] tooltip added, help updated, req for deploy updated --- requirements.txt | 5 ++- strprofiler/shiny_app/shiny_app.py | 33 +++++++++----- strprofiler/shiny_app/www/help.html | 69 ++++++++++++++++++++++++++--- strprofiler/shiny_app/www/help.md | 31 ++++++++++--- 4 files changed, 115 insertions(+), 23 deletions(-) diff --git a/requirements.txt b/requirements.txt index cc6d05c..9ebff4b 100644 --- a/requirements.txt +++ b/requirements.txt @@ 
-1,7 +1,10 @@ # Automatically generated by https://github.com/damnever/pigar. +faicons==0.2.2 +flatten-json==0.1.14 numpy==1.26.3 -pandas==2.2 +pandas==2.2.2 +requests==2.31.0 rich-click==1.7.3 shiny==0.8.1 shinyswatch==0.4.2 diff --git a/strprofiler/shiny_app/shiny_app.py b/strprofiler/shiny_app/shiny_app.py index 0d08163..397fac2 100644 --- a/strprofiler/shiny_app/shiny_app.py +++ b/strprofiler/shiny_app/shiny_app.py @@ -187,14 +187,19 @@ def create_app(db=None): ui.input_select( "search_type", "Search Type", - ["STR DB", "CLASTR"], + ["STRprofiler Database", "Cellosaurus Database (CLASTR)"], width="90%" ), - ui.input_action_button( - "search", - "Search", - class_="btn-success", - width="45%", + ui.tooltip( + ui.input_action_button( + "search", + "Search", + class_="btn-success", + width="45%", + ), + "Query STRprofilier Database", + id="tt_selected_search", + placement="left", ), ui.input_action_button( "reset", @@ -450,6 +455,14 @@ def database_file(): width="100%", ) + @reactive.effect + @reactive.event(input.search_type) + def update_tooltip_msg(): + if input.search_type() == 'STRprofiler Database': + ui.update_tooltip("tt_selected_search", 'Query STRprofilier Database', show=False) + if input.search_type() == 'Cellosaurus Database (CLASTR)': + ui.update_tooltip("tt_selected_search", 'Query Cellosaurus Database via CLASTR API', show=False) + @render.ui @reactive.event(markers) def marker_inputs(): @@ -615,7 +628,7 @@ def loaded_example_text(): thinking = ui.notification_show("Message: API Query Running.", duration=None) # isolate input.search_type to prevent trigger when options change. 
with reactive.isolate(): - if input.search_type() == 'STR DB': + if input.search_type() == 'STRprofiler Database': results = _single_query( query, str_database(), @@ -624,7 +637,7 @@ def loaded_example_text(): input.query_filter(), input.query_filter_threshold(), ) - elif input.search_type() == 'CLASTR': + elif input.search_type() == 'Cellosaurus Database (CLASTR)': results = clastr_query( query, input.query_filter(), @@ -641,7 +654,7 @@ def out_result(): if output_df() is not None: # isolate input.search_type to prevent trigger when options change. with reactive.isolate(): - if input.search_type() == 'STR DB': + if input.search_type() == 'STRprofiler Database': out_df = output_df().copy() out_df = out_df.style.set_table_attributes( 'class="dataframe shiny-table table w-auto"' @@ -655,7 +668,7 @@ def out_result(): }, na_rep="" ) - elif input.search_type() == 'CLASTR': + elif input.search_type() == 'Cellosaurus Database (CLASTR)': out_df = output_df().copy() print(out_df) if ('No Clastr Result' in out_df.columns) | ('Error' in out_df.columns): diff --git a/strprofiler/shiny_app/www/help.html b/strprofiler/shiny_app/www/help.html index 6c1bdf1..513700f 100644 --- a/strprofiler/shiny_app/www/help.html +++ b/strprofiler/shiny_app/www/help.html @@ -1524,12 +1524,20 @@

Default Database

Laboratory PDX program

If this app is hosted with a custom database, please contact the host for information on the database source.

+ +
+

CLASTR / Cellosaurus API Query

+

Query of the Cellosaurus +(Bairoch, 2018) cell line database is also available for single samples +via the CLASTR +(Robin, Capes-Davis, and Bairoch, 2019) REST +API.


Single Query Report

For individual samples, a report is generated with the following -fields.

+fields when ‘STR DB’ is selected as the search type.

@@ -1562,17 +1570,59 @@

Single Query Report

+sample (if Tanabe selected). +database sample (if Master Query selected). +and database sample (if Master Ref selected). + + + + + + +
Tanabe Score Tanabe similarity score between the query and database -sample.
Master Query Score Master ‘Query’ similarity score between the query and -database sample.
Master Ref Score Master ‘Reference’ similarity score between the query -and database sample.
Markers 1 … nMarker alleles with mismatches highlight.
+

The report is filtered to include only those samples with greater +than or equal to the Similarity Score Filter Threshold +defined by the user, and report only the similarity score selected.

+

When ‘CLASTR’ is selected as the search type, a report is generated +with the following fields:

+ ++++ + + + + + + + + + + + + + + + + + + + + + +
Output FieldDescription
AccessionCellosaurus cell line accession ID. Links are provided +to each accession information page.
NameCell line name.
ScoreSimilarity score between the query and cell line
+sample. Reported score reflects the selected Similarity Score
+Filter.
Markers 1 … nMarker alleles with mismatches highlight.
@@ -1716,14 +1766,21 @@

Batch and File Query Specfic


-
-

Reference

+
+

References

strprofiler is provided under the MIT license. If you use this app in your research please cite:
Jared Andrews, Mike Lloyd, & Sam Culley. (2024). j-andrews7/strprofiler: v0.2.0. Zenodo. https://doi.org/10.5281/zenodo.10544686

+

Bairoch A. (2018) The Cellosaurus, a cell line knowledge resource. +Journal of Biomolecular Techniques. 29:25-38. DOI: +10.7171/jbt.18-2902-002; PMID: 29805321

+

Robin, T., Capes-Davis, A. & Bairoch, A. (2019) CLASTR: the +Cellosaurus STR Similarity Search Tool - A Precious Help for Cell Line +Authentication. International Journal of Cancer. PubMed: 31444973  DOI: +10.1002/IJC.32639

diff --git a/strprofiler/shiny_app/www/help.md b/strprofiler/shiny_app/www/help.md index c6b61f1..7aac4c7 100644 --- a/strprofiler/shiny_app/www/help.md +++ b/strprofiler/shiny_app/www/help.md @@ -20,21 +20,36 @@ The report will differ depending on if an individual sample or batch of samples ## Default Database Current data underlying the default database were provided by: [The Jackson Laboratory PDX program](https://tumor.informatics.jax.org/mtbwi/pdxSearch.do) -If this app is hosted with a custom database, please contact the host for information on the database source. +If this app is hosted with a custom database, please contact the host for information on the database source. + +## CLASTR / Cellosaurus API Query +Query of the [Cellosaurus](https://www.cellosaurus.org/description.html) (Bairoch, 2018) cell line database is also available for single samples via the [CLASTR](https://www.cellosaurus.org/str-search/) (Robin, Capes-Davis, and Bairoch, 2019) [REST API](https://www.cellosaurus.org/str-search/help.html#5). --- ## Single Query Report -For individual samples, a report is generated with the following fields. +For individual samples, a report is generated with the following fields when 'STR DB' is selected as the search type. | Output Field | Description | | :--- | :---- | | Mixed Sample | Flag to indicate sample mixing. Sample mixing is determined by the "'Mixed' Sample Threshold" option. If more markers are tri+ allelic than the threshold, samples are flagged as potentially mixed. | | Shared Markers | Number of markers shared between the query and database sample. | | Shared Alleles | Number of alleles shared between the query and database sample. | -| Tanabe Score | Tanabe similarity score between the query and database sample. | -| Master Query Score | Master 'Query' similarity score between the query and database sample. | -| Master Ref Score | Master 'Reference' similarity score between the query and database sample. 
| +| Tanabe Score | Tanabe similarity score between the query and database sample (if Tanabe selected). | +| Master Query Score | Master 'Query' similarity score between the query and database sample (if Master Query selected). | +| Master Ref Score | Master 'Reference' similarity score between the query and database sample (if Master Ref selected). | +| Markers 1 ... n | Marker alleles with mismatches highlight. | + +The report is filtered to include only those samples with greater than or equal to the `Similarity Score Filter Threshold` defined by the user, and report only the similarity score selected. + +When 'CLASTR' is selected as the search type, a report is generated with the following fields: + +| Output Field | Description | +| :--- | :---- | +| Accession | Cellosaurus cell line accession ID. Links are provided to each accession information page. | +| Name | Cell line name. | +| Score | Similarity score between the query and cell line sample. Reported score reflectes the selected Similarity Score Filter. | +| Markers 1 ... n | Marker alleles with mismatches highlight. | The report is filtered to include only those samples with greater than or equal to the `Similarity Score Filter Threshold` defined by the user. @@ -108,7 +123,11 @@ For batch samples entered in the File Query tab, `STR Similarity` will generate --- -# Reference +# References `strprofiler` is provided under the MIT license. If you use this app in your research please cite: Jared Andrews, Mike Lloyd, & Sam Culley. (2024). j-andrews7/strprofiler: v0.2.0. Zenodo. https://doi.org/10.5281/zenodo.10544686 + +Bairoch A. (2018) The Cellosaurus, a cell line knowledge resource. Journal of Biomolecular Techniques. 29:25-38. DOI: 10.7171/jbt.18-2902-002; PMID: 29805321 + +Robin, T., Capes-Davis, A. & Bairoch, A. (2019) CLASTR: the Cellosaurus STR Similarity Search Tool - A Precious Help for Cell Line Authentication. International Journal of Cancer. 
PubMed: 31444973  DOI: 10.1002/IJC.32639 \ No newline at end of file From 1cd2ef4b07220af54eaac4f831be246bbc7c2e3a Mon Sep 17 00:00:00 2001 From: MikeWLloyd Date: Fri, 3 May 2024 08:39:03 -0400 Subject: [PATCH 04/29] add window title --- strprofiler/shiny_app/shiny_app.py | 1 + 1 file changed, 1 insertion(+) diff --git a/strprofiler/shiny_app/shiny_app.py b/strprofiler/shiny_app/shiny_app.py index 397fac2..17fcbfd 100644 --- a/strprofiler/shiny_app/shiny_app.py +++ b/strprofiler/shiny_app/shiny_app.py @@ -103,6 +103,7 @@ def create_app(db=None): ) app_ui = ui.page_fluid( + ui.panel_title('', "STR Profiler"), ui.tags.style("#main {padding:12px !important} #sidebar {padding:12px}"), ui.tags.style( ".h3 {margin-bottom:0.1rem; line-height:1} .card-body {padding-top:6px; padding-bottom:6px}" From 86453204cefdb0f6962e7073d780f3b3e9aa9cf4 Mon Sep 17 00:00:00 2001 From: MikeWLloyd Date: Thu, 9 May 2024 15:33:28 -0400 Subject: [PATCH 05/29] clastr batch method rough in --- strprofiler/shiny_app/clastr_api.py | 62 +++++++++++- strprofiler/shiny_app/shiny_app.py | 142 +++++++++++++++++++--------- 2 files changed, 154 insertions(+), 50 deletions(-) diff --git a/strprofiler/shiny_app/clastr_api.py b/strprofiler/shiny_app/clastr_api.py index 6a7386b..0082298 100644 --- a/strprofiler/shiny_app/clastr_api.py +++ b/strprofiler/shiny_app/clastr_api.py @@ -4,7 +4,7 @@ from flatten_json import flatten -def clastr_query(query, query_filter, include_amelogenin, score_filter): +def _clastr_query(query, query_filter, include_amelogenin, score_filter): url = "https://www.cellosaurus.org/str-search/api/query/" dct = {k: [v] for k, v in query.items()} @@ -65,7 +65,7 @@ def clastr_query(query, query_filter, include_amelogenin, score_filter): # Marker names are not consistant across results. MarkerName[1] != the same thing in all cases. # We must track marker name by index by result. 
- # The same logic from above applies, split the compount column name string, + # The same logic from above applies, split the compound column name string, # Melt on markerID, and then merge with concat allele made above. # Finally, pivot into a table and rejoin to higher level results. marker_names = df.filter(regex='^profiles_0_.*_name').T @@ -105,6 +105,30 @@ def clastr_query(query, query_filter, include_amelogenin, score_filter): return query_added +def _clastr_batch_query(query, query_filter, include_amelogenin, score_filter): + url = "https://www.cellosaurus.org/str-search/api/batch/" + + if query_filter == "Tanabe": + query = [dict(item, **{'algorithm': 1}) for item in query] + elif query_filter == "Masters Query": + query = [dict(item, **{'algorithm': 2}) for item in query] + elif query_filter == "Masters Reference": + query = [dict(item, **{'algorithm': 2}) for item in query] + + query = [dict(item, **{'includeAmelogenin': include_amelogenin}) for item in query] + query = [dict(item, **{'scoreFilter': score_filter}) for item in query] + query = [dict(item, **{'outputFormat': 'xlsx'}) for item in query] + + r = requests.post(url, data=json.dumps(query)) + + try: + r.raise_for_status() + except requests.exceptions.HTTPError as e: + return pd.DataFrame({"Error": [str(e)]}) + + return r + + if __name__ == '__main__': # url = "https://www.cellosaurus.org/str-search/api/query/%" # Use above URL for 400 error @@ -143,10 +167,42 @@ def clastr_query(query, query_filter, include_amelogenin, score_filter): # "vWA": "16", # } - r = clastr_query(data, 'Tanabe', False, 70) + r = _clastr_query(data, 'Tanabe', False, 70) print(r) + batch_data = [{ + "description": "Example 1", + "Amelogenin": "X", + "CSF1PO": "13,14", + "D5S818": "13", + "D7S820": "8", + "D13S317": "12", + "FGA": "24", + "TH01": "8", + "TPOX": "11", + "vWA": "16", + }, { + "description": "Example 2", + "Amelogenin": "X, Y", + "CSF1PO": "13", + "D5S818": "13, 14", + "D7S820": "8, 19", + "D13S317": "11, 
12", + "FGA": "24", + "TH01": "8", + "TPOX": "11", + "vWA": "15", + "outputFormat": "xlsx" + }] + + r = _clastr_batch_query(batch_data, 'Tanabe', False, 70) + + with open('testing.xlsx', 'wb') as fd: + for chunk in r.iter_content(chunk_size=128): + fd.write(chunk) + + # JSON data structure: # { # "description": "", diff --git a/strprofiler/shiny_app/shiny_app.py b/strprofiler/shiny_app/shiny_app.py index 17fcbfd..b977253 100644 --- a/strprofiler/shiny_app/shiny_app.py +++ b/strprofiler/shiny_app/shiny_app.py @@ -6,7 +6,7 @@ import strprofiler.utils as sp from strprofiler.shiny_app.calc_functions import _single_query, _batch_query, _file_query -from strprofiler.shiny_app.clastr_api import clastr_query +from strprofiler.shiny_app.clastr_api import _clastr_query, _clastr_batch_query from datetime import date import time @@ -272,6 +272,12 @@ def create_app(db=None): multiple=False, width="100%", ), + ui.input_select( + "search_type_batch", + "Search Type", + ["STRprofiler Database", "Cellosaurus Database (CLASTR)"], + width="100%" + ), ui.input_action_button( "csv_query", "CSV Query", @@ -639,7 +645,7 @@ def loaded_example_text(): input.query_filter_threshold(), ) elif input.search_type() == 'Cellosaurus Database (CLASTR)': - results = clastr_query( + results = _clastr_query( query, input.query_filter(), input.score_amel_query(), @@ -693,6 +699,7 @@ def out_result(): else: out_df = pd.DataFrame({"No input provided.": []}) return out_df + # TO DO: Remove results table when changing query methods. # Dealing with downloading results, when requested. # Note that output_results() is a reactive Calc result. @@ -718,26 +725,31 @@ def download(): @render.data_frame def out_batch_df(): output_df.set(batch_query_results()) - try: - return render.DataTable(output_df()) - except Exception: - m = ui.modal( - ui.div( - {"style": "font-size: 18px"}, - ui.HTML( - ( - "There was a fatal error in the query.

" - "Ensure marker names match expectation, and that" - " no special characters (spaces, etc.) were used in sample names." - ) - ), - ), - title="Batch Query Error", - easy_close=True, - footer=None, - ) - ui.modal_show(m) - return render.DataTable(pd.DataFrame({"Failed Query. Fix Input File": []})) + print(output_df) + with reactive.isolate(): + if input.search_type_batch() == 'STRprofiler Database': + try: + return render.DataTable(output_df()) + except Exception: + m = ui.modal( + ui.div( + {"style": "font-size: 18px"}, + ui.HTML( + ( + "There was a fatal error in the query.

" + "Ensure marker names match expectation, and that" + " no special characters (spaces, etc.) were used in sample names." + ) + ), + ), + title="Batch Query Error", + easy_close=True, + footer=None, + ) + ui.modal_show(m) + return render.DataTable(pd.DataFrame({"Failed Query. Fix Input File": []})) + elif input.search_type_batch() == 'Cellosaurus Database (CLASTR)': + return render.DataTable(pd.DataFrame({"CASTR Batch Query": ['Download Results']})) # File input loading @reactive.calc @@ -776,39 +788,75 @@ def batch_query_results(): return pd.DataFrame({"Failed Query. Fix Input File": []}) if res_click_file() == 0: - ui.insert_ui( - ui.div( - {"id": "inserted-downloader2"}, - ui.download_button( - "download2", "Download CSV", width="25%", class_="btn-primary" + if input.search_type_batch() == 'STRprofiler Database': + ui.insert_ui( + ui.div( + {"id": "inserted-downloader2"}, + ui.download_button( + "download2", "Download CSV", width="25%", class_="btn-primary" + ), ), - ), - selector="#res_card_batch", - where="beforeEnd", - ) - res_click_file.set(1) - return _batch_query( - query_df, - str_database(), - input.score_amel_batch(), - input.mix_threshold_batch(), - input.tan_threshold_batch(), - input.mas_q_threshold_batch(), - input.mas_r_threshold_batch(), - ) + selector="#res_card_batch", + where="beforeEnd", + ) + res_click_file.set(1) + elif input.search_type_batch() == 'Cellosaurus Database (CLASTR)': + ui.insert_ui( + ui.div( + {"id": "inserted-downloader2"}, + ui.download_button( + "download2", "Download XLSX", width="25%", class_="btn-primary" + ), + ), + selector="#res_card_batch", + where="beforeEnd", + ) + res_click_file.set(1) + + with reactive.isolate(): + if input.search_type_batch() == 'STRprofiler Database': + results = _batch_query( + query_df, + str_database(), + input.score_amel_batch(), + input.mix_threshold_batch(), + input.tan_threshold_batch(), + input.mas_q_threshold_batch(), + input.mas_r_threshold_batch(), + ) + elif 
input.search_type_batch() == 'Cellosaurus Database (CLASTR)': + clastr_query = [(lambda d: d.update(description=key) or d)(val) for (key, val) in query_df.items()] + results = _clastr_batch_query( + clastr_query, + input.query_filter(), + input.score_amel_batch(), + input.query_filter_threshold() + ) + # TO DO: Change to a batch filter option set. + return results + + # File input loading + @reactive.effect + @reactive.event(input.search_type_batch) + def _(): + ui.remove_ui("#inserted-downloader2") + res_click_file.set(0) + # TO DO: Remove batch results table when changing methods. # Dealing with dowloading results, when requested. # Note that batch_query_results() is a reactive Calc result. @render.download( - filename="STR_Batch_Results_" - + date.today().isoformat() - + "_" - + time.strftime("%Hh-%Mm", time.localtime()) - + ".csv" + filename=lambda: "STR_Batch_Results_" + date.today().isoformat() + "_" + time.strftime("%Hh-%Mm", time.localtime()) + ".csv" + if f"{input.search_type_batch()}" == 'STRprofiler Database' + else "STR_Batch_Results_" + date.today().isoformat() + "_" + time.strftime("%Hh-%Mm", time.localtime()) + ".xlsx" ) def download2(): if batch_query_results() is not None: - yield batch_query_results().to_csv(index=False) + if input.search_type_batch() == 'STRprofiler Database': + yield batch_query_results().to_csv(index=False) + if input.search_type_batch() == 'Cellosaurus Database (CLASTR)': + for chunk in batch_query_results().iter_content(chunk_size=128): + yield chunk # Dealing with passing example file to user. 
@render.download() From 1bd91a99be750effb4dc609d3bd1d5df26ddeac6 Mon Sep 17 00:00:00 2001 From: Jared Andrews Date: Tue, 14 May 2024 12:32:45 -0500 Subject: [PATCH 06/29] add requirements, bump version --- .gitignore | 1 + CHANGELOG.md | 7 +++++++ docs/requirements.txt | 4 +++- pyproject.toml | 4 +++- requirements.txt | 3 ++- 5 files changed, 16 insertions(+), 3 deletions(-) diff --git a/.gitignore b/.gitignore index bad588f..719d172 100644 --- a/.gitignore +++ b/.gitignore @@ -25,6 +25,7 @@ share/python-wheels/ .installed.cfg *.egg MANIFEST +.conda/* # PyInstaller # Usually these files are written by a python script from a template diff --git a/CHANGELOG.md b/CHANGELOG.md index 0ea17a6..ef1f28f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,12 @@ # Changelog +## v0.3.0 + +**Release date:** + + - Added ability to query the CLASTR API for single or batch queries from within the STRprofiler + app - [#24](https://github.com/j-andrews7/strprofiler/pull/24). + ## v0.2.0 **Release date: 04/16/2024** diff --git a/docs/requirements.txt b/docs/requirements.txt index 367585e..c262090 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -4,4 +4,6 @@ myst-parser rich-click shiny shinyswatch -faicons \ No newline at end of file +faicons +requests +flatten-json \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index f83e082..834af42 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "strprofiler" -version = "0.2.0" +version = "0.3.0" description = "A simple python utility to compare short tandem repeat (STR) profiles." 
authors = ["Jared Andrews ", "Mike Lloyd "] @@ -18,6 +18,8 @@ shiny = "^0.8.0" shinyswatch = "^0.4.2" Jinja2 = "^3.1.3" faicons = "^0.2.2" +requests = "^2.31.0" +flatten-json = "^0.1.14" [tool.poetry.dev-dependencies] diff --git a/requirements.txt b/requirements.txt index 9ebff4b..daba595 100644 --- a/requirements.txt +++ b/requirements.txt @@ -8,4 +8,5 @@ requests==2.31.0 rich-click==1.7.3 shiny==0.8.1 shinyswatch==0.4.2 -Jinja2==3.1.2 \ No newline at end of file +Jinja2==3.1.2 +requests==2.31.0 \ No newline at end of file From 579a1cf82538d398a12a7ccd0f5c160e277773c1 Mon Sep 17 00:00:00 2001 From: MikeWLloyd Date: Thu, 16 May 2024 09:16:23 -0400 Subject: [PATCH 07/29] fix for #26 --- .gitignore | 3 +- strprofiler/shiny_app/clastr_api.py | 44 ++++++++++++++++++++++++++--- strprofiler/shiny_app/shiny_app.py | 1 - 3 files changed, 42 insertions(+), 6 deletions(-) diff --git a/.gitignore b/.gitignore index 719d172..8599a3a 100644 --- a/.gitignore +++ b/.gitignore @@ -153,4 +153,5 @@ cython_debug/ #.idea/ .DS_Store -strprofiler.json \ No newline at end of file +strprofiler.json +testing.xlsx diff --git a/strprofiler/shiny_app/clastr_api.py b/strprofiler/shiny_app/clastr_api.py index 0082298..60ade3d 100644 --- a/strprofiler/shiny_app/clastr_api.py +++ b/strprofiler/shiny_app/clastr_api.py @@ -18,6 +18,16 @@ def _clastr_query(query, query_filter, include_amelogenin, score_filter): elif query_filter == "Masters Reference": query['algorithm'] = 3 + if "PentaD" in query.keys(): + query["Penta D"] = query.pop("PentaD") + elif "Penta_D" in query.keys(): + query["Penta D"] = query.pop("Penta_D") + + if "PentaE" in query.keys(): + query["Penta E"] = query.pop("PentaE") + elif "Penta_E" in query.keys(): + query["Penta E"] = query.pop("Penta_E") + query['includeAmelogenin'] = include_amelogenin query['scoreFilter'] = score_filter @@ -87,6 +97,11 @@ def _clastr_query(query, query_filter, include_amelogenin, score_filter): merged['accession_link'] = 
"https://web.expasy.org/cellosaurus/" + merged['accession'] + if "Penta D" in merged.keys(): + merged["PentaD"] = merged.pop("Penta D") + if "Penta E" in merged.keys(): + merged["PentaE"] = merged.pop("Penta E") + # add the query line to the top of merged, and reorder columns query_added = pd.concat([query_df, merged]).reset_index(drop=True) @@ -147,8 +162,8 @@ def _clastr_batch_query(query, query_filter, include_amelogenin, score_filter): "D19S433": "14", "D21S11": "31,31.2", "FGA": "23", - "Penta D": "", - "Penta E": "", + "PentaD": "", + "PentaE": "", "TH01": "7,9.3", "TPOX": "8", "vWA": "18", @@ -159,12 +174,33 @@ def _clastr_batch_query(query, query_filter, include_amelogenin, score_filter): # "Amelogenin": "X", # "CSF1PO": "13,14", # "D5S818": "13", - # "D7S820": "8", + # "D7S820": "8,9", # "D13S317": "12", # "FGA": "24", # "TH01": "8", # "TPOX": "11", - # "vWA": "16", + # "vWA": "16" + # } + + # # stock example from https://www.cellosaurus.org/str-search/ + # data = {"Amelogenin": "X", + # "CSF1PO": "11,12", + # "D2S1338": "19,23", + # "D3S1358": "15,17", + # "D5S818": "11,12", + # "D7S820": "10", + # "D8S1179": "10", + # "D13S317": "11,12", + # "D16S539": "11,12", + # "D18S51": "13", + # "D19S433": "14", + # "D21S11": "29,30", + # "FGA": "20,22", + # "PentaD": "11,14", + # "PentaE": "14,16", + # "TH01": "6,9", + # "TPOX": "8,9", + # "vWA": "17,19" # } r = _clastr_query(data, 'Tanabe', False, 70) diff --git a/strprofiler/shiny_app/shiny_app.py b/strprofiler/shiny_app/shiny_app.py index b977253..ff1c2c5 100644 --- a/strprofiler/shiny_app/shiny_app.py +++ b/strprofiler/shiny_app/shiny_app.py @@ -677,7 +677,6 @@ def out_result(): ) elif input.search_type() == 'Cellosaurus Database (CLASTR)': out_df = output_df().copy() - print(out_df) if ('No Clastr Result' in out_df.columns) | ('Error' in out_df.columns): return out_df try: From 77529be3f0844788a68b5eec75cec79557038fe2 Mon Sep 17 00:00:00 2001 From: Jared Andrews Date: Thu, 16 May 2024 10:02:43 -0500 Subject: 
[PATCH 08/29] additional tweaks for #26 --- strprofiler/shiny_app/clastr_api.py | 21 +++++---------- strprofiler/utils.py | 41 ++++++++++++++++++++++------- 2 files changed, 37 insertions(+), 25 deletions(-) diff --git a/strprofiler/shiny_app/clastr_api.py b/strprofiler/shiny_app/clastr_api.py index 60ade3d..16f1c26 100644 --- a/strprofiler/shiny_app/clastr_api.py +++ b/strprofiler/shiny_app/clastr_api.py @@ -2,7 +2,7 @@ import json import pandas as pd from flatten_json import flatten - +from strprofiler.utils import _pentafix def _clastr_query(query, query_filter, include_amelogenin, score_filter): url = "https://www.cellosaurus.org/str-search/api/query/" @@ -17,16 +17,8 @@ def _clastr_query(query, query_filter, include_amelogenin, score_filter): query['algorithm'] = 2 elif query_filter == "Masters Reference": query['algorithm'] = 3 - - if "PentaD" in query.keys(): - query["Penta D"] = query.pop("PentaD") - elif "Penta_D" in query.keys(): - query["Penta D"] = query.pop("Penta_D") - - if "PentaE" in query.keys(): - query["Penta E"] = query.pop("PentaE") - elif "Penta_E" in query.keys(): - query["Penta E"] = query.pop("Penta_E") + + query = _pentafix(query, reverse = True) query['includeAmelogenin'] = include_amelogenin query['scoreFilter'] = score_filter @@ -97,10 +89,7 @@ def _clastr_query(query, query_filter, include_amelogenin, score_filter): merged['accession_link'] = "https://web.expasy.org/cellosaurus/" + merged['accession'] - if "Penta D" in merged.keys(): - merged["PentaD"] = merged.pop("Penta D") - if "Penta E" in merged.keys(): - merged["PentaE"] = merged.pop("Penta E") + merged = _pentafix(merged) # add the query line to the top of merged, and reorder columns @@ -123,6 +112,8 @@ def _clastr_query(query, query_filter, include_amelogenin, score_filter): def _clastr_batch_query(query, query_filter, include_amelogenin, score_filter): url = "https://www.cellosaurus.org/str-search/api/batch/" + query = [_pentafix(item, reverse = True) for item in query] + if 
query_filter == "Tanabe": query = [dict(item, **{'algorithm': 1}) for item in query] elif query_filter == "Masters Query": diff --git a/strprofiler/utils.py b/strprofiler/utils.py index 987128f..d3e533d 100644 --- a/strprofiler/utils.py +++ b/strprofiler/utils.py @@ -35,17 +35,38 @@ def _clean_element(x): return ",".join(sorted_elements) -def _pentafix(samps_dict): +def _pentafix(samps_dict, reverse = False): """Takes a dictionary of alleles and returns a dictionary with common Penta markers renamed for consistency.""" - if "Penta D" in samps_dict.keys(): - samps_dict["PentaD"] = samps_dict.pop("Penta D") - elif "Penta_D" in samps_dict.keys(): - samps_dict["PentaD"] = samps_dict.pop("Penta_D") - - if "Penta E" in samps_dict.keys(): - samps_dict["PentaE"] = samps_dict.pop("Penta E") - elif "Penta_E" in samps_dict.keys(): - samps_dict["PentaE"] = samps_dict.pop("Penta_E") + if not reverse: + if "Penta C" in samps_dict.keys(): + samps_dict["PentaC"] = samps_dict.pop("Penta C") + elif "Penta_C" in samps_dict.keys(): + samps_dict["PentaC"] = samps_dict.pop("Penta_C") + + if "Penta D" in samps_dict.keys(): + samps_dict["PentaD"] = samps_dict.pop("Penta D") + elif "Penta_D" in samps_dict.keys(): + samps_dict["PentaD"] = samps_dict.pop("Penta_D") + + if "Penta E" in samps_dict.keys(): + samps_dict["PentaE"] = samps_dict.pop("Penta E") + elif "Penta_E" in samps_dict.keys(): + samps_dict["PentaE"] = samps_dict.pop("Penta_E") + else: + if "PentaC" in samps_dict.keys(): + samps_dict["Penta C"] = samps_dict.pop("PentaC") + elif "Penta_C" in samps_dict.keys(): + samps_dict["Penta C"] = samps_dict.pop("Penta_C") + + if "PentaD" in samps_dict.keys(): + samps_dict["Penta D"] = samps_dict.pop("PentaD") + elif "Penta_D" in samps_dict.keys(): + samps_dict["Penta D"] = samps_dict.pop("Penta_D") + + if "PentaE" in samps_dict.keys(): + samps_dict["Penta E"] = samps_dict.pop("PentaE") + elif "Penta_E" in samps_dict.keys(): + samps_dict["Penta E"] = samps_dict.pop("Penta_E") return 
samps_dict From 771c133d9ac628ff546a5071a9dc1b4293949d35 Mon Sep 17 00:00:00 2001 From: MikeWLloyd Date: Thu, 16 May 2024 11:26:25 -0400 Subject: [PATCH 09/29] add marker check for single query --- strprofiler/shiny_app/clastr_api.py | 58 +++++++++++++++++++++++++++-- strprofiler/shiny_app/shiny_app.py | 19 +++++++++- 2 files changed, 73 insertions(+), 4 deletions(-) diff --git a/strprofiler/shiny_app/clastr_api.py b/strprofiler/shiny_app/clastr_api.py index 16f1c26..e3c5157 100644 --- a/strprofiler/shiny_app/clastr_api.py +++ b/strprofiler/shiny_app/clastr_api.py @@ -4,6 +4,53 @@ from flatten_json import flatten from strprofiler.utils import _pentafix + +def _valid_marker_check(markers): + + valid_api_markers = ['Amelogenin', + 'CSF1PO', + 'D2S1338', + 'D3S1358', + 'D5S818', + 'D7S820', + 'D8S1179', + 'D13S317', + 'D16S539', + 'D18S51', + 'D19S433', + 'D21S11', + 'FGA', + 'Penta D', + 'Penta E', + 'PentaD', + 'PentaE', + 'TH01', + 'TPOX', + 'vWA', + 'D1S1656', + 'D2S441', + 'D6S1043', + 'D10S1248', + 'D12S391', + 'D22S1045', + 'DXS101', + 'DYS391', + 'F13A01', + 'F13B', + 'FESFPS', + 'LPL', + 'Penta C', + 'PentaC', + 'SE33'] + + # remove extra fields, if present as keys may come from _clastr_query or other. 
+ query_markers = [marker for marker in markers if marker not in ['algorithm', 'includeAmelogenin', 'scoreFilter']] + + missing_markers = list(set(query_markers) - set(valid_api_markers)) + + return missing_markers + + def _clastr_query(query, query_filter, include_amelogenin, score_filter): url = "https://www.cellosaurus.org/str-search/api/query/" @@ -17,8 +64,8 @@ def _clastr_query(query, query_filter, include_amelogenin, score_filter): query['algorithm'] = 2 elif query_filter == "Masters Reference": query['algorithm'] = 3 - - query = _pentafix(query, reverse = True) + + query = _pentafix(query, reverse=True) query['includeAmelogenin'] = include_amelogenin query['scoreFilter'] = score_filter @@ -112,7 +159,7 @@ def _clastr_query(query, query_filter, include_amelogenin, score_filter): def _clastr_batch_query(query, query_filter, include_amelogenin, score_filter): url = "https://www.cellosaurus.org/str-search/api/batch/" - query = [_pentafix(item, reverse = True) for item in query] + query = [_pentafix(item, reverse=True) for item in query] if query_filter == "Tanabe": query = [dict(item, **{'algorithm': 1}) for item in query] @@ -158,6 +205,7 @@ def _clastr_batch_query(query, query_filter, include_amelogenin, score_filter): "TH01": "7,9.3", "TPOX": "8", "vWA": "18", + "NoGoodVeryBad": "I'm not a valid marker. However, that is ok. We catch this now." 
} # # stock from https://www.cellosaurus.org/str-search/help.html#5.1 @@ -194,6 +242,10 @@ def _clastr_batch_query(query, query_filter, include_amelogenin, score_filter): # "vWA": "17,19" # } + malformed_markers = _valid_marker_check(data.keys()) + + print(malformed_markers) + r = _clastr_query(data, 'Tanabe', False, 70) print(r) diff --git a/strprofiler/shiny_app/shiny_app.py b/strprofiler/shiny_app/shiny_app.py index ff1c2c5..eae29d5 100644 --- a/strprofiler/shiny_app/shiny_app.py +++ b/strprofiler/shiny_app/shiny_app.py @@ -6,7 +6,7 @@ import strprofiler.utils as sp from strprofiler.shiny_app.calc_functions import _single_query, _batch_query, _file_query -from strprofiler.shiny_app.clastr_api import _clastr_query, _clastr_batch_query +from strprofiler.shiny_app.clastr_api import _valid_marker_check, _clastr_query, _clastr_batch_query from datetime import date import time @@ -645,6 +645,23 @@ def loaded_example_text(): input.query_filter_threshold(), ) elif input.search_type() == 'Cellosaurus Database (CLASTR)': + malformed_markers = _valid_marker_check(query.keys()) + if malformed_markers: + notify_m = ui.modal( + "Markers: {} are incompatible with the CLASTR query." + .format(str(malformed_markers)[1:-1]), + ui.tags.br(), + ui.tags.br(), + "These markers will not be used in the query.", + ui.tags.br(), + ui.tags.br(), + "See: ", ui.tags.a('CLASTR', href=str("https://www.cellosaurus.org/str-search/"), target="_blank"), + " for a complete list of compatible marker names", + title="Inompatible CLASTR Markers", + easy_close=True, + footer=ui.modal_button('Understood') + ) + ui.modal_show(notify_m) results = _clastr_query( query, input.query_filter(), From 815036f51066e9e8095d48de3391a518eb68adc8 Mon Sep 17 00:00:00 2001 From: MikeWLloyd Date: Thu, 16 May 2024 19:26:53 -0400 Subject: [PATCH 10/29] conditional batch options. modal notice for malformed markers. 
--- strprofiler/shiny_app/clastr_api.py | 4 +- strprofiler/shiny_app/shiny_app.py | 126 +++++++++++++++++----------- 2 files changed, 80 insertions(+), 50 deletions(-) diff --git a/strprofiler/shiny_app/clastr_api.py b/strprofiler/shiny_app/clastr_api.py index e3c5157..fd0e9e9 100644 --- a/strprofiler/shiny_app/clastr_api.py +++ b/strprofiler/shiny_app/clastr_api.py @@ -44,7 +44,7 @@ def _valid_marker_check(markers): 'SE33'] # remove extra fields, if present as keys may come from _clastr_query or other. - query_markers = [marker for marker in markers if marker not in ['algorithm', 'includeAmelogenin', 'scoreFilter']] + query_markers = [marker for marker in markers if marker not in ['algorithm', 'includeAmelogenin', 'scoreFilter', 'description']] missing_markers = list(set(query_markers) - set(valid_api_markers)) @@ -143,8 +143,6 @@ def _clastr_query(query, query_filter, include_amelogenin, score_filter): query_added = pd.concat([query_df, merged]).reset_index(drop=True) query_added["bestScore"] = query_added['bestScore'].map("{0:.2f}".format).replace("nan", "") - # print(query_added.columns) - if 'problem' in query_added.columns: query_added = query_added[['accession', 'name', 'species', 'bestScore', 'accession_link', 'problem'] + [c for c in query_added if c not in diff --git a/strprofiler/shiny_app/shiny_app.py b/strprofiler/shiny_app/shiny_app.py index eae29d5..10892d9 100644 --- a/strprofiler/shiny_app/shiny_app.py +++ b/strprofiler/shiny_app/shiny_app.py @@ -76,6 +76,25 @@ def _link_wrap(name, link, problem): return ui.tags.a(name, href=str(link), target="_blank") +def notify_modal(marker_list): + ui.modal_show( + ui.modal( + "Marker(s): {} are incompatible with the CLASTR query." 
+ .format(str(marker_list)[1:-1]), + ui.tags.br(), + ui.tags.br(), + "The marker(s) will not be used in the query.", + ui.tags.br(), + ui.tags.br(), + "See: ", ui.tags.a('CLASTR', href=str("https://www.cellosaurus.org/str-search/"), target="_blank"), + " for a complete list of compatible marker names", + title="Inompatible CLASTR Markers", + easy_close=True, + footer=ui.modal_button('Understood') + ) + ) + + # App Generation ### def create_app(db=None): @@ -236,33 +255,61 @@ def create_app(db=None): {"id": "batch_sidebar"}, ui.tags.h3("Options"), ui.tags.hr(), + ui.input_select( + "search_type_batch", + "Search Type", + ["STRprofiler Database", "Cellosaurus Database (CLASTR)"], + width="100%" + ), ui.card( ui.input_switch( "score_amel_batch", "Score Amelogenin", value=False ), - ui.input_numeric( - "mix_threshold_batch", - "'Mixed' Sample Threshold", - value=3, - width="100%", - ), - ui.input_numeric( - "tan_threshold_batch", - "Tanabe Filter Threshold", - value=80, - width="100%", - ), - ui.input_numeric( - "mas_q_threshold_batch", - "Masters (vs. query) Filter Threshold", - value=80, - width="100%", + ui.panel_conditional( + "input.search_type_batch === 'STRprofiler Database'", + ui.input_numeric( + "mix_threshold_batch", + "'Mixed' Sample Threshold", + value=3, + width="100%", + ), + ui.input_numeric( + "tan_threshold_batch", + "Tanabe Filter Threshold", + value=80, + width="100%", + ), + ui.input_numeric( + "mas_q_threshold_batch", + "Masters (vs. query) Filter Threshold", + value=80, + width="100%", + ), + ui.input_numeric( + "mas_r_threshold_batch", + "Masters (vs. reference) Filter Threshold", + value=80, + width="100%", + ), ), - ui.input_numeric( - "mas_r_threshold_batch", - "Masters (vs. 
reference) Filter Threshold", - value=80, - width="100%", + ui.panel_conditional( + "input.search_type_batch === 'Cellosaurus Database (CLASTR)'", + ui.input_select( + "batch_query_filter", + "Similarity Score Filter", + choices=[ + "Tanabe", + "Masters Query", + "Masters Reference", + ], + width="100%", + ), + ui.input_numeric( + "batch_query_filter_threshold", + "Similarity Score Filter Threshold", + value=80, + width="100%", + ), ), ), ui.input_file( @@ -272,12 +319,6 @@ def create_app(db=None): multiple=False, width="100%", ), - ui.input_select( - "search_type_batch", - "Search Type", - ["STRprofiler Database", "Cellosaurus Database (CLASTR)"], - width="100%" - ), ui.input_action_button( "csv_query", "CSV Query", @@ -645,23 +686,11 @@ def loaded_example_text(): input.query_filter_threshold(), ) elif input.search_type() == 'Cellosaurus Database (CLASTR)': + malformed_markers = _valid_marker_check(query.keys()) if malformed_markers: - notify_m = ui.modal( - "Markers: {} are incompatible with the CLASTR query." 
- .format(str(malformed_markers)[1:-1]), - ui.tags.br(), - ui.tags.br(), - "These markers will not be used in the query.", - ui.tags.br(), - ui.tags.br(), - "See: ", ui.tags.a('CLASTR', href=str("https://www.cellosaurus.org/str-search/"), target="_blank"), - " for a complete list of compatible marker names", - title="Inompatible CLASTR Markers", - easy_close=True, - footer=ui.modal_button('Understood') - ) - ui.modal_show(notify_m) + notify_modal(malformed_markers) + results = _clastr_query( query, input.query_filter(), @@ -741,7 +770,6 @@ def download(): @render.data_frame def out_batch_df(): output_df.set(batch_query_results()) - print(output_df) with reactive.isolate(): if input.search_type_batch() == 'STRprofiler Database': try: @@ -842,13 +870,17 @@ def batch_query_results(): ) elif input.search_type_batch() == 'Cellosaurus Database (CLASTR)': clastr_query = [(lambda d: d.update(description=key) or d)(val) for (key, val) in query_df.items()] + + malformed_markers = _valid_marker_check(query_df[next(iter(query_df))].keys()) + if malformed_markers: + notify_modal(malformed_markers) + results = _clastr_batch_query( clastr_query, - input.query_filter(), + input.batch_query_filter(), input.score_amel_batch(), - input.query_filter_threshold() + input.batch_query_filter_threshold() ) - # TO DO: Change to a batch filter option set. 
return results # File input loading From 3d5d412c49ed17cd6dd24a5676a0550796f7754c Mon Sep 17 00:00:00 2001 From: MikeWLloyd Date: Mon, 20 May 2024 15:27:02 -0400 Subject: [PATCH 11/29] global clastr function --- pyproject.toml | 3 +- strprofiler/clastr.py | 237 ++++++++++++++++++++++++++++ strprofiler/shiny_app/clastr_api.py | 50 +----- strprofiler/shiny_app/shiny_app.py | 28 ++-- strprofiler/utils.py | 54 ++++++- tests/Example_clastr_input.csv | 4 + 6 files changed, 312 insertions(+), 64 deletions(-) create mode 100644 strprofiler/clastr.py create mode 100644 tests/Example_clastr_input.csv diff --git a/pyproject.toml b/pyproject.toml index 834af42..1c596cd 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -14,7 +14,7 @@ pandas = "^2.2" rich-click = "^1.5.2" numpy = "^1.26.3" openpyxl = "^3.0.10" -shiny = "^0.8.0" +shiny = "^0.9.0" shinyswatch = "^0.4.2" Jinja2 = "^3.1.3" faicons = "^0.2.2" @@ -25,6 +25,7 @@ flatten-json = "^0.1.14" [tool.poetry.scripts] strprofiler = 'strprofiler.strprofiler:strprofiler' +clastr = 'strprofiler.clastr:clastr_batch_post_request' strprofiler-app = 'strprofiler.strprofiler:local_shiny_app' [build-system] diff --git a/strprofiler/clastr.py b/strprofiler/clastr.py new file mode 100644 index 0000000..6365d2f --- /dev/null +++ b/strprofiler/clastr.py @@ -0,0 +1,237 @@ +import rich_click as click +from pathlib import Path +from datetime import datetime +import sys +import pandas as pd +import requests +import json +import strprofiler.utils as utils + + +@click.command() +@click.option( + "-sa", + "--search_algorithm", + default=1, + help="""Search algorithm to use in the Clastr query. + 1 - Tanabe, 2 - Masters (vs. query); 3 - Masters (vs. reference)""", + show_default=True, + type=int, +) +@click.option( + "-sm", + "--scoring_mode", + default=1, + help="""Search mode to account for missing alleles in query or reference. 
+    1 - Non-empty markers, 2 - Query markers, 3 - Reference markers.""",
+    show_default=True,
+    type=int,
+)
+@click.option(
+    "-sf",
+    "--score_filter",
+    default=80,
+    help="Minimum score to report as potential matches in summary table.",
+    show_default=True,
+    type=int,
+)
+@click.option(
+    "-mr",
+    "--max_results",
+    default=200,
+    help="Filter defining the maximum number of results to be returned.",
+    show_default=True,
+    type=int,
+)
+@click.option(
+    "-mm",
+    "--min_markers",
+    default=8,
+    help="Filter defining the minimum number of markers for matches to be reported.",
+    show_default=True,
+    type=int,
+)
+@click.option(
+    "-smap",
+    "--sample_map",
+    help="""Path to sample map in csv format for renaming.
+    First column should be sample names as given in STR file(s),
+    second should be new names to assign. No header.""",
+    type=click.Path(),
+)
+@click.option(
+    "-scol",
+    "--sample_col",
+    help="Name of sample column in STR file(s).",
+    default="Sample",
+    show_default=True,
+    type=str,
+)
+@click.option(
+    "-mcol",
+    "--marker_col",
+    help="""Name of marker column in STR file(s).
+ Only used if format is 'wide'.""", + default="Marker", + show_default=True, + type=str, +) +@click.option( + "-pfix", + "--penta_fix", + help="""Whether to try to harmonize PentaE/D allele spelling.""", + default=True, + show_default=True, + type=bool, +) +@click.option( + "-amel", + "--score_amel", + help="""Use Amelogenin for similarity scoring.""", + default=False, + show_default=True, + type=bool, +) +@click.option( + "-o", + "--output_dir", + default="./STRprofiler", + help="Path to the output directory.", + show_default=True, + type=click.Path(), +) +@click.argument("input_files", required=True, type=click.Path(exists=True), nargs=-1) +@click.version_option() +def clastr_batch_post_request( + input_files, + sample_map=None, + output_dir="./STRprofiler", + search_algorithm=1, + scoring_mode=1, + score_filter=80, + max_results=200, + min_markers=8, + sample_col="Sample Name", + marker_col="Marker", + penta_fix=True, + score_amel=False, +): + """CLASTR_Query compares STR profiles to the human Cellosaurus knowledge base using the CLASTR REST API.. + + :param input_files: List of input STR files in csv, xlsx, tsv, or txt format. + :type input_files: click.Path + + :param sample_map: Path to sample map in csv format for renaming. + First column should be sample names as given in STR file(s), + second should be new names to assign. No header., defaults to None + :type sample_map: str, optional + + :param output_dir: Path to output directory, defaults to "./STRprofiler" + :type output_dir: str, optional + + :param search_algorithm: Search algorithm to use in the Clastr query, Options: 1 - Tanabe, 2 - Masters (vs. query); 3 - Masters (vs. reference) + defaults to 1 (tanabe). + :type search_algorithm: int + + :param scoring_mode: Search mode to account for missing alleles in query or reference. + Options: 1 - Non-empty markers, 2 - Query markers, 3 - Reference markers. + defaults to 1 ( Non-empty markers). 
+    :type scoring_mode: int
+
+    :param score_filter: Minimum score to report as potential matches in summary table, defaults to 80
+    :type score_filter: int, optional
+
+    :param max_results: Filter defining the maximum number of results to be returned.
+        Note that in the case of conflicted cell lines, the Best and Worst versions are processed as pairs and only the best
+        score is affected by the threshold. Consequently, some Worst cases with a score below the threshold can still be present in the results.
+        defaults to 200
+    :type max_results: int, optional
+
+    :param min_markers: Filter defining the minimum number of markers for matches to be reported, defaults to 8.
+    :type min_markers: int, optional
+
+    :param sample_col: Name of sample column in STR file(s), defaults to "Sample"
+    :type sample_col: str, optional
+
+    :param marker_col: Name of marker column in STR file(s).
+        Only used if format is 'wide', defaults to "Marker"
+    :type marker_col: str, optional
+
+    :param penta_fix: Whether to try to harmonize PentaE/D allele spelling, defaults to True
+    :type penta_fix: bool, optional
+
+    :param score_amel: Use Amelogenin for similarity scoring, defaults to False
+    :type score_amel: bool, optional
+    """
+
+    # Make output directory and open file for logging.
+    Path(output_dir).mkdir(parents=True, exist_ok=True)
+    now = datetime.now()
+    dt_string = now.strftime("%Y%m%d.%H_%M_%S")
+    log_file = open(Path(output_dir, "strprofiler.clastrQuery."
+ dt_string + ".log"), "w") + + print("Search algorithm: " + str(search_algorithm), file=log_file) + print("Scoring mode: " + str(scoring_mode), file=log_file) + print("Score filter: " + str(marker_col), file=log_file) + print("Max results: " + str(max_results), file=log_file) + print("Min markers: " + str(min_markers), file=log_file) + print("Sample map: " + str(sample_map), file=log_file) + print("Sample column: " + str(sample_col), file=log_file) + print("Marker column: " + str(marker_col), file=log_file) + print("Penta fix: " + str(penta_fix), file=log_file) + print("Use amelogenin for scoring: " + str(score_amel) + "\n", file=log_file) + print("Full command:", file=log_file) + + print(" ".join(sys.argv) + "\n", file=log_file) + + # Check for sample map. + if sample_map is not None: + sample_map = pd.read_csv(sample_map, header=None, encoding="unicode_escape") + + # Data ingress. + query = utils.str_ingress( + paths=input_files, + sample_col=sample_col, + marker_col=marker_col, + sample_map=sample_map, + penta_fix=penta_fix, + ).to_dict(orient="index") + + clastr_query = [(lambda d: d.update(description=key) or d)(val) for (key, val) in query.items()] + + malformed_markers = utils.validate_api_markers(next(iter(clastr_query)).keys()) + if malformed_markers: + print("Marker(s): {} are incompatible with the CLASTR query. The marker(s) will not be used in the query." 
+ .format(str(malformed_markers)[1:-1]), file=log_file) + print("See: https://www.cellosaurus.org/str-search/ for a complete list of compatible marker names", file=log_file) + + url = "https://www.cellosaurus.org/str-search/api/batch/" + + clastr_query = [utils._pentafix(item, reverse=True) for item in clastr_query] + clastr_query = [dict(item, **{'algorithm': search_algorithm}) for item in clastr_query] + clastr_query = [dict(item, **{'scoringMode': scoring_mode}) for item in clastr_query] + clastr_query = [dict(item, **{'scoreFilter': score_filter}) for item in clastr_query] + clastr_query = [dict(item, **{'includeAmelogenin': score_amel}) for item in clastr_query] + clastr_query = [dict(item, **{'minMarkers': min_markers}) for item in clastr_query] + clastr_query = [dict(item, **{'maxResults': max_results}) for item in clastr_query] + clastr_query = [dict(item, **{'outputFormat': 'xlsx'}) for item in clastr_query] + + print("Querying CLASTR API at: ", url, file=log_file) + r = requests.post(url, data=json.dumps(clastr_query)) + + try: + r.raise_for_status() + except requests.exceptions.HTTPError as e: + print("Request failed with error: '", e, "'", file=log_file) + print("Request failed with error: '", e, "'") + return '' + + print("Response from query: ", r.status_code, file=log_file) + + with open(Path(output_dir, "strprofiler.clastrQueryResult." + dt_string + ".xlsx"), "wb") as fd: + for chunk in r.iter_content(chunk_size=128): + fd.write(chunk) + + print("Results saved: ", Path(output_dir, "strprofiler.clastrQueryResult." 
+ dt_string + ".xlsx"), file=log_file) + + log_file.close() diff --git a/strprofiler/shiny_app/clastr_api.py b/strprofiler/shiny_app/clastr_api.py index fd0e9e9..33047b2 100644 --- a/strprofiler/shiny_app/clastr_api.py +++ b/strprofiler/shiny_app/clastr_api.py @@ -2,53 +2,7 @@ import json import pandas as pd from flatten_json import flatten -from strprofiler.utils import _pentafix - - -def _valid_marker_check(markers): - - valid_api_markers = ['Amelogenin', - 'CSF1PO', - 'D2S1338', - 'D3S1358', - 'D5S818', - 'D7S820', - 'D8S1179', - 'D13S317', - 'D16S539', - 'D18S51', - 'D19S433', - 'D21S11', - 'FGA', - 'Penta D', - 'Penta E', - 'PentaD', - 'PentaE', - 'TH01', - 'TPOX', - 'vWA', - 'D1S1656', - 'D2S441', - 'D6S1043', - 'D10S1248', - 'D12S391', - 'D22S1045', - 'DXS101', - 'DYS391', - 'F13A01', - 'F13B', - 'FESFPS', - 'LPL', - 'Penta C', - 'PentaC', - 'SE33'] - - # remove extra fields, if present as keys may come from _clastr_query or other. - query_markers = [marker for marker in markers if marker not in ['algorithm', 'includeAmelogenin', 'scoreFilter', 'description']] - - missing_markers = list(set(query_markers) - set(valid_api_markers)) - - return missing_markers +from strprofiler.utils import _pentafix, validate_api_markers def _clastr_query(query, query_filter, include_amelogenin, score_filter): @@ -240,7 +194,7 @@ def _clastr_batch_query(query, query_filter, include_amelogenin, score_filter): # "vWA": "17,19" # } - malformed_markers = _valid_marker_check(data.keys()) + malformed_markers = validate_api_markers(data.keys()) print(malformed_markers) diff --git a/strprofiler/shiny_app/shiny_app.py b/strprofiler/shiny_app/shiny_app.py index 10892d9..3d5c145 100644 --- a/strprofiler/shiny_app/shiny_app.py +++ b/strprofiler/shiny_app/shiny_app.py @@ -4,9 +4,9 @@ import pandas as pd from faicons import icon_svg -import strprofiler.utils as sp +import strprofiler.utils as utils from strprofiler.shiny_app.calc_functions import _single_query, _batch_query, _file_query 
-from strprofiler.shiny_app.clastr_api import _valid_marker_check, _clastr_query, _clastr_batch_query +from strprofiler.shiny_app.clastr_api import _clastr_query, _clastr_batch_query from datetime import date import time @@ -27,7 +27,7 @@ def database_load(file): Exception: If the file fails to load or if sample ID names are duplicated. """ try: - str_database = sp.str_ingress( + str_database = utils.str_ingress( [file], # expects list sample_col="Sample", marker_col="Marker", @@ -211,7 +211,7 @@ def create_app(db=None): width="90%" ), ui.tooltip( - ui.input_action_button( + ui.input_task_button( "search", "Search", class_="btn-success", @@ -238,6 +238,7 @@ def create_app(db=None): ui.column(3, ui.tags.h3("Results")), ui.column(1, ui.p("")), ), + # TO DO: Try loading/thinking spinners. ui.column( 12, {"id": "res_card"}, @@ -319,7 +320,7 @@ def create_app(db=None): multiple=False, width="100%", ), - ui.input_action_button( + ui.input_task_button( "csv_query", "CSV Query", class_="btn-primary", @@ -673,7 +674,7 @@ def loaded_example_text(): where="afterEnd", ) res_click.set(1) - thinking = ui.notification_show("Message: API Query Running.", duration=None) + # isolate input.search_type to prevent trigger when options change. with reactive.isolate(): if input.search_type() == 'STRprofiler Database': @@ -687,7 +688,7 @@ def loaded_example_text(): ) elif input.search_type() == 'Cellosaurus Database (CLASTR)': - malformed_markers = _valid_marker_check(query.keys()) + malformed_markers = utils.validate_api_markers(query.keys()) if malformed_markers: notify_modal(malformed_markers) @@ -697,7 +698,8 @@ def loaded_example_text(): input.score_amel_query(), input.query_filter_threshold() ) - ui.notification_remove(thinking) + # TO DO: Does this need to be async? 
+ return results @output @@ -805,7 +807,7 @@ def batch_query_results(): ui.remove_ui("#inserted-downloader2") return pd.DataFrame({"": []}) try: - query_df = sp.str_ingress( + query_df = utils.str_ingress( [file[0]["datapath"]], sample_col="Sample", marker_col="Marker", @@ -850,6 +852,7 @@ def batch_query_results(): {"id": "inserted-downloader2"}, ui.download_button( "download2", "Download XLSX", width="25%", class_="btn-primary" + # TO DO: Adjust spacing on 'results' section. XLSX button is too far down. ), ), selector="#res_card_batch", @@ -870,8 +873,7 @@ def batch_query_results(): ) elif input.search_type_batch() == 'Cellosaurus Database (CLASTR)': clastr_query = [(lambda d: d.update(description=key) or d)(val) for (key, val) in query_df.items()] - - malformed_markers = _valid_marker_check(query_df[next(iter(query_df))].keys()) + malformed_markers = utils.validate_api_markers(query_df[next(iter(query_df))].keys()) if malformed_markers: notify_modal(malformed_markers) @@ -881,6 +883,8 @@ def batch_query_results(): input.score_amel_batch(), input.batch_query_filter_threshold() ) + # TO DO: Does this need to be async? 
+ return results # File input loading @@ -935,7 +939,7 @@ def file_query_results(): if file is None: ui.remove_ui("#inserted-downloader3") return pd.DataFrame({"": []}) - query_df = sp.str_ingress( + query_df = utils.str_ingress( [file[0]["datapath"]], sample_col="Sample", marker_col="Marker", diff --git a/strprofiler/utils.py b/strprofiler/utils.py index d3e533d..b137a50 100644 --- a/strprofiler/utils.py +++ b/strprofiler/utils.py @@ -35,14 +35,14 @@ def _clean_element(x): return ",".join(sorted_elements) -def _pentafix(samps_dict, reverse = False): +def _pentafix(samps_dict, reverse=False): """Takes a dictionary of alleles and returns a dictionary with common Penta markers renamed for consistency.""" if not reverse: if "Penta C" in samps_dict.keys(): samps_dict["PentaC"] = samps_dict.pop("Penta C") elif "Penta_C" in samps_dict.keys(): samps_dict["PentaC"] = samps_dict.pop("Penta_C") - + if "Penta D" in samps_dict.keys(): samps_dict["PentaD"] = samps_dict.pop("Penta D") elif "Penta_D" in samps_dict.keys(): @@ -140,7 +140,8 @@ def _make_html(dataframe: pd.DataFrame):
{table_html}
- +