Skip to content

CLASTR WIP #24

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 30 commits into from
May 30, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
8a9c4f1
Merge pull request #14 from j-andrews7/dev
j-andrews7 Apr 17, 2024
5961969
clastr api proof of concept
MikeWLloyd May 1, 2024
9916ad9
query row added, UI adjustment
MikeWLloyd May 2, 2024
9bba28f
tooltip added, help updated, req for deploy updated
MikeWLloyd May 2, 2024
1cd2ef4
add window title
MikeWLloyd May 3, 2024
8645320
clastr batch method rough in
MikeWLloyd May 9, 2024
1bd91a9
add requirements, bump version
j-andrews7 May 14, 2024
579a1cf
fix for #26
MikeWLloyd May 16, 2024
77529be
additional tweaks for #26
j-andrews7 May 16, 2024
771c133
add marker check for single query
MikeWLloyd May 16, 2024
815036f
conditional batch options. modal notice for malformed markers.
MikeWLloyd May 16, 2024
3d5d412
global clastr function
MikeWLloyd May 20, 2024
2696ce9
clastr unit test
MikeWLloyd May 20, 2024
3191368
catch non-int thresholds
MikeWLloyd May 21, 2024
27f04c2
fix for #25, docstrings added
MikeWLloyd May 24, 2024
8e1a338
NoneType catch
MikeWLloyd May 24, 2024
e5e4a30
doc updates
MikeWLloyd May 24, 2024
26c2883
Fix #28
j-andrews7 May 28, 2024
38bdb08
remove debug print statement
MikeWLloyd May 28, 2024
4505bd6
linting, minor UI tweaks
j-andrews7 May 29, 2024
2f87d58
more linting
j-andrews7 May 29, 2024
30a3689
Help file typos & formatting
j-andrews7 May 29, 2024
158b2de
update lock file
j-andrews7 May 29, 2024
aed4430
Update CHANGELOG.md
j-andrews7 May 30, 2024
9dd839d
Remove old paper drafts and JOSS workflow
j-andrews7 May 30, 2024
4e940f7
add version, i hate scrolling
j-andrews7 May 30, 2024
c62eaba
doc updates
j-andrews7 May 30, 2024
fdeda54
format README
j-andrews7 May 30, 2024
8f0588d
add CLASTR reference
j-andrews7 May 30, 2024
c1d8fcf
Update README.md
j-andrews7 May 30, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
62 changes: 59 additions & 3 deletions strprofiler/shiny_app/clastr_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from flatten_json import flatten


def clastr_query(query, query_filter, include_amelogenin, score_filter):
def _clastr_query(query, query_filter, include_amelogenin, score_filter):
url = "https://www.cellosaurus.org/str-search/api/query/"

dct = {k: [v] for k, v in query.items()}
Expand Down Expand Up @@ -65,7 +65,7 @@ def clastr_query(query, query_filter, include_amelogenin, score_filter):

# Marker names are not consistent across results. MarkerName[1] != the same thing in all cases.
# We must track marker name by index by result.
# The same logic from above applies, split the compount column name string,
# The same logic from above applies, split the compound column name string,
# Melt on markerID, and then merge with concat allele made above.
# Finally, pivot into a table and rejoin to higher level results.
marker_names = df.filter(regex='^profiles_0_.*_name').T
Expand Down Expand Up @@ -105,6 +105,30 @@ def clastr_query(query, query_filter, include_amelogenin, score_filter):
return query_added


def _clastr_batch_query(query, query_filter, include_amelogenin, score_filter):
    """Submit a batch STR profile query to the Cellosaurus CLASTR REST API.

    Args:
        query (list[dict]): One dict per sample, mapping STR marker names to
            allele strings (optionally including a "description" key).
        query_filter (str): Scoring algorithm name: "Tanabe", "Masters Query",
            or "Masters Reference".
        include_amelogenin (bool): Whether Amelogenin is included in scoring.
        score_filter (int): Minimum match score threshold.

    Returns:
        requests.Response: Response whose content is an XLSX report on
        success, or a single-column pandas.DataFrame holding the HTTP error
        message on failure.
    """
    url = "https://www.cellosaurus.org/str-search/api/batch/"

    # CLASTR API algorithm codes: 1 = Tanabe, 2 = Masters (vs. query),
    # 3 = Masters (vs. reference). The original code sent 2 for both Masters
    # variants, which made a "Masters Reference" batch query run the wrong
    # algorithm.
    algorithm_codes = {"Tanabe": 1, "Masters Query": 2, "Masters Reference": 3}

    # Apply the shared options to every sample in a single pass instead of
    # rebuilding the list once per option.
    common = {
        "includeAmelogenin": include_amelogenin,
        "scoreFilter": score_filter,
        "outputFormat": "xlsx",
    }
    if query_filter in algorithm_codes:
        common["algorithm"] = algorithm_codes[query_filter]

    query = [dict(item, **common) for item in query]

    r = requests.post(url, data=json.dumps(query))

    try:
        r.raise_for_status()
    except requests.exceptions.HTTPError as e:
        # Surface HTTP failures as a DataFrame so the Shiny app can display
        # them in the results table instead of crashing.
        return pd.DataFrame({"Error": [str(e)]})

    return r


if __name__ == '__main__':
# url = "https://www.cellosaurus.org/str-search/api/query/%"
# Use above URL for 400 error
Expand Down Expand Up @@ -143,10 +167,42 @@ def clastr_query(query, query_filter, include_amelogenin, score_filter):
# "vWA": "16",
# }

r = clastr_query(data, 'Tanabe', False, 70)
r = _clastr_query(data, 'Tanabe', False, 70)

print(r)

batch_data = [{
"description": "Example 1",
"Amelogenin": "X",
"CSF1PO": "13,14",
"D5S818": "13",
"D7S820": "8",
"D13S317": "12",
"FGA": "24",
"TH01": "8",
"TPOX": "11",
"vWA": "16",
}, {
"description": "Example 2",
"Amelogenin": "X, Y",
"CSF1PO": "13",
"D5S818": "13, 14",
"D7S820": "8, 19",
"D13S317": "11, 12",
"FGA": "24",
"TH01": "8",
"TPOX": "11",
"vWA": "15",
"outputFormat": "xlsx"
}]

r = _clastr_batch_query(batch_data, 'Tanabe', False, 70)

with open('testing.xlsx', 'wb') as fd:
for chunk in r.iter_content(chunk_size=128):
fd.write(chunk)


# JSON data structure:
# {
# "description": "",
Expand Down
142 changes: 95 additions & 47 deletions strprofiler/shiny_app/shiny_app.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

import strprofiler.utils as sp
from strprofiler.shiny_app.calc_functions import _single_query, _batch_query, _file_query
from strprofiler.shiny_app.clastr_api import clastr_query
from strprofiler.shiny_app.clastr_api import _clastr_query, _clastr_batch_query

from datetime import date
import time
Expand Down Expand Up @@ -272,6 +272,12 @@ def create_app(db=None):
multiple=False,
width="100%",
),
ui.input_select(
"search_type_batch",
"Search Type",
["STRprofiler Database", "Cellosaurus Database (CLASTR)"],
width="100%"
),
ui.input_action_button(
"csv_query",
"CSV Query",
Expand Down Expand Up @@ -639,7 +645,7 @@ def loaded_example_text():
input.query_filter_threshold(),
)
elif input.search_type() == 'Cellosaurus Database (CLASTR)':
results = clastr_query(
results = _clastr_query(
query,
input.query_filter(),
input.score_amel_query(),
Expand Down Expand Up @@ -693,6 +699,7 @@ def out_result():
else:
out_df = pd.DataFrame({"No input provided.": []})
return out_df
# TO DO: Remove results table when changing query methods.

# Dealing with downloading results, when requested.
# Note that output_results() is a reactive Calc result.
Expand All @@ -718,26 +725,31 @@ def download():
@render.data_frame
def out_batch_df():
output_df.set(batch_query_results())
try:
return render.DataTable(output_df())
except Exception:
m = ui.modal(
ui.div(
{"style": "font-size: 18px"},
ui.HTML(
(
"There was a fatal error in the query.<br><br>"
"Ensure marker names match expectation, and that"
" no special characters (spaces, etc.) were used in sample names."
)
),
),
title="Batch Query Error",
easy_close=True,
footer=None,
)
ui.modal_show(m)
return render.DataTable(pd.DataFrame({"Failed Query. Fix Input File": []}))
print(output_df)
with reactive.isolate():
if input.search_type_batch() == 'STRprofiler Database':
try:
return render.DataTable(output_df())
except Exception:
m = ui.modal(
ui.div(
{"style": "font-size: 18px"},
ui.HTML(
(
"There was a fatal error in the query.<br><br>"
"Ensure marker names match expectation, and that"
" no special characters (spaces, etc.) were used in sample names."
)
),
),
title="Batch Query Error",
easy_close=True,
footer=None,
)
ui.modal_show(m)
return render.DataTable(pd.DataFrame({"Failed Query. Fix Input File": []}))
elif input.search_type_batch() == 'Cellosaurus Database (CLASTR)':
return render.DataTable(pd.DataFrame({"CASTR Batch Query": ['Download Results']}))

# File input loading
@reactive.calc
Expand Down Expand Up @@ -776,39 +788,75 @@ def batch_query_results():
return pd.DataFrame({"Failed Query. Fix Input File": []})

if res_click_file() == 0:
ui.insert_ui(
ui.div(
{"id": "inserted-downloader2"},
ui.download_button(
"download2", "Download CSV", width="25%", class_="btn-primary"
if input.search_type_batch() == 'STRprofiler Database':
ui.insert_ui(
ui.div(
{"id": "inserted-downloader2"},
ui.download_button(
"download2", "Download CSV", width="25%", class_="btn-primary"
),
),
),
selector="#res_card_batch",
where="beforeEnd",
)
res_click_file.set(1)
return _batch_query(
query_df,
str_database(),
input.score_amel_batch(),
input.mix_threshold_batch(),
input.tan_threshold_batch(),
input.mas_q_threshold_batch(),
input.mas_r_threshold_batch(),
)
selector="#res_card_batch",
where="beforeEnd",
)
res_click_file.set(1)
elif input.search_type_batch() == 'Cellosaurus Database (CLASTR)':
ui.insert_ui(
ui.div(
{"id": "inserted-downloader2"},
ui.download_button(
"download2", "Download XLSX", width="25%", class_="btn-primary"
),
),
selector="#res_card_batch",
where="beforeEnd",
)
res_click_file.set(1)

with reactive.isolate():
if input.search_type_batch() == 'STRprofiler Database':
results = _batch_query(
query_df,
str_database(),
input.score_amel_batch(),
input.mix_threshold_batch(),
input.tan_threshold_batch(),
input.mas_q_threshold_batch(),
input.mas_r_threshold_batch(),
)
elif input.search_type_batch() == 'Cellosaurus Database (CLASTR)':
clastr_query = [(lambda d: d.update(description=key) or d)(val) for (key, val) in query_df.items()]
results = _clastr_batch_query(
clastr_query,
input.query_filter(),
input.score_amel_batch(),
input.query_filter_threshold()
)
# TO DO: Change to a batch filter option set.
return results

# Reset batch download UI when the batch search type changes.
@reactive.effect
@reactive.event(input.search_type_batch)
def _():
    """Reset batch-download state when the batch search type changes.

    Removes the previously inserted download button so the next query
    re-inserts the one matching the newly selected backend, and clears the
    click latch so the insert happens again.
    """
    ui.remove_ui("#inserted-downloader2")
    res_click_file.set(0)
    # TO DO: Remove batch results table when changing methods.

# Dealing with downloading results, when requested.
# Note that batch_query_results() is a reactive Calc result.
@render.download(
filename="STR_Batch_Results_"
+ date.today().isoformat()
+ "_"
+ time.strftime("%Hh-%Mm", time.localtime())
+ ".csv"
filename=lambda: "STR_Batch_Results_" + date.today().isoformat() + "_" + time.strftime("%Hh-%Mm", time.localtime()) + ".csv"
if f"{input.search_type_batch()}" == 'STRprofiler Database'
else "STR_Batch_Results_" + date.today().isoformat() + "_" + time.strftime("%Hh-%Mm", time.localtime()) + ".xlsx"
)
def download2():
if batch_query_results() is not None:
yield batch_query_results().to_csv(index=False)
if input.search_type_batch() == 'STRprofiler Database':
yield batch_query_results().to_csv(index=False)
if input.search_type_batch() == 'Cellosaurus Database (CLASTR)':
for chunk in batch_query_results().iter_content(chunk_size=128):
yield chunk

# Dealing with passing example file to user.
@render.download()
Expand Down
Loading