Skip to content

Commit f5feafd

Browse files
committed
Release 0.2.4
1 parent b87c75f commit f5feafd

File tree

13 files changed

+152
-45
lines changed

13 files changed

+152
-45
lines changed

CITATION.cff

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
cff-version: "1.1.0"
22
message: "If you use this software, please cite it using these metadata."
33
title: ElasticBLAST
4-
version: "0.2.3"
5-
date-released: 2022-02-16
4+
version: "0.2.4"
5+
date-released: 2022-03-04
66
license: "NCBI Public Domain"
77
repository-code: "https://github.com/ncbi/elastic-blast/"
88
authors:

bin/results2clustername.sh

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
#!/bin/bash
2+
# results2clustername.sh: Script to convert ElasticBLAST results to the default
3+
# cluster name
4+
#
5+
# Author: Christiam Camacho (camacho@ncbi.nlm.nih.gov)
6+
# Created: Thu 08 Apr 2021 04:07:29 PM EDT
7+
8+
if [ $# -ne 1 ] ; then
9+
echo "Usage: $0 <ElasticBLAST results path>"
10+
exit 1
11+
fi
12+
elb_results=$1
13+
md5=md5sum
14+
command -v $md5 >& /dev/null || md5=md5
15+
results_hash=$(printf $elb_results | $md5 | cut -b-9)
16+
echo elasticblast-$USER-$results_hash

docker-blast/Makefile

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,13 @@ aws-build-from-local-sources: setup.cfg_cloud
8585
gcloud builds submit --config=awscloudbuild.yaml --substitutions=TAG_NAME="${VERSION}",_IMG="${AWS_IMG}",_SERVER="${AWS_SERVER}",_AWS_ECR_PASSWD="`aws ecr-public get-login-password --region ${AWS_REGION}`",_DOCKERFILE='Dockerfile-build-from-local-sources' .
8686
$(MAKE) clean-local-sources
8787

88+
.PHONY: gcp-check
89+
gcp-check:
90+
gcloud builds submit --config test-docker-image-gcp.yaml --substitutions _TAG=$(VERSION),_IMG=${IMG}
91+
92+
.PHONY: aws-check
93+
aws-check:
94+
gcloud builds submit --config test-docker-image-aws.yaml --substitutions _IMG="${AWS_IMG}:${VERSION}"
8895

8996
.PHONY: clean
9097
clean:
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
steps:
2+
- name: '${_IMG}'
3+
args: ['update_blastdb.pl', '--version']
4+
- name: '${_IMG}'
5+
args: ['blastn', '-version-full']
6+
- name: '${_IMG}'
7+
args: ['blastdb_path', '-version-full']
8+
- name: '${_IMG}'
9+
args: ['which', 'vmtouch']
10+
- name: '${_IMG}'
11+
args: ['aws', '--version']
12+
- name: '${_IMG}'
13+
args: ['aws', 's3', '--no-sign-request', 'ls', 's3://ncbi-blast-databases/latest-dir']
14+
- name: '${_IMG}'
15+
args: ['gcloud', '--version']
16+
- name: '${_IMG}'
17+
args: ['printenv', 'BLASTDB', 'PATH']
18+
- name: '${_IMG}'
19+
args: ['fasta-split', '--help']
20+
- name: '${_IMG}'
21+
args: ['splitq_download_db_search', '--version']
22+
- name: '${_IMG}'
23+
args: ['splitq_download_db_search', '--help']
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
steps:
2+
- name: 'gcr.io/${PROJECT_ID}/${_IMG}:${_TAG}'
3+
args: ['update_blastdb.pl', '--version']
4+
- name: 'gcr.io/${PROJECT_ID}/${_IMG}:${_TAG}'
5+
args: ['blastn', '-version-full']
6+
- name: 'gcr.io/${PROJECT_ID}/${_IMG}:${_TAG}'
7+
args: ['blastdb_path', '-version-full']
8+
- name: 'gcr.io/${PROJECT_ID}/${_IMG}:${_TAG}'
9+
args: ['which', 'vmtouch']
10+
- name: 'gcr.io/${PROJECT_ID}/${_IMG}:${_TAG}'
11+
args: ['aws', '--version']
12+
- name: 'gcr.io/${PROJECT_ID}/${_IMG}:${_TAG}'
13+
args: ['aws', 's3', '--no-sign-request', 'ls', 's3://ncbi-blast-databases/latest-dir']
14+
- name: 'gcr.io/${PROJECT_ID}/${_IMG}:${_TAG}'
15+
args: ['gcloud', '--version']
16+
- name: 'gcr.io/${PROJECT_ID}/${_IMG}:${_TAG}'
17+
args: ['printenv', 'BLASTDB', 'PATH']
18+
- name: 'gcr.io/${PROJECT_ID}/${_IMG}:${_TAG}'
19+
args: ['fasta-split', '--help']
20+
- name: 'gcr.io/${PROJECT_ID}/${_IMG}:${_TAG}'
21+
args: ['splitq_download_db_search', '--version']
22+
- name: 'gcr.io/${PROJECT_ID}/${_IMG}:${_TAG}'
23+
args: ['splitq_download_db_search', '--help']

docker-job-submit/Makefile

Lines changed: 5 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ SHELL=/bin/bash
2828
.PHONY: all pre-check check clean build publish gcp-build gcp-check gcp-clean
2929

3030
IMG?=ncbi/elasticblast-job-submit
31-
VERSION?=1.2.1
31+
VERSION?=2.0.0
3232
GCP_PROJECT?=$(shell gcloud config get-value project 2>/dev/null)
3333
GCP_TEST_BUCKET?=gs://elasticblast-test/cloud-job-submission
3434
AWS_REGION?=us-east-1
@@ -76,24 +76,20 @@ aws-build-from-local-sources:
7676
-gcloud builds submit --config awscloudbuild.yaml --substitutions _SERVER=${AWS_SERVER},TAG_NAME=${VERSION},_IMG=${AWS_IMG},_DOCKERFILE=Dockerfile-build-from-local-sources.aws,_AWS_ECR_PASSWD="`aws ecr-public get-login-password --region ${AWS_REGION}`"
7777
rm -fr src bin requirements setup.cfg_cloud setup.py
7878

79-
80-
.PHONY: gcp-test
81-
gcp-test:
82-
gcloud builds submit --timeout=120 --config cloudrun.yaml --substitutions _IMG="gcr.io/${GCP_PROJECT}/${IMG}:${VERSION}",_ELB_GCP_PROJECT="${ELB_GCP_PROJECT}",_ELB_GCP_ZONE="${ELB_GCP_ZONE}",_ELB_RESULTS="${ELB_RESULTS}",_ELB_CLUSTER_NAME="${ELB_CLUSTER_NAME}"
83-
84-
.PHONY: aws-test
85-
aws-test:
79+
.PHONY: aws-check
80+
aws-check:
8681
gcloud builds submit --config awscloudrun.yaml --substitutions _IMG="${AWS_IMG}:${VERSION}",_ELB_RESULTS="${ELB_RESULTS}",_ELB_CLUSTER_NAME="${ELB_CLUSTER_NAME}"
8782

8883
gcp-clean:
8984
-gcloud container images delete gcr.io/${GCP_PROJECT}/${IMG}:${VERSION}
9085

86+
.PHONY: gcp-check
9187
gcp-check:
9288
-gcloud container images list --repository=gcr.io/${GCP_PROJECT}/${IMG}
9389
-gcloud container images list-tags gcr.io/${GCP_PROJECT}/${IMG}
9490
-gcloud container images describe gcr.io/${GCP_PROJECT}/${IMG}:latest
9591
-gcloud container images describe gcr.io/${GCP_PROJECT}/${IMG}:${VERSION}
96-
#gcloud builds submit --config test-cloudbuild.yaml --substitutions _TAG=$(VERSION),_IMG=${IMG}
92+
gcloud builds submit --timeout=120 --config cloudrun.yaml --substitutions _IMG="gcr.io/${GCP_PROJECT}/${IMG}:${VERSION}",_ELB_GCP_PROJECT="${ELB_GCP_PROJECT}",_ELB_GCP_ZONE="${ELB_GCP_ZONE}",_ELB_RESULTS="${ELB_RESULTS}",_ELB_CLUSTER_NAME="${ELB_CLUSTER_NAME}"
9793

9894
gcp-list-tagless-images:
9995
gcloud container images list-tags gcr.io/${GCP_PROJECT}/${IMG} \

requirements/base.txt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,8 @@ setuptools
33
importlib-resources
44
importlib-metadata
55
pex
6-
boto3==1.20.31
7-
botocore==1.23.33
6+
boto3==1.21.10
7+
botocore==1.24.10
88
awslimitchecker
99
tenacity
1010
dataclasses_json

src/elastic_blast/aws.py

Lines changed: 68 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,10 @@
4444
import boto3 # type: ignore
4545
from botocore.exceptions import ClientError, NoCredentialsError, ParamValidationError, WaiterError # type: ignore
4646

47+
from dataclasses_json import dataclass_json
48+
from dataclasses import dataclass, field
49+
from copy import deepcopy
50+
4751
from .util import convert_labels_to_aws_tags, convert_disk_size_to_gb
4852
from .util import convert_memory_to_mb, UserReportError
4953
from .util import ElbSupportedPrograms, get_usage_reporting, sanitize_aws_batch_job_name
@@ -110,6 +114,37 @@ def check_cluster(cfg: ElasticBlastConfig) -> bool:
110114
return False
111115

112116

117+
@dataclass_json
118+
@dataclass
119+
class JobIds:
120+
"""Serializable store of AWS Batch job ids for query splitting, cloud job
121+
submission and BLAST searches"""
122+
query_splitting: str = ''
123+
job_submission: str = ''
124+
search: List[str] = field(default_factory=list)
125+
126+
def __bool__(self):
127+
"""Boolean value of the object: True if at least one job id is set"""
128+
return bool(self.query_splitting) or bool(self.job_submission) or bool(self.search)
129+
130+
def merge(self, obj):
131+
"""Merge another JobIds object into self"""
132+
if not self.query_splitting and obj.query_splitting:
133+
self.query_splitting = obj.query_splitting
134+
if not self.job_submission and obj.job_submission:
135+
self.job_submission = obj.job_submission
136+
self.search = list(set(self.search + obj.search))
137+
138+
def to_list(self) -> List[str]:
139+
"""Return all jobs ids as a list"""
140+
id_list = [job for job in self.search]
141+
if self.query_splitting:
142+
id_list.append(self.query_splitting)
143+
if self.job_submission:
144+
id_list.append(self.job_submission)
145+
return id_list
146+
147+
113148
class ElasticBlastAws(ElasticBlast):
114149
""" Implementation of core ElasticBLAST functionality in AWS.
115150
Uses a CloudFormation template and AWS Batch for its main operation.
@@ -146,8 +181,7 @@ def _init(self, cfg: ElasticBlastConfig, create: bool):
146181
self.subnets = None
147182
self._provide_subnets()
148183
self.cf_stack = None
149-
self.job_ids : List[str] = []
150-
self.qs_job_id = None
184+
self.job_ids = JobIds()
151185

152186
initialized = True
153187

@@ -639,7 +673,7 @@ def cloud_query_split(self, query_files: List[str]) -> None:
639673
jobName=jname,
640674
parameters=parameters,
641675
containerOverrides=overrides)
642-
self.qs_job_id = job['jobId']
676+
self.job_ids.query_splitting = job['jobId']
643677
logging.info(f"Submitted AWS Batch job {job['jobId']} to split query {query_files[0]}")
644678
self.upload_job_ids()
645679
else:
@@ -653,15 +687,15 @@ def wait_for_cloud_query_split(self) -> None:
653687
"""
654688
if self.dry_run:
655689
return
656-
if not self.qs_job_id:
690+
if not self.job_ids.query_splitting:
657691
msg = 'Query splitting job was not submitted!'
658692
logging.fatal(msg)
659693
raise RuntimeError(msg)
660694

661695
while True:
662-
job_batch = self.batch.describe_jobs(jobs=[self.qs_job_id])['jobs']
696+
job_batch = self.batch.describe_jobs(jobs=[self.job_ids.query_splitting])['jobs']
663697
job_status = job_batch[0]['status']
664-
logging.debug(f'Query splitting job status {job_status} for {self.qs_job_id}')
698+
logging.debug(f'Query splitting job status {job_status} for {self.job_ids.query_splitting}')
665699
if job_status == 'SUCCEEDED':
666700
break
667701
if job_status == 'FAILED':
@@ -674,7 +708,7 @@ def wait_for_cloud_query_split(self) -> None:
674708
for k in ['exitCode', 'reason']:
675709
if k in container:
676710
failure_details += f'Container{k[0].upper()+k[1:]}: {container[k]} '
677-
msg = f'Query splitting on the cloud failed (jobId={self.qs_job_id})'
711+
msg = f'Query splitting on the cloud failed (jobId={self.job_ids.query_splitting})'
678712
if failure_details: msg += failure_details
679713
logging.fatal(msg)
680714
raise UserReportError(CLUSTER_ERROR, msg)
@@ -736,11 +770,11 @@ def _cloud_submit(self) -> None:
736770
"parameters": parameters,
737771
"containerOverrides": overrides
738772
}
739-
if self.qs_job_id:
740-
submit_job_args["dependsOn"] = [{'jobId': self.qs_job_id}]
773+
if self.job_ids.query_splitting:
774+
submit_job_args["dependsOn"] = [{'jobId': self.job_ids.query_splitting}]
741775
job = self.batch.submit_job(**submit_job_args)
742776
logging.info(f'Submitted AWS Batch job {job["jobId"]} to submit search jobs')
743-
self.job_ids.append(job['jobId'])
777+
self.job_ids.job_submission = job['jobId']
744778
self.upload_job_ids()
745779

746780

@@ -751,8 +785,6 @@ def client_submit(self, query_batches: List[str], one_stage_cloud_query_split: b
751785
query_batches - list of bucket names of queries to submit
752786
one_stage_cloud_query_split - do the query split in the cloud as a part
753787
of executing a regular job """
754-
self.job_ids = []
755-
756788
prog = self.cfg.blast.program
757789

758790
if self.cfg.cluster.db_source != DBSource.AWS:
@@ -829,10 +861,10 @@ def is_int(value: str):
829861
"parameters": parameters,
830862
"containerOverrides": overrides
831863
}
832-
if self.qs_job_id:
833-
submit_job_args["dependsOn"] = [{'jobId': self.qs_job_id}]
864+
if self.job_ids.query_splitting:
865+
submit_job_args["dependsOn"] = [{'jobId': self.job_ids.query_splitting}]
834866
job = self.batch.submit_job(**submit_job_args)
835-
self.job_ids.append(job['jobId'])
867+
self.job_ids.search.append(job['jobId'])
836868
logging.debug(f"Job definition parameters for job {job['jobId']} {parameters}")
837869
logging.info(f"Submitted AWS Batch job {job['jobId']} with query {q}")
838870
else:
@@ -873,15 +905,23 @@ def get_job_ids(self) -> List[str]:
873905
def upload_job_ids(self) -> None:
874906
"""Save AWS Batch job ids in a metadata file in S3, if the metadata
875907
file is present, append job ids"""
908+
current_job_ids = deepcopy(self.job_ids)
876909
self._load_job_ids_from_aws()
910+
current_job_ids.merge(self.job_ids)
911+
self.job_ids = current_job_ids
912+
877913
bucket_name, key = parse_bucket_name_key(f'{self.results_bucket}/{ELB_METADATA_DIR}/{ELB_AWS_JOB_IDS}')
878914
bucket = self.s3.Bucket(bucket_name)
879-
job_ids = self.job_ids
880-
if self.qs_job_id:
881-
job_ids.append(self.qs_job_id)
882-
job_ids = list(set(job_ids))
883-
bucket.put_object(Body=json.dumps(job_ids).encode(), Key=key)
884-
logging.debug(f'Uploaded {len(job_ids)} job IDs to {self.results_bucket}/{ELB_METADATA_DIR}/{ELB_AWS_JOB_IDS}')
915+
bucket.put_object(Body=self.job_ids.to_json().encode(), Key=key) # type: ignore
916+
logging.debug(f'Uploaded job IDs to {self.results_bucket}/{ELB_METADATA_DIR}/{ELB_AWS_JOB_IDS}')
917+
918+
# This code is needed for backward compatibility with ElasticBLAST janitor
919+
# versions older than 0.2.4, and can be removed once the janitor is upgraded to version 0.2.4.
920+
ELB_AWS_OLD_JOB_IDS = 'job-ids.json'
921+
bucket_name, key = parse_bucket_name_key(f'{self.results_bucket}/{ELB_METADATA_DIR}/{ELB_AWS_OLD_JOB_IDS}')
922+
bucket = self.s3.Bucket(bucket_name)
923+
bucket.put_object(Body=json.dumps(self.job_ids.to_list()).encode(), Key=key)
924+
logging.debug(f'Uploaded job IDs to {self.results_bucket}/{ELB_METADATA_DIR}/{ELB_AWS_OLD_JOB_IDS}')
885925

886926

887927
def upload_query_length(self, query_length: int) -> None:
@@ -920,6 +960,8 @@ def check_status(self, extended=False) -> Tuple[ElbStatus, Dict[str, int], str]:
920960
elif njobs == 0:
921961
# This is likely the case when dry-run is set to True
922962
retval = ElbStatus.UNKNOWN
963+
elif (self.job_ids.query_splitting or self.job_ids.job_submission) and not self.job_ids.search:
964+
retval = ElbStatus.SUBMITTING
923965
elif running > 0 or pending > 0:
924966
retval = ElbStatus.RUNNING
925967
elif (pending + running + failed) == 0 and succeeded == njobs:
@@ -941,8 +983,8 @@ def _load_job_ids_from_aws(self):
941983
try:
942984
bucket.download_file(key, tmp.name)
943985
with open(tmp.name) as f_ids:
944-
self.job_ids += json.load(f_ids)
945-
self.job_ids = list(set(self.job_ids))
986+
new_job_ids = JobIds.from_json(f_ids.read())
987+
self.job_ids.merge(new_job_ids)
946988
except ClientError as err:
947989
err_code = err.response['Error']['Code']
948990
fnx_name = inspect.stack()[0].function
@@ -965,11 +1007,12 @@ def _check_status(self, extended) -> Tuple[Dict[str, int], str]:
9651007

9661008
if not self.job_ids:
9671009
self._load_job_ids_from_aws()
1010+
job_ids = self.job_ids.to_list()
9681011

9691012
# check status of jobs in batches of JOB_BATCH_NUM
9701013
JOB_BATCH_NUM = 100
971-
for i in range(0, len(self.job_ids), JOB_BATCH_NUM):
972-
job_batch = self.batch.describe_jobs(jobs=self.job_ids[i:i + JOB_BATCH_NUM])['jobs']
1014+
for i in range(0, len(job_ids), JOB_BATCH_NUM):
1015+
job_batch = self.batch.describe_jobs(jobs=job_ids[i:i + JOB_BATCH_NUM])['jobs']
9731016
# get number for AWS Batch job states
9741017
for st in AWS_BATCH_JOB_STATES:
9751018
counts[st] += sum([j['status'] == st for j in job_batch])

src/elastic_blast/commands/run_summary.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@
4242
from botocore.exceptions import ClientError #type: ignore
4343
from elastic_blast.base import PositiveInteger
4444
from elastic_blast.aws_traits import create_aws_config
45-
from elastic_blast.aws import handle_aws_error
45+
from elastic_blast.aws import handle_aws_error, JobIds
4646
from elastic_blast.util import safe_exec
4747
from elastic_blast.filehelper import parse_bucket_name_key
4848
from elastic_blast.constants import ELB_AWS_JOB_IDS, ELB_QUERY_LENGTH, ELB_METADATA_DIR
@@ -494,7 +494,7 @@ def _read_job_logs_aws(cfg, write_logs):
494494
bucket, key = parse_bucket_name_key(fname)
495495
resp = s3.get_object(Bucket=bucket, Key=key)
496496
body = resp['Body']
497-
job_list = json.loads(body.read().decode())
497+
job_list = JobIds.from_json(body.read().decode()).to_list()
498498

499499
write_logs.write('AWS job log dump\n')
500500

src/elastic_blast/commands/status.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@ def _status(args, cfg: ElasticBlastConfig, clean_up_stack: List[Any]) -> int:
5858
while True:
5959
status, counts, verbose_result = elastic_blast.check_status(args.verbose)
6060
result = str(status)
61-
if counts:
61+
if status == ElbStatus.RUNNING and counts and sum(counts.values()) > 0:
6262
result = '\n'.join([f'{x} {counts[x.lower()]}' for x in
6363
('Pending', 'Running', 'Succeeded', 'Failed')
6464
])

src/elastic_blast/constants.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -128,7 +128,7 @@ class ElbExecutionMode(Enum):
128128
ELB_BACKEND_LOG = 'backends.log'
129129
ELB_TAXIDLIST_FILE = 'taxidlist.txt'
130130
ELB_META_CONFIG_FILE = 'elastic-blast-config.json'
131-
ELB_AWS_JOB_IDS = 'job-ids.json'
131+
ELB_AWS_JOB_IDS = 'job-ids-v2.json'
132132
ELB_QUERY_LENGTH = 'query_length.txt'
133133
ELB_GCP_BATCH_LIST = 'batch_list.txt'
134134
# this file contents should match the number of lines in ELB_GCP_BATCH_LIST
@@ -203,7 +203,7 @@ def __str__(self):
203203
ELB_DOCKER_VERSION = '1.0.3'
204204
ELB_QS_DOCKER_VERSION = '0.1.2'
205205
ELB_JANITOR_DOCKER_VERSION = '0.2.0'
206-
ELB_JOB_SUBMIT_DOCKER_VERSION = '1.2.1'
206+
ELB_JOB_SUBMIT_DOCKER_VERSION = '2.0.0'
207207

208208
ELB_DOCKER_IMAGE_GCP = f'gcr.io/ncbi-sandbox-blast/ncbi/elb:{ELB_DOCKER_VERSION}'
209209
ELB_DOCKER_IMAGE_AWS = f'public.ecr.aws/ncbi-elasticblast/elasticblast-elb:{ELB_DOCKER_VERSION}'

src/elastic_blast/filehelper.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -458,7 +458,6 @@ def open_for_read(fname):
458458
body.readable = lambda: True
459459
body.writable = lambda: False
460460
body.seekable = lambda: False
461-
body.closed = False
462461
body.flush = lambda: None
463462
if tarred or gzipped:
464463
fileobj = unpack_stream(body, gzipped, tarred)

tests/utils.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -593,7 +593,7 @@ def Object(self, bucket, key):
593593
return obj
594594

595595

596-
class MockedStream(str):
596+
class MockedStream(io.IOBase):
597597
"""A string stream class needed for mocked downloads from S3, used by
598598
filehelper.open_for_read"""
599599
def __init__(self, data):

0 commit comments

Comments (0)