Skip to content

Commit 480f859

Browse files
committed
Release 0.2.6
1 parent bee0ed4 commit 480f859

File tree

15 files changed

+314
-129
lines changed

15 files changed

+314
-129
lines changed

CITATION.cff

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
cff-version: "1.1.0"
22
message: "If you use this software, please cite it using these metadata."
33
title: ElasticBLAST
4-
version: "0.2.5"
5-
date-released: 2022-03-24
4+
version: "0.2.6"
5+
date-released: 2022-06-08
66
license: "NCBI Public Domain"
77
repository-code: "https://github.com/ncbi/elastic-blast/"
88
authors:

DISCLAIMER.md

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
Although all reasonable efforts have been taken to ensure the accuracy
2+
and reliability of the software and data, the NLM and the U.S.
3+
Government do not and cannot warrant the performance or results that
4+
may be obtained by using this software or data. The NLM and the U.S.
5+
Government disclaim all warranties, express or implied, including
6+
warranties of performance, merchantability or fitness for any
7+
particular purpose.
8+
9+
Users of ElasticBLAST are solely responsible for any and all cloud service
10+
provider charges associated with their use of ElasticBLAST. 
11+
12+
See also: [LICENSE](LICENSE.md)

bin/gcp-setup-elastic-blast-janitor.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ usage() {
2424
echo -e "\t-h: Show this message"
2525
}
2626

27-
while getopts "u:ph" OPT; do
27+
while getopts "u:p:h" OPT; do
2828
case $OPT in
2929
u) user=${OPTARG}
3030
;;

requirements/base.txt

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
11
wheel == 0.37.1
22
setuptools == 56.0.0
3-
importlib-resources == 5.4.0
4-
importlib-metadata == 4.11.3
5-
pex == 2.1.73
6-
boto3 == 1.21.24
7-
botocore == 1.24.24
3+
importlib-resources == 5.7.1
4+
importlib-metadata == 4.11.4
5+
pex == 2.1.92
6+
boto3 == 1.24.3
7+
botocore == 1.27.3
88
awslimitchecker == 12.0.0
99
tenacity == 8.0.1
1010
dataclasses-json == 0.5.7

requirements/test.txt

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,13 @@
11
-r base.txt
22

3-
pytest == 7.1.1
3+
pytest == 7.1.2
44
pytest-cov == 3.0.0
55
pytest-mock == 3.7.0
66
teamcity-messages == 1.31
7-
mypy == 0.941
7+
mypy == 0.961
88
pylint == 2.7.4
9-
tox == 3.24.5
9+
tox == 3.25.0
1010
yamllint == 1.26.3
11-
moto == 3.1.1
11+
moto == 3.1.12
1212
docker == 5.0.3
13-
cfn-lint == 0.58.4
13+
cfn-lint == 0.61.0

src/elastic_blast/aws.py

Lines changed: 3 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,7 @@
6262
from .constants import ELB_AWS_JANITOR_CFN_TEMPLATE, ELB_DFLT_JANITOR_SCHEDULE_AWS
6363
from .constants import ELB_AWS_JANITOR_LAMBDA_DEPLOYMENT_BUCKET, ELB_AWS_JANITOR_LAMBDA_DEPLOYMENT_KEY
6464
from .constants import CFG_CLOUD_PROVIDER, CFG_CP_AWS_AUTO_SHUTDOWN_ROLE, CSP
65-
from .constants import AWS_JANITOR_ROLE_NAME
65+
from .constants import AWS_JANITOR_ROLE_NAME, ELB_JANITOR_SCHEDULE
6666
from .constants import STATUS_MESSAGE_ERROR, STATUS_MESSAGE_VERBOSE
6767
from .filehelper import parse_bucket_name_key
6868
from .aws_traits import get_machine_properties, create_aws_config, get_availability_zones_for
@@ -240,8 +240,8 @@ def _init(self, cfg: ElasticBlastConfig, create: bool):
240240
logging.debug(f'Found janitor role for {AWS_JANITOR_ROLE_NAME}: {role.arn}')
241241
except:
242242
logging.debug(f'Did not find janitor role for {AWS_JANITOR_ROLE_NAME}')
243-
if 'ELB_JANITOR_SCHEDULE' in os.environ:
244-
janitor_schedule = os.environ['ELB_JANITOR_SCHEDULE']
243+
if ELB_JANITOR_SCHEDULE in os.environ:
244+
janitor_schedule = os.environ[ELB_JANITOR_SCHEDULE]
245245
logging.debug(f'Overriding janitor schedule to "{janitor_schedule}"')
246246
if 'ELB_DISABLE_AUTO_SHUTDOWN' in os.environ:
247247
janitor_schedule = ''
@@ -916,14 +916,6 @@ def upload_job_ids(self) -> None:
916916
bucket.put_object(Body=self.job_ids.to_json().encode(), Key=key) # type: ignore
917917
logging.debug(f'Uploaded job IDs to {self.results_bucket}/{ELB_METADATA_DIR}/{ELB_AWS_JOB_IDS}')
918918

919-
# This code is needed for janitor backward compatibility in version
920-
# 0.2.4, and can be removed when the ElasticBLAST janitor is upgraded to version 0.2.4.
921-
ELB_AWS_OLD_JOB_IDS = 'job-ids.json'
922-
bucket_name, key = parse_bucket_name_key(f'{self.results_bucket}/{ELB_METADATA_DIR}/{ELB_AWS_OLD_JOB_IDS}')
923-
bucket = self.s3.Bucket(bucket_name)
924-
bucket.put_object(Body=json.dumps(self.job_ids.to_list()).encode(), Key=key)
925-
logging.debug(f'Uploaded job IDs to {self.results_bucket}/{ELB_METADATA_DIR}/{ELB_AWS_OLD_JOB_IDS}')
926-
927919

928920
def upload_query_length(self, query_length: int) -> None:
929921
"""Save query length in a metadata file in S3"""

src/elastic_blast/config.py

Lines changed: 5 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@
4646
from .constants import ELB_DFLT_AWS_SPOT_BID_PERCENTAGE
4747
from .constants import APP_STATE_RESULTS_MD5, SYSTEM_MEMORY_RESERVE
4848
from .constants import ELB_S3_PREFIX, ELB_GCS_PREFIX
49-
from .constants import ELB_DFLT_AWS_REGION, ELB_DFLT_GCP_REGION
49+
from .constants import ELB_DFLT_AWS_REGION, ELB_DFLT_GCP_REGION, ELB_DFLT_GCP_ZONE
5050
from .util import UserReportError
5151
from .filehelper import parse_bucket_name_key
5252
from typing import List
@@ -70,6 +70,9 @@ def _load_config_from_environment(cfg: configparser.ConfigParser) -> None:
7070
"""Selected environment variables can be used to configure ElasticBLAST"""
7171
if 'ELB_GCP_PROJECT' in os.environ:
7272
cfg[CFG_CLOUD_PROVIDER][CFG_CP_GCP_PROJECT] = os.environ['ELB_GCP_PROJECT']
73+
# If GCP project is provided by the environment/configuration, leverage it
74+
if 'CLOUDSDK_CORE_PROJECT' in os.environ:
75+
cfg[CFG_CLOUD_PROVIDER][CFG_CP_GCP_PROJECT] = os.environ['CLOUDSDK_CORE_PROJECT']
7376
if 'ELB_GCP_REGION' in os.environ:
7477
cfg[CFG_CLOUD_PROVIDER][CFG_CP_GCP_REGION] = os.environ['ELB_GCP_REGION']
7578
if 'ELB_GCP_ZONE' in os.environ:
@@ -140,16 +143,6 @@ def configure(args: argparse.Namespace) -> configparser.ConfigParser:
140143
retval[CFG_CLOUD_PROVIDER][CFG_CP_GCP_REGION] = args.gcp_region
141144
if hasattr(args, 'gcp_zone') and args.gcp_zone:
142145
retval[CFG_CLOUD_PROVIDER][CFG_CP_GCP_ZONE] = args.gcp_zone
143-
144-
# If results bucket was provided, set the default region in the
145-
# corresponding cloud service provider if it wasn't specified by the user
146-
if CFG_BLAST_RESULTS in retval[CFG_BLAST]:
147-
if retval[CFG_BLAST][CFG_BLAST_RESULTS].startswith(ELB_S3_PREFIX):
148-
if CFG_CP_AWS_REGION not in retval[CFG_CLOUD_PROVIDER]:
149-
retval[CFG_CLOUD_PROVIDER][CFG_CP_AWS_REGION] = ELB_DFLT_AWS_REGION
150-
elif retval[CFG_BLAST][CFG_BLAST_RESULTS].startswith(ELB_GCS_PREFIX):
151-
if CFG_CP_GCP_REGION not in retval[CFG_CLOUD_PROVIDER]:
152-
retval[CFG_CLOUD_PROVIDER][CFG_CP_GCP_REGION] = ELB_DFLT_GCP_REGION
153146

154147
# Exception to prevent unnecessary API calls and ensure testability
155148
# of some functionality without credentials
@@ -168,7 +161,7 @@ def _validate_csp(cfg: configparser.ConfigParser) -> None:
168161
Throws a UserReportError in case of invalid configuration.
169162
"""
170163
if CFG_CLOUD_PROVIDER not in cfg:
171-
report_config_error(['Cloud provider configuration is missing'])
164+
return
172165

173166
# are gcp or aws entries present in cloud-provider config
174167
gcp = sum([i.startswith('gcp') for i in cfg[CFG_CLOUD_PROVIDER]]) > 0
@@ -179,8 +172,6 @@ def _validate_csp(cfg: configparser.ConfigParser) -> None:
179172
# both and none are forbidden
180173
if gcp and aws:
181174
msg.append('Cloud provider config contains entries for more than one cloud provider. Only one cloud provider can be used')
182-
if not gcp and not aws:
183-
msg.append('Cloud provider configuration is missing')
184175

185176
if CFG_CP_NAME in cfg[CFG_CLOUD_PROVIDER]:
186177
logging.debug(f'Cloud Service Provider {cfg[CFG_CLOUD_PROVIDER][CFG_CP_NAME]}')

src/elastic_blast/constants.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -198,7 +198,9 @@ def __str__(self):
198198

199199

200200
ELB_DFLT_GCP_REGION = 'us-east4'
201+
ELB_DFLT_GCP_ZONE = 'us-east4-b'
201202
ELB_DFLT_AWS_REGION = 'us-east-1'
203+
ELB_UNKNOWN_GCP_PROJECT = 'elb-unknown-gcp-project'
202204

203205
ELB_DOCKER_VERSION = '1.1.0'
204206
ELB_QS_DOCKER_VERSION = '0.1.2'
@@ -235,6 +237,7 @@ def __str__(self):
235237
CFG_CP_GCP_ZONE = 'gcp-zone'
236238
CFG_CP_GCP_NETWORK = 'gcp-network'
237239
CFG_CP_GCP_SUBNETWORK = 'gcp-subnetwork'
240+
CFG_CP_GCP_GKE_VERSION = 'gke-version'
238241
CFG_CP_AWS_REGION = 'aws-region'
239242
CFG_CP_AWS_KEY_PAIR = 'aws-key-pair'
240243
CFG_CP_AWS_VPC = 'aws-vpc'
@@ -317,6 +320,8 @@ def __str__(self):
317320
# https://docs.aws.amazon.com/eventbridge/latest/userguide/eb-create-rule-schedule.html
318321
ELB_DFLT_JANITOR_SCHEDULE_AWS = "cron(*/5 * * * ? *)"
319322

323+
ELB_JANITOR_SCHEDULE = 'ELB_JANITOR_SCHEDULE'
324+
320325
ELB_AWS_JANITOR_LAMBDA_DEPLOYMENT_BUCKET = 'elb-camacho'
321326
ELB_AWS_JANITOR_LAMBDA_DEPLOYMENT_KEY = 'functions/'
322327
ELB_AWS_JANITOR_CFN_TEMPLATE = 'https://elb-camacho.s3.amazonaws.com/templates/elastic-blast-janitor-cf.yaml'

src/elastic_blast/elb_config.py

Lines changed: 89 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,7 @@
5757
from .constants import CFG_CLOUD_PROVIDER
5858
from .constants import CFG_CP_GCP_PROJECT, CFG_CP_GCP_REGION, CFG_CP_GCP_ZONE
5959
from .constants import CFG_CP_GCP_NETWORK, CFG_CP_GCP_SUBNETWORK
60+
from .constants import CFG_CP_GCP_GKE_VERSION
6061
from .constants import CFG_CP_AWS_REGION, CFG_CP_AWS_VPC, CFG_CP_AWS_SUBNET
6162
from .constants import CFG_CP_AWS_JOB_ROLE, CFG_CP_AWS_BATCH_SERVICE_ROLE
6263
from .constants import CFG_CP_AWS_INSTANCE_ROLE, CFG_CP_AWS_SPOT_FLEET_ROLE
@@ -82,7 +83,9 @@
8283
from .constants import ELB_DFLT_AWS_NUM_CPUS, ELB_DFLT_GCP_NUM_CPUS
8384
from .constants import ELB_S3_PREFIX, ELB_GCS_PREFIX, ELB_UNKNOWN_MAX_NUMBER_OF_CONCURRENT_JOBS
8485
from .constants import AWS_ROLE_PREFIX, CFG_CP_AWS_AUTO_SHUTDOWN_ROLE
85-
from .constants import BLASTDB_ERROR, ELB_UNKNOWN
86+
from .constants import BLASTDB_ERROR, ELB_UNKNOWN, ELB_JANITOR_SCHEDULE
87+
from .constants import ELB_DFLT_GCP_REGION, ELB_DFLT_GCP_ZONE
88+
from .constants import ELB_DFLT_AWS_REGION, ELB_UNKNOWN_GCP_PROJECT
8689
from .util import validate_gcp_string, check_aws_region_for_invalid_characters
8790
from .util import validate_gke_cluster_name, ElbSupportedPrograms
8891
from .util import get_query_batch_size
@@ -210,12 +213,15 @@ class CloudProviderBaseConfig:
210213
@dataclass
211214
class GCPConfig(CloudProviderBaseConfig, ConfigParserToDataclassMapper):
212215
"""GCP config for ElasticBLAST"""
213-
project: GCPString
214-
region: GCPString
215-
zone: GCPString
216+
region: GCPString = GCPString(ELB_DFLT_GCP_REGION)
217+
project: GCPString = GCPString(ELB_UNKNOWN_GCP_PROJECT)
218+
zone: GCPString = GCPString(ELB_DFLT_GCP_ZONE)
216219
network: Optional[str] = None
217220
subnet: Optional[str] = None
218221
user: Optional[str] = None
222+
# FIXME: This is a temporary fix for EB-1530. gke_version should be set to
223+
# None once the proper fix is implemented.
224+
gke_version: Optional[str] = '1.21'
219225

220226
# mapping to class attributes to ConfigParser parameters so that objects
221227
# can be initialized from ConfigParser objects
@@ -225,7 +231,8 @@ class GCPConfig(CloudProviderBaseConfig, ConfigParserToDataclassMapper):
225231
'cloud': None,
226232
'user': None,
227233
'network': ParamInfo(CFG_CLOUD_PROVIDER, CFG_CP_GCP_NETWORK),
228-
'subnet': ParamInfo(CFG_CLOUD_PROVIDER, CFG_CP_GCP_SUBNETWORK)}
234+
'subnet': ParamInfo(CFG_CLOUD_PROVIDER, CFG_CP_GCP_SUBNETWORK),
235+
'gke_version': ParamInfo(CFG_CLOUD_PROVIDER, CFG_CP_GCP_GKE_VERSION)}
229236

230237
def __post_init__(self):
231238
self.cloud = CSP.GCP
@@ -235,6 +242,13 @@ def __post_init__(self):
235242
if p.stdout:
236243
self.user = p.stdout.decode('utf-8').rstrip()
237244

245+
if self.project == ELB_UNKNOWN_GCP_PROJECT:
246+
proj = get_gcp_project()
247+
if not proj:
248+
raise ValueError(f'GCP project is unset, please invoke gcloud config set project REPLACE_WITH_YOUR_PROJECT_NAME_HERE')
249+
else:
250+
self.project = GCPString(proj)
251+
238252
def validate(self, errors: List[str], task: ElbCommand):
239253
"""Validate config"""
240254
if bool(self.network) != bool(self.subnet):
@@ -244,7 +258,7 @@ def validate(self, errors: List[str], task: ElbCommand):
244258
@dataclass
245259
class AWSConfig(CloudProviderBaseConfig, ConfigParserToDataclassMapper):
246260
"""AWS config for ElasticBLAST"""
247-
region: AWSRegion
261+
region: AWSRegion = AWSRegion(ELB_DFLT_AWS_REGION)
248262
vpc: Optional[str] = None
249263
subnet: Optional[str] = None
250264
security_group: Optional[str] = None
@@ -589,7 +603,8 @@ def __init__(self, *args, **kwargs):
589603
# post-init activities
590604

591605
try:
592-
self.cloud_provider.region.validate(dry_run)
606+
if self.cloud_provider.region:
607+
self.cloud_provider.region.validate(dry_run)
593608
except ValueError as err:
594609
raise UserReportError(returncode=INPUT_ERROR, message=str(err))
595610

@@ -713,7 +728,17 @@ def _init_from_ConfigParser(self, cfg: configparser.ConfigParser,
713728

714729
self._validate_config_parser(cfg)
715730
_validate_csp(cfg)
731+
self.cluster = ClusterConfig.create_from_cfg(cfg)
732+
733+
# determine cloud provider, first by user config, then results bucket
716734
if sum([i.startswith('aws') for i in cfg[CFG_CLOUD_PROVIDER]]) > 0:
735+
cloud = CSP.AWS
736+
elif sum([i.startswith('gcp') for i in cfg[CFG_CLOUD_PROVIDER]]) > 0:
737+
cloud = CSP.GCP
738+
else:
739+
cloud = self.cluster.results.get_cloud_provider()
740+
741+
if cloud == CSP.AWS:
717742
self.cloud_provider = AWSConfig.create_from_cfg(cfg)
718743
# for mypy
719744
self.aws = cast(AWSConfig, self.cloud_provider)
@@ -722,8 +747,6 @@ def _init_from_ConfigParser(self, cfg: configparser.ConfigParser,
722747
# for mypy
723748
self.gcp = cast(GCPConfig, self.cloud_provider)
724749

725-
self.cluster = ClusterConfig.create_from_cfg(cfg)
726-
727750
if task == ElbCommand.SUBMIT:
728751
self.blast = BlastConfig.create_from_cfg(cfg)
729752

@@ -853,6 +876,13 @@ def validate(self, task: ElbCommand = ElbCommand.SUBMIT, dry_run=False):
853876
if instance_props.memory - SYSTEM_MEMORY_RESERVE < bytes_to_cache_gb:
854877
errors.append(f'BLAST database {self.blast.db} memory requirements exceed memory available on selected machine type "{self.cluster.machine_type}". Please select machine type with at least {bytes_to_cache_gb + SYSTEM_MEMORY_RESERVE}GB available memory.')
855878

879+
# validate janitor schedule if provided
880+
if ELB_JANITOR_SCHEDULE in os.environ:
881+
try:
882+
validate_janitor_schedule(os.environ[ELB_JANITOR_SCHEDULE], self.cloud_provider.cloud)
883+
except ValueError as err:
884+
errors.append(str(err))
885+
856886
if errors:
857887
raise UserReportError(returncode=INPUT_ERROR,
858888
message='\n'.join(errors))
@@ -1108,6 +1138,31 @@ def get_instance_props(cloud_provider: CSP, region: str, machine_type: str) -> I
11081138
return instance_props
11091139

11101140

1141+
def validate_janitor_schedule(val: str, cloud_provider: CSP) -> None:
    """Check that a janitor cron schedule string is well formed.

    The accepted syntax depends on the cloud provider: for CSP.GCP the value
    must be either one of the "@..." shortcuts or a five-field cron
    expression; for any other provider the value must be a six-field
    expression wrapped in "cron(...)".

    Arguments:
        val: schedule string to check
        cloud_provider: cloud service provider that selects the syntax

    Raises:
        ValueError if val does not fully match the provider's pattern
    """
    # Regular expressions for the individual cron fields
    shortcuts = r'@(yearly|annually|monthly|weekly|daily|midnight|hourly)'
    minute_re = r'\*|(\*|([1-5]?[0-9]))((,(\*|([1-5]?[0-9])))*([/-][1-5]?[0-9])?)*'
    hour_re = r'\*|(\*|([1-2]?[0-9]))((,(\*|([1-2]?[0-9])))*([/-][1-2]?[0-9])?)*'
    dom_gcp_re = r'\*|(\*|([1-3]?[0-9]))((,(\*|([1-3]?[0-9])))*([/-][1-3]?[0-9])?)*'
    dom_aws_re = r'\*|\?|(\*|([1-3]?[0-9]L?W?))((,(\*|([1-3]?[0-9]L?W?)))*([/-][1-3]?[0-9])?)*'
    month_re = r'\*|(\*|(1?[0-9]))((,(\*|(1?[0-9])))*([/-]1?[0-9])?)*'
    dow_gcp_re = r'\*|((\*|[0-7]|mon|tue|wed|thu|fri|sat|sun)((,(\*|[0-7]|mon|tue|wed|thu|fri|sat|sun))*([/-]([1-6]|mon|tue|wed|thu|fri|sat|sun))?)*)'
    dow_aws_re = r'\*|\?|(((\*|[1-7]|MON|TUE|WED|THU|FRI|SAT|SUN)L?)(([,#](([1-7]|MON|TUE|WED|THU|FRI|SAT|SUN)L?))*([/-]([1-6]|MON|TUE|WED|THU|FRI|SAT|SUN))?)*)'
    year_re = r'\*|(\*|(2[01][0-9]{2}))((,(\*|(2[01][0-9]{2})))*(-2[01][0-9]{2})?(/\d{1,3})?)*'

    # Each field is its own group; fields are separated by one whitespace
    # character
    field_sep = r')\s('

    if cloud_provider == CSP.GCP:
        gcp_fields = [minute_re, hour_re, dom_gcp_re, month_re, dow_gcp_re]
        pattern = shortcuts + '|((' + field_sep.join(gcp_fields) + '))'
        url = 'https://kubernetes.io/docs/concepts/workloads/controllers/cron-jobs/#cron-schedule-syntax'
    else:
        aws_fields = [minute_re, hour_re, dom_aws_re, month_re, dow_aws_re, year_re]
        pattern = r'cron\((' + field_sep.join(aws_fields) + r')\)'
        url = 'https://docs.aws.amazon.com/eventbridge/latest/userguide/eb-create-rule-schedule.html'

    # The whole string has to match, not just a prefix
    if re.fullmatch(pattern, val) is None:
        raise ValueError(f'Invalid value of environment variable {ELB_JANITOR_SCHEDULE} "{val}". The string must match the regular expression "{pattern}". For more information, please see {url}')
1164+
1165+
11111166
class JSONEnumEncoder(json.JSONEncoder):
11121167
"""JSON encoder that handles basic types and Enum"""
11131168
def default(self, o):
@@ -1116,3 +1171,28 @@ def default(self, o):
11161171
return o.name
11171172
else:
11181173
return json.JSONEncoder(self, o)
1174+
1175+
1176+
def get_gcp_project() -> Optional[str]:
    """Return current GCP project or None if the property is unset.

    The project is read by running "gcloud config get-value project" and
    taking the first line of its standard output.

    Returns:
        The project name, or None if gcloud reports no project

    Raises:
        util.SafeExecError on problems with command line gcloud
        RuntimeError if gcloud run is successful, but the result is empty"""
    cmd: str = 'gcloud config get-value project'
    p = safe_exec(cmd)

    # the result should not be empty, for unset properties gcloud returns the
    # string: '(unset)' to stderr
    if not p.stdout and not p.stderr:
        raise RuntimeError('Current GCP project could not be established')

    # stdout is empty when gcloud wrote only to stderr, so guard the decode
    # and the first-line lookup
    lines = p.stdout.decode().splitlines() if p.stdout else []
    result = lines[0].strip() if lines else ''

    # return None if project is unset: either gcloud printed '(unset)' to
    # stdout, or it printed nothing to stdout and reported the unset
    # property on stderr
    if not result or result == '(unset)':
        return None
    return result
1197+
1198+

0 commit comments

Comments
 (0)