9
9
import java .net .UnknownHostException ;
10
10
import java .util .regex .Matcher ;
11
11
import java .util .regex .Pattern ;
12
+ import org .json .simple .JSONArray ;
13
+ import org .json .simple .JSONObject ;
14
+ import org .json .simple .parser .JSONParser ;
12
15
import edu .utah .hci .aws .util .Util ;
13
16
import java .util .ArrayList ;
14
17
import java .util .Arrays ;
15
18
import java .util .HashMap ;
19
+ import java .util .Iterator ;
16
20
17
21
18
22
/**Looks for bash scripts in a particular s3 bucket, renames each, downloads, and runs, and transfers back job results
@@ -75,7 +79,7 @@ public class JobRunner {
75
79
private boolean verbose = false ;
76
80
private boolean syncDirs = true ;
77
81
private boolean terminateInstance = false ;
78
- private int minToWait = 60 ;
82
+ private int minToWait = 10 ;
79
83
private boolean testing = false ;
80
84
81
85
//internal fields
@@ -86,8 +90,10 @@ public class JobRunner {
86
90
private String ram = "NA" ;
87
91
private String availableDisk = "NA" ;
88
92
private String availabilityZone = null ;
93
+ private String region = null ;
89
94
private String instanceId = null ;
90
95
private String instanceType = null ;
96
+ private String spotId = null ;
91
97
private double spotPrice = 0 ;
92
98
private String awsPath = "aws" ;
93
99
private StringBuilder hostLog = new StringBuilder ();
@@ -120,8 +126,12 @@ public JobRunner (String[] args){
120
126
processArgs (args );
121
127
122
128
loadCredentials ();
123
-
129
+
124
130
checkAwsCli ();
131
+
132
+ loadHostInfo ();
133
+ loadSpotInfo ();
134
+ printHostInfo ();
125
135
126
136
checkResourceBundle ();
127
137
@@ -262,7 +272,7 @@ private double fetchSpotPrice(long currentTime) throws IOException {
262
272
availabilityZone ="us-west-2d" ;
263
273
}
264
274
265
- String [] cmd = {awsPath , "ec2" , "describe-spot-price-history" ,
275
+ String [] cmd = {awsPath , "--region" , region , " ec2" , "describe-spot-price-history" ,
266
276
"--instance-types" , instanceType ,
267
277
"--availability-zone" , availabilityZone ,
268
278
"--start-time" , seconds .toString (),
@@ -359,9 +369,17 @@ private void deleteAndCopyLocalJobDirWithS3JobDir() throws Exception {
359
369
private void shutDown (int ec ) throws Exception {
360
370
exitCode = ec ;
361
371
372
+ //kill the spot request
373
+ if (spotId != null ) {
374
+ if (verbose ) pl ("Canceling spot request..." );
375
+ String [] cmd = new String []{awsPath , "--region" , region , "ec2" , "cancel-spot-instance-requests" , "--spot-instance-request-ids" , spotId };
376
+ executeReturnExitCode (cmd , false , true , null );
377
+ }
378
+
362
379
// this will system exit
363
380
if (availabilityZone != null && terminateInstance ) {
364
- String [] cmd = new String []{awsPath , "ec2" , "terminate-instances" , "--instance-ids" , instanceId };
381
+ if (verbose ) pl ("Terminating instance..." );
382
+ String [] cmd = new String []{awsPath , "--region" , region , "ec2" , "terminate-instances" , "--instance-ids" , instanceId };
365
383
executeReturnExitCode (cmd , false , true , null );
366
384
}
367
385
Util .pl ("\t Complete" );
@@ -833,12 +851,19 @@ private void loadCredentials() throws IOException {
833
851
//check it, the downloaded file might be a error message from AWS about expired
834
852
String [] lines = Util .loadTxtFile (credentialsFile );
835
853
int keyCount = 0 ;
836
- for (String l : lines ) if (l .contains ("aws_access_key_id" )) keyCount ++;
854
+ //String regionLine = null; //region = us-west-2
855
+ for (String l : lines ) {
856
+ if (l .contains ("aws_access_key_id" )) keyCount ++;
857
+ //if (l.contains("region")) regionLine = l;
858
+ }
837
859
String merged = Util .stringArrayToString (lines , "\n \t " );
838
860
if (keyCount !=1 || merged .contains ("region" ) == false || merged .contains ("aws_access_key_id" ) == false || merged .contains ("aws_secret_access_key" ) == false ) {
839
- throw new IOException ("\t Error: the credential file is malformed -> " +credentialsUrl + " \n \t " + merged +"\n \t See the JobRunner help menu." );
861
+ throw new IOException ("\t Error: the credential file is malformed, does it have just one set of credentials? with region, aws_access_key_id, and aws_secret_access_key? -> " +credentialsUrl +"\n \t See the JobRunner help menu." );
840
862
}
841
863
864
+ //String[] splitRegionLine = Util.EQUALS.split(regionLine);
865
+ //region = splitRegionLine[1].trim();
866
+
842
867
//since it was downloaded, mark it for deletion upon exit
843
868
credentialsFile .deleteOnExit ();
844
869
}
@@ -848,14 +873,11 @@ private void loadCredentials() throws IOException {
848
873
849
874
850
875
//TODO: Only needed in Eclipse
851
- if (hostName . endsWith ( "local" ) || hostName . contains ( "utah" ) ) {
876
+ if (testing ) {
852
877
envPropToAdd .put ("PATH" , "/usr/local/bin:/usr/bin:/bin:/usr/sbin:/sbin:/opt/X11/bin" );
853
878
awsPath ="/usr/local/bin/aws" ;
854
879
}
855
880
856
- //fetch the spot price
857
- spotPrice = fetchSpotPrice (System .currentTimeMillis ());
858
-
859
881
}
860
882
861
883
@@ -892,26 +914,18 @@ public void processArgs(String[] args) throws Exception {
892
914
}
893
915
}
894
916
}
895
-
917
+
896
918
//work directory, need this to find RAM on machine
897
919
if (workDirectory == null ) {
898
920
throw new IOException ("Error: failed to find your -t temporary local work directory." );
899
921
}
900
-
922
+
901
923
workDirectory .mkdirs ();
902
924
if (workDirectory .exists () == false || workDirectory .canWrite () == false ) {
903
925
throw new IOException ("Error: failed to find a writable work directory -> " + workDirectory );
904
926
}
905
927
tmpDirectory = new File (workDirectory , "TmpDir" );
906
928
tmpDirectory .mkdir ();
907
-
908
- //set node info
909
- loadHostInfo ();
910
-
911
- //add workingDir to env
912
- envPropToAdd .put ("JR_WORKING_DIR" , workDirectory .getCanonicalPath ());
913
-
914
- printParams ();
915
929
916
930
// required args?
917
931
if (resourceS3Uri == null || credentialsUrl == null || jobsS3Uri == null || logsS3Uri == null ) {
@@ -928,8 +942,49 @@ public void processArgs(String[] args) throws Exception {
928
942
}
929
943
if (resourceS3Uri .endsWith (".zip" ) == false ) throw new IOException ("Error: the zip resource S3Uri must end with xxx.zip, see " +resourceS3Uri );
930
944
945
+ //add workingDir to env
946
+ envPropToAdd .put ("JR_WORKING_DIR" , workDirectory .getCanonicalPath ());
947
+
948
+ printParams ();
949
+
931
950
}
951
+
952
+ private void loadSpotInfo () {
953
+ String jsonString = null ;
954
+ try {
955
+ if (instanceId == null ) return ;
956
+
957
+
958
+ //fetch the spot price
959
+ spotPrice = fetchSpotPrice (System .currentTimeMillis ());
960
+
961
+ String [] cmd = {awsPath , "--region" , region , "ec2" , "describe-spot-instance-requests" };
932
962
963
+ String [] out = executeViaProcessBuilder (cmd , false , null );
964
+
965
+ jsonString = Util .stringArrayToString (out , " " );
966
+ Object obj = new JSONParser ().parse (jsonString );
967
+ JSONObject jo = (JSONObject ) obj ;
968
+ JSONArray ja = (JSONArray ) jo .get ("SpotInstanceRequests" );
969
+
970
+ Iterator <JSONObject > it = ja .iterator ();
971
+ while (it .hasNext ()) {
972
+ JSONObject sr = it .next ();
973
+ String iid = sr .get ("InstanceId" ).toString ();
974
+ if (iid .equals (instanceId )) {
975
+ spotId = sr .get ("SpotInstanceRequestId" ).toString ();
976
+ return ;
977
+ }
978
+ }
979
+ throw new Exception ();
980
+ } catch (Exception e ) {
981
+ el ("\n Failed to fetch spot information for " +instanceId +" from ->\n " +jsonString +"\n " +Util .getStackTrace (e ));
982
+ e .printStackTrace ();
983
+ try {
984
+ shutDown (1 );
985
+ } catch (Exception e1 ) {}
986
+ }
987
+ }
933
988
/*Attempt to get machine info, doesn't work on a mac*/
934
989
private void loadHostInfo () throws UnknownHostException {
935
990
@@ -969,6 +1024,7 @@ private void loadHostInfo() throws UnknownHostException {
969
1024
} catch (Exception e ) {}
970
1025
if (out != null && out .length == 3 ) {
971
1026
availabilityZone = Util .WHITESPACE .split (out [0 ])[1 ];
1027
+ region = availabilityZone .substring (0 , availabilityZone .length ()-1 );
972
1028
instanceId = Util .WHITESPACE .split (out [1 ])[1 ];
973
1029
hostName = instanceId ;
974
1030
instanceType = Util .WHITESPACE .split (out [2 ])[1 ];
@@ -991,9 +1047,12 @@ private void printParams() {
991
1047
pl (" -l Node Logs S3 URI : " + logsS3Uri );
992
1048
pl (" -d Local work dir : " + workDirectory );
993
1049
pl (" -t Terminate node on exit : " + terminateInstance );
994
- pl (" -w Min 2 wait before exit : " + minToWait );
1050
+ pl (" -w Min to wait before exit : " + minToWait );
995
1051
pl (" -x Replace S3 job with local : " + (syncDirs ==false ));
996
1052
1053
+ }
1054
+
1055
+ private void printHostInfo () {
997
1056
pl ("\n Job Runner Info:" );
998
1057
pl (" Host name : " + hostName );
999
1058
pl (" Number processors : " + numberProcessors );
@@ -1003,40 +1062,49 @@ private void printParams() {
1003
1062
1004
1063
if (availabilityZone != null ) {
1005
1064
pl (" Availability Zone : " + availabilityZone );
1065
+ pl (" Region : " + region );
1006
1066
pl (" Instance ID : " + instanceId );
1067
+ pl (" Spot Request ID : " + spotId );
1007
1068
pl (" Instance Type : " + instanceType );
1008
1069
pl (" Terminate upon exit : " + terminateInstance );
1009
1070
}
1010
1071
}
1011
1072
1012
1073
public static void printDocs (){
1013
1074
System .out .println ("\n " +
1014
- "****************************************************************************************************************************\n " +
1015
- "** AWS Job Runner : December 2021 **\n " +
1016
- "****************************************************************************************************************************\n " +
1017
- "JR is an app for running bash scripts on AWS EC2 nodes. It downloads and uncompressed your resource bundle and looks for\n " +
1018
- "xxx.sh_JR_START files in your S3 Jobs directories. For each, it copies over the directory contents, executes the\n " +
1019
- "associated xxx.sh script, and transfers back the results. This is repeated until no unrun jobs are found. Launch many\n " +
1020
- "EC2 JR nodes, each running an instance of the JR, to process hundreds of jobs in parallel. Use spot requests and\n " +
1021
- "hibernation to reduce costs.\n " +
1075
+ "**************************************************************************************\n " +
1076
+ "** AWS Job Runner : January 2021 **\n " +
1077
+ "**************************************************************************************\n " +
1078
+ "JR is an app for running bash scripts on AWS EC2 nodes. It downloads and uncompressed\n " +
1079
+ "your resource bundle and looks for xxx.sh_JR_START files in your S3 Jobs directories.\n " +
1080
+ "For each, it copies over the directory contents, executes the associated xxx.sh\n " +
1081
+ "script, and transfers back the results. This is repeated until no unrun jobs are\n " +
1082
+ "found. Launch many EC2 JR nodes, each running an instance of the JR, to process\n " +
1083
+ "hundreds of jobs in parallel. Use spot requests and hibernation to reduce costs.\n " +
1084
+ "Upon termination, JR will cancel the spot request and kill the instance.\n " +
1022
1085
1023
1086
"\n To use:\n " +
1024
- "1) Install and configure the aws cli on your local workstation, see https://aws.amazon.com/cli/\n " +
1025
- "2) Upload your aws credentials file into a private bucket on aws, e.g.\n " +
1026
- " aws s3 cp ~/.aws/credentials s3://my-jr/aws.cred.txt\n " +
1087
+ "1) Install and configure the aws cli on your local workstation, see\n " +
1088
+ " https://aws.amazon.com/cli/\n " +
1089
+ "2) Upload a [default] aws credential file containing a single set of region,\n " +
1090
+ " aws_access_key_id, and aws_secret_access_key info into a private bucket, e.g.\n " +
1091
+ " aws s3 cp ~/.aws/credentials s3://my-jr/aws.cred.txt \n " +
1027
1092
"3) Generate a secure 24hr timed URL for the credentials file, e.g.\n " +
1028
1093
" aws --region us-west-2 s3 presign s3://my-jr/aws.cred.txt --expires-in 259200\n " +
1029
1094
"4) Upload a zip archive containing resources needed to run your jobs into S3, e.g.\n " +
1030
1095
" aws s3 cp ~/TNRunnerResourceBundle.zip s3://my-jr/TNRunnerResourceBundle.zip\n " +
1031
1096
" This will be copied into the /JRDir/ directory and then unzipped.\n " +
1032
1097
"5) Upload script and job files into a 'Jobs' directory on S3, e.g.\n " +
1033
1098
" aws s3 cp ~/JRJobs/A/ s3://my-jr/Jobs/A/ --recursive\n " +
1034
- "6) Optional, upload bash script files ending with JR_INIT.sh and or JR_TERM.sh. These are executed by JR before and after\n " +
1035
- " running the main bash script. Use these to copy in sample specific resources, e.g. fastq/ cram/ bam files, and to run\n " +
1099
+ "6) Optional, upload bash script files ending with JR_INIT.sh and or JR_TERM.sh. These\n " +
1100
+ " are executed by JR before and after running the main bash script. Use these to\n " +
1101
+ " copy in sample specific resources, e.g. fastq/ cram/ bam files, and to run\n " +
1036
1102
" post job clean up.\n " +
1037
- "7) Upload a file named XXX_JR_START to let the JobRunner know the bash script named XXX is ready to run, e.g.\n " +
1103
+ "7) Upload a file named XXX_JR_START to let the JobRunner know the bash script named\n " +
1104
+ " XXX is ready to run, e.g.\n " +
1038
1105
" aws s3 cp s3://my-jr/emptyFile s3://my-jr/Jobs/A/dnaAlignQC.sh_JR_START\n " +
1039
- "8) Launch the JobRunner.jar on one or more JR configured EC2 nodes. See https://ri-confluence.hci.utah.edu/x/gYCgBw\n " +
1106
+ "8) Launch the JobRunner.jar on one or more JR configured EC2 nodes. See\n " +
1107
+ " https://ri-confluence.hci.utah.edu/x/gYCgBw\n " +
1040
1108
1041
1109
"\n Job Runner Required Options:\n " +
1042
1110
"-c URL to your secure timed config credentials file.\n " +
@@ -1045,23 +1113,25 @@ public static void printDocs(){
1045
1113
"-l S3URI to your Log folder for node logs.\n " +
1046
1114
1047
1115
"\n Default Options:\n " +
1048
- "-d Directory on the local worker node, full path, in which resources and job files will be processed, defaults to /JRDir/\n " +
1116
+ "-d Directory on the local worker node, full path, in which resources and job files\n " +
1117
+ " will be processed, defaults to /JRDir/\n " +
1049
1118
"-a Aws credentials directory, defaults to ~/.aws/\n " +
1050
- "-t Terminate the EC2 node upon job completion. Defaults to looking for jobs for the min2Wait.\n " +
1051
- "-w Minutes to wait when jobs are not found before termination, defaults to 10.\n " +
1052
- "-x Replace S3 job directories with processed analysis, defaults to syncing local with S3. WARNING, if selected, don't place\n " +
1053
- " any files in these S3 jobs directories that cannot be replaced. JR will delete them.\n " +
1119
+ "-t Terminate the EC2 node upon program exit, defaults to leaving it running. \n " +
1120
+ "-w Minutes to wait looking for jobs before exiting, defaults to 10.\n " +
1121
+ "-x Replace S3 job directories with processed analysis, defaults to syncing local with\n " +
1122
+ " S3. WARNING, if selected, don't place any files in these S3 jobs directories that\n " +
1123
+ " cannot be replaced. JR will delete them.\n " +
1054
1124
"-v Verbose debugging output.\n " +
1055
1125
1056
1126
"\n Example: java -jar -Xmx1G JobRunner.jar -x -t \n " +
1057
1127
" -r s3://my-jr/TNRunnerResourceBundle.zip\n " +
1058
1128
" -j s3://my-jr/Jobs/\n " +
1059
1129
" -l s3://my-jr/NodeLogs/\n " +
1060
- " -c 'https://my-jr.s3.us-west-2.amazonaws.com/aws.cred.txt?X-Amz-Algorithm=AWS4-HMXXX ...'\n \n " +
1130
+ " -c 'https://my-jr.s3.us-west-2.amazonaws.com/aws.cred.txt?X-AmRun ...'\n \n " +
1061
1131
1062
1132
1063
1133
1064
- "**************************************************************************************************************************** \n " );
1134
+ "**************************************************************************************\n " );
1065
1135
1066
1136
}
1067
1137
0 commit comments