
Commit 68f677d

Varun Rao Bhamidimarri committed
Updates to include the HDFS plugin to allow RWX access to users' home directories and change Livy's default realm

1 parent 47601df

File tree

4 files changed: +196 -18 lines changed


aws_emr_blog_v3/cloudformation/emr-template.template

Lines changed: 17 additions & 16 deletions

@@ -451,6 +451,23 @@ Resources:
         - arn:aws:iam::aws:policy/service-role/AmazonElasticMapReduceforEC2Role
         - arn:aws:iam::aws:policy/AmazonSSMManagedInstanceCore
         - arn:aws:iam::aws:policy/CloudWatchFullAccess
+  AllowSecretsRetrievalPolicy:
+    Type: 'AWS::IAM::Policy'
+    DependsOn: EmrEc2Role
+    Properties:
+      PolicyName: AllowSecretsRetrievalPolicy
+      PolicyDocument:
+        Version: 2012-10-17
+        Statement:
+          - Effect: Allow
+            Action:
+              - secretsmanager:GetSecretValue
+              - secretsmanager:ListSecrets
+              - secretsmanager:DescribeSecret
+            Resource:
+              - !Join [ '', [ 'arn:aws:secretsmanager:', !Ref "AWS::Region", ':', !Ref "AWS::AccountId", ':secret:emr/ranger*' ] ]
+      Roles:
+        - !Ref EmrEc2Role
   DataAccessRoleARN:
     Type: AWS::IAM::Role
     Properties:
@@ -499,22 +516,6 @@ Resources:
         - !GetAtt 'DataAccessRoleARN.Arn'
       Roles:
         - !Ref EmrEc2Role
-
-  AllowSecretsRetrievalPolicy:
-    Type: 'AWS::IAM::Policy'
-    Properties:
-      PolicyName: AllowSecretsRetrieval
-      PolicyDocument:
-        Version: 2012-10-17
-        Statement:
-          - Effect: Allow
-            Action:
-              - secretsmanager:GetSecretValue
-            Resource:
-              - !Join ['', ['arn:aws:secretsmanager:', !Ref "AWS::Region", ':', !Ref "AWS::AccountId", ':secret:', !Ref RangerAgentKeySecretName, '*']]
-              - !Join ['', ['arn:aws:secretsmanager:', !Ref "AWS::Region", ':', !Ref "AWS::AccountId", ':secret:', !Ref RangerServerCertSecretName, '*']]
-      Roles:
-        - !Ref EmrEc2Role
   EMRInstanceProfile:
     Type: AWS::IAM::InstanceProfile
     Properties:
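The rewritten policy grants read access to every secret under the emr/ranger name prefix, rather than the two individually named secrets the old AllowSecretsRetrieval policy referenced, so the template no longer depends on the RangerAgentKeySecretName and RangerServerCertSecretName parameters. As a minimal sketch of what this permits (assuming boto3 and an instance profile that carries EmrEc2Role; the secret name matches one the install script below retrieves with the AWS CLI):

    import boto3

    def fetch_ranger_secret(secret_id, region="us-east-1"):
        """Read a secret that falls under the policy's emr/ranger* prefix."""
        client = boto3.client("secretsmanager", region_name=region)
        # Same call the install script makes via `aws secretsmanager get-secret-value`.
        response = client.get_secret_value(SecretId=secret_id, VersionStage="AWSCURRENT")
        return response["SecretString"]

    # e.g. fetch_ranger_secret("emr/rangerGAagentkey")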

aws_emr_blog_v3/code/launch-cluster/cremr.py

Lines changed: 16 additions & 1 deletion

@@ -62,6 +62,18 @@ def create(event, context):
             ],
             'Applications': applist,
             'Steps': [
+                {
+                    "Name": "InstallHiveHDFSRangerPlugin",
+                    "ActionOnFailure": "CONTINUE",
+                    "HadoopJarStep": {
+                        "Jar": scriptRunnerJar,
+                        "Args": [
+                            "/mnt/tmp/aws-blog-emr-ranger/scripts/emr-steps/install-hdfs-ranger-plugin.sh",
+                            event["ResourceProperties"]["RangerHostname"],
+                            event["ResourceProperties"]["StackRegion"]
+                        ]
+                    }
+                },
                 {
                     "Name": "CreateDefaultHiveTables",
                     "ActionOnFailure": "CONTINUE",
@@ -299,7 +311,8 @@ def create(event, context):
                 "HadoopJarStep": {
                     "Jar": scriptRunnerJar,
                     "Args": [
-                        "/mnt/tmp/aws-blog-emr-ranger/scripts/emr-steps/livy-update-kerberos-name-rules.sh"
+                        "/mnt/tmp/aws-blog-emr-ranger/scripts/emr-steps/livy-update-kerberos-name-rules.sh",
+                        event["ResourceProperties"]["StackRegion"]
                     ]
                 }
             })
@@ -383,7 +396,9 @@ def create(event, context):
             "hadoop.proxyuser.livy.groups": "*",
             "hadoop.proxyuser.livy.hosts": "*",
             "hadoop.proxyuser.hive.hosts": "*",
+            "hadoop.proxyuser.hue.hosts": "*",
             "hadoop.proxyuser.hive.groups": "*",
+            "hadoop.proxyuser.hue.groups": "*",
             "hadoop.proxyuser.trino.hosts": "*",
             "hadoop.proxyuser.trino.groups": "*",
             "hadoop.proxyuser.hue_hive.groups": "*"
aws_emr_blog_v3/scripts/emr-steps/install-hdfs-ranger-plugin.sh

Lines changed: 152 additions & 0 deletions
@@ -0,0 +1,152 @@
+#!/bin/bash
+set -euo pipefail
+set -x
+# Variables
+if [[ -n "${JAVA_HOME:-}" ]] && [[ -x "${JAVA_HOME:-}/bin/java" ]]; then
+  echo "found java executable in JAVA_HOME"
+else
+  export JAVA_HOME=/usr/lib/jvm/java-openjdk
+fi
+if [ -f "/opt/aws/puppet/bin/puppet" ]; then
+  echo "Puppet found in path"
+  puppet_cmd='/opt/aws/puppet/bin/puppet'
+else
+  puppet_cmd='puppet'
+fi
+sudo -E bash -c 'echo $JAVA_HOME'
+installpath=/usr/lib/ranger
+ranger_server_fqdn=$1
+#mysql_jar_location=http://central.maven.org/maven2/mysql/mysql-connector-java/5.1.39/mysql-connector-java-5.1.39.jar
+mysql_jar=mysql-connector-java-5.1.39.jar
+default_region=${2-'us-east-1'}
+ranger_version=${3-'2.0'}
+s3bucket=${4-'aws-bigdata-blog'}
+s3bucketKey=${5-'artifacts/aws-blog-emr-ranger'}
+project_version=${6-'3.0'}
+emr_version=${7-'emr-5.30'}
+http_protocol=${8-'https'}
+install_cloudwatch_agent_for_audit=${9-'false'}
+
+if [ "$http_protocol" == "https" ]; then
+  RANGER_HTTP_URL=https://$ranger_server_fqdn:6182
+  SOLR_HTTP_URL=https://$ranger_server_fqdn:8984
+else
+  RANGER_HTTP_URL=http://$ranger_server_fqdn:6080
+  SOLR_HTTP_URL=http://$ranger_server_fqdn:8983
+fi
+
+ranger_download_version=0.5
+
+emr_release_version_regex="^emr-6.*"
+if [[ ("$emr_version" =~ $emr_release_version_regex) && ("$ranger_version" == "2.0") ]]; then
+  ranger_download_version=2.2.0-SNAPSHOT
+elif [ "$ranger_version" == "2.0" ]; then
+  ranger_download_version=2.1.0-SNAPSHOT
+else
+  ranger_download_version=1.1.0
+fi
+
+ranger_s3bucket=s3://${s3bucket}/${s3bucketKey}/ranger/ranger-$ranger_download_version
+ranger_hdfs_plugin=ranger-$ranger_download_version-hdfs-plugin
+
+## --- SSL Config ---
+
+## Cert configuration
+certs_s3_location=s3://${s3bucket}/${s3bucketKey}/${project_version}/emr-tls/
+certs_path="/tmp/certs"
+
+ranger_agents_certs_path="${certs_path}/ranger-agents-certs"
+ranger_server_certs_path="${certs_path}/ranger-server-certs"
+
+truststore_ranger_server_alias="rangerServerTrust"
+keystore_alias="rangerAgent"
+truststore_password="changeit"
+keystore_password="changeit"
+truststore_location="/etc/hadoop/conf/ranger-plugin-truststore.jks"
+keystore_location="/etc/hadoop/conf/ranger-plugin-keystore.jks"
+
+mkdir -p ${ranger_agents_certs_path}
+mkdir -p ${ranger_server_certs_path}
+
+sudo yum -y install https://dl.fedoraproject.org/pub/epel/epel-release-latest-7.noarch.rpm || true
+sudo yum install jq -y
+# set up the ranger plugin keystore and truststore
+aws secretsmanager get-secret-value --secret-id emr/rangerPluginCert --version-stage AWSCURRENT --region $default_region | jq -r ".SecretString" >${ranger_agents_certs_path}/certificateChain.pem
+aws secretsmanager get-secret-value --secret-id emr/rangerGAagentkey --version-stage AWSCURRENT --region $default_region | jq -r ".SecretString" >${ranger_agents_certs_path}/privateKey.pem
+
+openssl pkcs12 -export -in ${ranger_agents_certs_path}/certificateChain.pem -inkey ${ranger_agents_certs_path}/privateKey.pem -chain -CAfile ${ranger_agents_certs_path}/certificateChain.pem -name ${keystore_alias} -out ${ranger_agents_certs_path}/keystore.p12 -password pass:${keystore_password}
+keytool -delete -alias ${keystore_alias} -keystore ${keystore_location} -storepass ${keystore_password} -noprompt || true
+sudo keytool -importkeystore -deststorepass ${keystore_password} -destkeystore ${keystore_location} -srckeystore ${ranger_agents_certs_path}/keystore.p12 -srcstoretype PKCS12 -srcstorepass ${keystore_password}
+sudo chmod 444 ${keystore_location}
+# -----
+
+# set up the ranger admin server truststore
+aws secretsmanager get-secret-value --secret-id emr/rangerGAservercert --version-stage AWSCURRENT --region $default_region | jq -r ".SecretString" > ${ranger_server_certs_path}/trustedCertificates.pem
+
+sudo keytool -delete -alias ${truststore_ranger_server_alias} -keystore ${truststore_location} -storepass ${truststore_password} -noprompt || true
+sudo keytool -import -file ${ranger_server_certs_path}/trustedCertificates.pem -alias ${truststore_ranger_server_alias} -keystore ${truststore_location} -storepass ${truststore_password} -noprompt
+# -----
+
+# Setup
+sudo rm -rf $installpath/*hdfs*
+sudo mkdir -p $installpath/hadoop
+sudo chmod -R 777 $installpath
+cd $installpath
+#wget $mysql_jar_location
+aws s3 cp $ranger_s3bucket/$mysql_jar . --region us-east-1
+aws s3 cp $ranger_s3bucket/$ranger_hdfs_plugin.tar.gz . --region us-east-1
+mkdir $ranger_hdfs_plugin
+tar -xvf $ranger_hdfs_plugin.tar.gz -C $ranger_hdfs_plugin --strip-components=1
+cd $installpath/$ranger_hdfs_plugin
+
+## Updates for new Ranger
+mkdir -p /usr/lib/ranger/hadoop/etc
+sudo ln -s /etc/hadoop /usr/lib/ranger/hadoop/etc/
+sudo ln -s /usr/lib/ranger/hadoop/etc/hadoop/conf/hdfs-site.xml /usr/lib/ranger/hadoop/etc/hadoop/hdfs-site.xml || true
+sudo cp -r $installpath/$ranger_hdfs_plugin/lib/* /usr/lib/hadoop-hdfs/lib/
+sudo cp /usr/lib/hadoop-hdfs/lib/ranger-hdfs-plugin-impl/*.jar /usr/lib/hadoop-hdfs/lib/ || true
+sudo ln -s /etc/hadoop/ /usr/lib/ranger/hadoop/
+
+## Copy the keystore and truststore information
+sudo cp /etc/hive/conf/ranger-plugin-keystore.jks /etc/hadoop/conf/
+sudo cp /etc/hive/conf/ranger-keystore-creds.jceks /etc/hadoop/conf/
+sudo cp /etc/hive/conf/ranger-plugin-truststore.jks /etc/hadoop/conf/
+sudo cp /etc/hive/conf/ranger-truststore-creds.jceks /etc/hadoop/conf/
+# SSL configs
+sudo sed -i "s|POLICY_MGR_URL=.*|POLICY_MGR_URL=$RANGER_HTTP_URL|g" install.properties
+sudo sed -i "s|SSL_TRUSTSTORE_FILE_PATH=.*|SSL_TRUSTSTORE_FILE_PATH=${truststore_location}|g" install.properties
+sudo sed -i "s|SSL_TRUSTSTORE_PASSWORD=.*|SSL_TRUSTSTORE_PASSWORD=${truststore_password}|g" install.properties
+sudo sed -i "s|SSL_KEYSTORE_FILE_PATH=.*|SSL_KEYSTORE_FILE_PATH=${keystore_location}|g" install.properties
+sudo sed -i "s|SSL_KEYSTORE_PASSWORD=.*|SSL_KEYSTORE_PASSWORD=${keystore_password}|g" install.properties
+
+# Update the Ranger URL in the HDFS conf
+sudo sed -i "s|SQL_CONNECTOR_JAR=.*|SQL_CONNECTOR_JAR=$installpath/$mysql_jar|g" install.properties
+sudo sed -i "s|REPOSITORY_NAME=.*|REPOSITORY_NAME=hadoopdev|g" install.properties
+sudo sed -i "s|XAAUDIT.SOLR.ENABLE=.*|XAAUDIT.SOLR.ENABLE=true|g" install.properties
+sudo sed -i "s|XAAUDIT.SOLR.URL=.*|XAAUDIT.SOLR.URL=$SOLR_HTTP_URL/solr/ranger_audits|g" install.properties
+sudo sed -i "s|XAAUDIT.SOLR.SOLR_URL=.*|XAAUDIT.SOLR.SOLR_URL=$SOLR_HTTP_URL/solr/ranger_audits|g" install.properties
+
+# Filecache: write audits to the local file system
+sudo mkdir -p /var/log/ranger/audit/
+sudo chmod -R 777 /var/log/ranger/audit/
+
+# to solve java.lang.NoClassDefFoundError: org/apache/commons/configuration/Configuration
+sed -i 's|jceks://file|localjceks://file|g' enable-hdfs-plugin.sh
+
+# Filecache: write audits to the local file system
+if [ "$install_cloudwatch_agent_for_audit" == "true" ]; then
+  sudo mkdir -p /var/log/ranger/audit/
+  sudo chmod -R 777 /var/log/ranger/audit/
+  sudo sed -i "s|XAAUDIT.FILECACHE.IS_ENABLED=.*|XAAUDIT.FILECACHE.IS_ENABLED=true|g" install.properties
+  sudo sed -i "s|XAAUDIT.FILECACHE.FILE_SPOOL_DIR=.*|XAAUDIT.FILECACHE.FILE_SPOOL_DIR=/var/log/ranger/audit/|g" install.properties
+  sudo sed -i "s|XAAUDIT.FILECACHE.FILE_SPOOL.ROLLOVER.SECS=.*|XAAUDIT.FILECACHE.FILE_SPOOL.ROLLOVER.SECS=30|g" install.properties
+  sudo sed -i "s|XAAUDIT.FILECACHE.FILE_SPOOL.MAXFILES=.*|XAAUDIT.FILECACHE.FILE_SPOOL.MAXFILES=10|g" install.properties
+fi
+
+sudo -E bash enable-hdfs-plugin.sh
+# new copy command - 01/26/2020
+sudo cp -r /etc/hadoop/ranger-*.xml /etc/hadoop/conf/
+
+# ---- Restart the NameNode
+sudo ${puppet_cmd} apply -e 'service { "hadoop-hdfs-namenode": ensure => false, }'
+sudo ${puppet_cmd} apply -e 'service { "hadoop-hdfs-namenode": ensure => true, }'
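The script accepts up to nine positional parameters, but the step added in cremr.py passes only the first two (RangerHostname and StackRegion) and leaves the rest at the defaults shown above. A hypothetical fully expanded argument list, with each position annotated against the script's variables:

    # Hypothetical: how step Args line up with the script's $1..$9.
    install_step_args = [
        "/mnt/tmp/aws-blog-emr-ranger/scripts/emr-steps/install-hdfs-ranger-plugin.sh",
        "ip-10-0-0-10.ec2.internal",      # $1 ranger_server_fqdn (illustrative)
        "us-east-1",                      # $2 default_region
        "2.0",                            # $3 ranger_version
        "aws-bigdata-blog",               # $4 s3bucket
        "artifacts/aws-blog-emr-ranger",  # $5 s3bucketKey
        "3.0",                            # $6 project_version
        "emr-5.30",                       # $7 emr_version
        "https",                          # $8 http_protocol
        "false",                          # $9 install_cloudwatch_agent_for_audit
    ]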

aws_emr_blog_v3/scripts/emr-steps/livy-update-kerberos-name-rules.sh

Lines changed: 11 additions & 1 deletion

@@ -1,10 +1,20 @@
 #!/bin/bash
 set -euo pipefail
 set -x
+AWS_REGION=${1-'us-east-1'}
+DEFAULT_EC2_REALM='EC2\.INTERNAL'
+echo $(tr '[:upper:]' '[:lower:]' <<< "$AWS_REGION")
+if [[ $(tr '[:upper:]' '[:lower:]' <<< "$AWS_REGION") = "us-east-1" ]]; then
+  DEFAULT_EC2_REALM='EC2\.INTERNAL'
+  echo "AWS region is us-east-1, will use EC2 realm as ec2.internal"
+else
+  DEFAULT_EC2_REALM='COMPUTE\.INTERNAL'
+  echo "AWS region is NOT us-east-1, will use EC2 realm as compute.internal"
+fi
 
 livy_conf_file='/etc/livy/conf/livy.conf'
 
-sudo sh -c "echo 'livy.server.auth.kerberos.name_rules = RULE:[1:\$1@\$0](.*@AWSEMR\.COM)s/@.*///L RULE:[2:\$1@\$0](.*@AWSEMR\.COM)s/@.*///L RULE:[2:\$1@\$0](.*@EC2\.INTERNAL)s/@.*///L' >> $livy_conf_file"
+sudo sh -c "echo 'livy.server.auth.kerberos.name_rules = RULE:[1:\$1@\$0](.*@AWSEMR\.COM)s/@.*///L RULE:[2:\$1@\$0](.*@AWSEMR\.COM)s/@.*///L RULE:[2:\$1@\$0](.*@${DEFAULT_EC2_REALM})s/@.*///L' >> $livy_conf_file"
 
 sudo /opt/aws/puppet/bin/puppet apply -e 'service { "livy-server": ensure => false, }'
 sudo /opt/aws/puppet/bin/puppet apply -e 'service { "livy-server": ensure => true, }'
