#!/bin/bash
# Sets up the Fusion client on a Hadoop node: validates the settings below,
# checks that the storage bucket and the Fusion node can be reached, installs
# the client package, adds Fusion properties to core-site.xml and
# yarn-site.xml, and restarts the YARN service for this node's role.

# Stop at the first command that exits with a non-zero status.
set -e

# Required settings: both values must be filled in by editing this script
# before it is run; validate_inputs aborts the run if either is left empty.

# Name of the Google Cloud Storage bucket, without the "gs://" prefix (the
# script adds the prefix itself wherever it needs a bucket URI).
GS_BUCKET_NAME=
# IP address of the Fusion node. It is pinged once as a reachability check and
# is combined with port 8023 when the core-site configuration is written.
FUSION_NODE_IP=
#######################################
# Ensures every required script setting has a non-empty value.
# Globals:   GS_BUCKET_NAME (read), FUSION_NODE_IP (read)
# Outputs:   progress messages on stdout; the name of the first empty
#            setting on stderr
# Returns:   0 when all settings are present; otherwise exits the shell
#            with status 1
#######################################
validate_inputs() {
  local setting_name

  echo "Validating inputs"
  # Checked in this order so the first missing setting is the one reported.
  for setting_name in GS_BUCKET_NAME FUSION_NODE_IP; do
    # ${!setting_name} reads the variable whose name is held in setting_name.
    if [[ -z "${!setting_name}" ]]; then
      echo "${setting_name} is not set" >&2
      exit 1
    fi
  done
  echo "Script inputs are valid"
}
#######################################
# Confirms the configured bucket can be listed with gsutil.
# Globals:   GS_BUCKET_NAME (read)
# Outputs:   progress messages (and whatever gsutil prints) on stdout;
#            an error message on stderr when the listing fails
# Returns:   0 when the bucket can be listed; otherwise exits the shell
#            with status 1
#######################################
validate_gs() {
  local bucket_uri="gs://$GS_BUCKET_NAME"

  printf '%s\n' "Validating bucket $GS_BUCKET_NAME."
  # A failed listing is treated as "bucket is not usable" and ends the run.
  gsutil ls "$bucket_uri" || {
    printf '%s\n' "$GS_BUCKET_NAME is an invalid storage bucket!" >&2
    exit 1
  }
  printf '%s\n' "$GS_BUCKET_NAME is valid."
}
#######################################
# Confirms the Fusion node answers a single ping.
# Globals:   FUSION_NODE_IP (read)
# Outputs:   progress messages (and ping's own output) on stdout; an error
#            message on stderr when the ping fails
# Returns:   0 when the ping succeeds; otherwise exits the shell with
#            status 1
#######################################
validate_fusion_node_ip() {
  printf 'Validating IP %s.\n' "$FUSION_NODE_IP"
  # One echo request only: this is a reachability check, not a measurement.
  if ping -c 1 "$FUSION_NODE_IP"; then
    printf '%s is valid.\n' "$FUSION_NODE_IP"
  else
    printf 'Unable to see VM with IP: %s!\n' "$FUSION_NODE_IP" >&2
    exit 1
  fi
}
#######################################
# Downloads the Fusion client Debian package and installs it with dpkg.
#
# The package is downloaded into a freshly created private temporary
# directory rather than a fixed, predictable path under /tmp, so another
# local user cannot pre-create or symlink the download target. The directory
# is removed again whether or not the installation succeeds.
#
# Outputs:   a progress message on stdout (plus wget/dpkg output); an error
#            message on stderr on failure
# Returns:   0 on success; otherwise exits the shell with the exit status
#            of the command that failed (1 if the temporary directory could
#            not be created)
#######################################
install_fusion_client() {
  echo "Installing Fusion client."
  local client_url="https://storage.googleapis.com/wandisco-public-bucket/fusion-hcfs-gcs-1.1-client-hdfs_2.12.4.deb"

  local download_dir
  download_dir=$(mktemp -d) || {
    echo "Could not create a temporary download directory." >&2
    exit 1
  }
  local package_path="$download_dir/fusion-client.deb"

  # Capture the failing status instead of letting 'set -e' abort right away,
  # so the temporary directory is cleaned up on every path.
  local status=0
  if wget "$client_url" -O "$package_path"; then
    dpkg -i "$package_path" || status=$?
  else
    status=$?
  fi

  # ':?' makes the expansion fail loudly rather than ever running 'rm -rf'
  # on an empty path.
  rm -rf -- "${download_dir:?}"

  if ((status != 0)); then
    echo "Failed to install the Fusion client." >&2
    exit "$status"
  fi
}
#######################################
# Aborts the script when the given path is missing.
# Arguments: $1 - path that must exist (any file type)
# Outputs:   an error message on stderr when the path is missing
# Returns:   0 when the path exists; otherwise exits the shell with
#            status 1
#######################################
does_file_exist() {
  local required_path=$1

  [[ -e "$required_path" ]] || {
    printf 'Could not find %s.\n' "$required_path" >&2
    exit 1
  }
}
#######################################
# Adds the Fusion client properties to Hadoop's core-site.xml.
#
# The properties are written as <property> elements placed immediately
# before the closing </configuration> tag. sed keeps a copy of the
# untouched file next to it with a ".bk" suffix. Calling this twice on the
# same file inserts the properties twice.
#
# Globals:   GS_BUCKET_NAME (read), FUSION_NODE_IP (read)
# Arguments: $1 - optional path of the file to edit
#                 (default: /etc/hadoop/conf/core-site.xml)
# Outputs:   progress messages on stdout; error messages on stderr
# Returns:   0 on success; exits the shell with status 1 when the file is
#            missing or contains no </configuration> tag
#######################################
update_config() {
  echo "Updating core site configuration."
  local core_site_location="${1:-/etc/hadoop/conf/core-site.xml}"
  does_file_exist "$core_site_location"

  # Without the closing tag sed would match nothing and silently leave the
  # file unchanged, so treat that as an error instead of reporting success.
  if ! grep -q '</configuration>' "$core_site_location"; then
    echo "No </configuration> tag found in $core_site_location." >&2
    exit 1
  fi

  # NOTE(review): the same class is registered for both fs.fusion.impl and
  # fs.AbstractFileSystem.fusion.impl - confirm against the Fusion client
  # documentation.
  local fusion_class="com.wandisco.fs.client.FusionHcfs"

  # printf re-applies the format to each name/value pair and stores the
  # concatenated result in valsToInsert.
  local valsToInsert
  printf -v valsToInsert \
    '<property><name>%s</name><value>%s</value></property>' \
    "fs.fusion.underlyingFs" "gs://$GS_BUCKET_NAME" \
    "fusion.server" "$FUSION_NODE_IP:8023" \
    "fs.fusion.impl" "$fusion_class" \
    "fs.AbstractFileSystem.fusion.impl" "$fusion_class"
  # Re-emit the closing tag that the substitution below consumes.
  valsToInsert+="</configuration>"

  # '#' is the sed delimiter because the inserted text contains '/'. The
  # values are assumed not to contain '#', '&' or '\'.
  sed -i.bk 's#</configuration>#'"$valsToInsert"'#g' "$core_site_location"
  echo "Updated core site configuration"
}
#######################################
# Adds the Fusion file-system implementation properties to yarn-site.xml.
#
# The properties are written as <property> elements placed immediately
# before the closing </configuration> tag. sed keeps a copy of the
# untouched file next to it with a ".bk" suffix. Calling this twice on the
# same file inserts the properties twice.
#
# Arguments: $1 - optional path of the file to edit
#                 (default: /etc/hadoop/conf/yarn-site.xml)
# Outputs:   progress messages on stdout; error messages on stderr
# Returns:   0 on success; exits the shell with status 1 when the file is
#            missing or contains no </configuration> tag
#######################################
update_yarn_site() {
  echo "Updating yarn site configuration."
  local yarn_site_location="${1:-/etc/hadoop/conf/yarn-site.xml}"
  does_file_exist "$yarn_site_location"

  # Without the closing tag sed would match nothing and silently leave the
  # file unchanged, so treat that as an error instead of reporting success.
  if ! grep -q '</configuration>' "$yarn_site_location"; then
    echo "No </configuration> tag found in $yarn_site_location." >&2
    exit 1
  fi

  # NOTE(review): the same class is registered for both fs.fusion.impl and
  # fs.AbstractFileSystem.fusion.impl - confirm against the Fusion client
  # documentation.
  local fusion_class="com.wandisco.fs.client.FusionHcfs"

  # printf re-applies the format to each name/value pair and stores the
  # concatenated result in valsToInsert.
  local valsToInsert
  printf -v valsToInsert \
    '<property><name>%s</name><value>%s</value></property>' \
    "fs.fusion.impl" "$fusion_class" \
    "fs.AbstractFileSystem.fusion.impl" "$fusion_class"
  # Re-emit the closing tag that the substitution below consumes.
  valsToInsert+="</configuration>"

  # '#' is the sed delimiter, matching the sibling core-site update.
  sed -i.bk 's#</configuration>#'"$valsToInsert"'#g' "$yarn_site_location"
  echo "Updated yarn site configuration"
}
#######################################
# Restarts the YARN daemon that matches this node's Dataproc role.
#
# The role is read from the instance metadata attribute "dataproc-role".
# A node whose role is "Master" restarts the resource manager; any other
# node waits 60 seconds and then restarts the node manager.
#
# Globals:   ROLE (written) - the role string read from metadata
# Outputs:   a progress message on stdout, plus the init scripts' output
#######################################
restart_hadoop_services() {
  echo "Restarting Hadoop services"
  ROLE=$(/usr/share/google/get_metadata_value attributes/dataproc-role)

  case "${ROLE}" in
    Master)
      /etc/init.d/hadoop-yarn-resourcemanager restart
      ;;
    *)
      # NOTE(review): the delay presumably lets the master finish its own
      # restart before the workers come back - confirm.
      sleep 60
      # NOTE(review): the node manager is restarted twice in a row, exactly
      # as the script has always done; confirm whether the second restart is
      # deliberate before removing it.
      /etc/init.d/hadoop-yarn-nodemanager restart
      /etc/init.d/hadoop-yarn-nodemanager restart
      ;;
  esac
}
#######################################
# Entry point: runs every setup step in order. Each step ends the script on
# failure, so the final message is printed only when all of them succeeded.
#######################################
main() {
  # Checks first, so nothing is installed or edited on a bad configuration.
  validate_inputs
  validate_gs
  validate_fusion_node_ip

  # Install the client, wire it into the Hadoop configuration, then restart
  # the services so they pick up the change.
  install_fusion_client
  update_config
  update_yarn_site
  restart_hadoop_services

  echo "Fusion clients are now ready!"
}

main "$@"