Cloudera Backups

Configuration

# backup directory/location

/mdclone_shared/backups/

# backup runs daily
# hadoop meta data backups

15 2 * * * bash /root/scripts/rdbms_backup.sh >> /root/scripts/rdbms_backup.log 2>&1

# days of backup retention

retention_days=7

# get cloudera admin password

admin_pw=$(~/.admin.pass)

Script

#!/bin/bash
#
# Metadata backup script for the Cloudera / MDClone hosts.
#
# Dumps every configured MySQL schema and PostgreSQL database, exports the
# Cloudera Manager deployment configuration, stores each result as a zip
# archive under ${backup_dir}, and finally prunes archives that have reached
# the retention age.
#
# Output is written to stdout/stderr so the caller can redirect it to a log.
# Exit status: 0 when every backup succeeded, 1 when at least one failed.

# Without pipefail the exit status of "dump | zip" is zip's alone, so a failed
# dump would be reported as a success.
set -o pipefail

# configuration

retention_days=7
backup_dir=/mdclone_shared/backups
admin_pass_file=~/.admin.pass
cm_deployment_url="http://localhost:7180/api/v48/cm/deployment"
mysql_schemas=(scm metastore hue amon nav sentry oozie)
postgres1_databases_5432=(ace help lifelux mdclone nlp nlp_old)
postgres2_databases_5432=(ace help lifelux mdclone nlp nlp_old)
postgres2_databases_5433=(notifications dc)

# Evaluated once so every archive of a run carries the same date stamp.
datestamp=$(date +%d%m%Y)

# Becomes 1 as soon as any backup step fails; used as the exit status.
status=0

#######################################
# Print a timestamped log line.
# Arguments: $* - message text
# Outputs:   "## YYYY-MM-DD HH:MM:SS: message" on stdout
#######################################
log() {
  printf '## %s: %s\n' "$(date +'%F %H:%M:%S')" "$*"
}

#######################################
# Run a dump command and store its output as a zip archive.
# Globals:   status (set to 1 on failure)
# Arguments: $1  - label used in the completion/failure message
#            $2  - path of the archive to create
#            $3+ - dump command and its arguments
# Outputs:   progress lines on stdout, failure line on stderr
#######################################
run_backup() {
  local label=$1
  local archive=$2
  shift 2

  # Log exactly the command and archive that are used below.
  log "$*"
  log "creating backup ${archive}"

  # zip without arguments acts as a filter: stdin -> zip archive on stdout.
  if "$@" | /bin/zip > "${archive}"; then
    log "${label} completed"
  else
    log "ERROR: ${label} failed, removing incomplete ${archive}" >&2
    # An archive of a failed dump must not be mistaken for a usable backup.
    /bin/rm -f -- "${archive}"
    status=1
  fi
}

echo "############### hadoop mdclone meta backups ${datestamp} ####################################"

# mysql backups
for schema in "${mysql_schemas[@]}"; do
  run_backup mysqldump "${backup_dir}/mysql_${schema}.${datestamp}.sql.zip" \
    /usr/bin/mysqldump --single-transaction -u root -B "${schema}"
done

# postgres backups
for db in "${postgres1_databases_5432[@]}"; do
  run_backup pg_dump "${backup_dir}/cm1_postgres_${db}.${datestamp}.sql.zip" \
    /usr/bin/pg_dump -h 127.0.0.1 -U postgres "${db}"
done

for db in "${postgres2_databases_5432[@]}"; do
  run_backup pg_dump "${backup_dir}/cm2_postgres_${db}.${datestamp}.sql.zip" \
    /usr/bin/pg_dump -h app-mdclone-cm-2.ris.wustl.edu -U postgres "${db}"
done

for db in "${postgres2_databases_5433[@]}"; do
  run_backup pg_dump "${backup_dir}/cm2_postgres_${db}.${datestamp}.sql.zip" \
    /usr/bin/pg_dump -h app-mdclone-cm-2.ris.wustl.edu -p 5433 -U postgres "${db}"
done

# get cloudera admin password
# The password file is read as data. If it is executable it is run instead
# and its output is used, which keeps the previous invocation style working.
admin_pw=
if [[ -x "${admin_pass_file}" ]]; then
  admin_pw=$("${admin_pass_file}")
elif [[ -r "${admin_pass_file}" ]]; then
  admin_pw=$(< "${admin_pass_file}")
fi

# backup cloudera configuration
config_archive="${backup_dir}/cloudera_config.${datestamp}.scm.zip"
if [[ -z "${admin_pw}" ]]; then
  log "ERROR: no admin password available from ${admin_pass_file}, skipping cloudera config backup" >&2
  status=1
else
  log "creating backup ${config_archive}"
  # --fail: an HTTP error (e.g. 401) must fail the step instead of having the
  # error page archived as if it were the deployment configuration.
  # NOTE(review): the password is still passed on curl's command line and is
  # therefore visible in the process list while curl runs.
  if curl --fail --silent --show-error -u "admin:${admin_pw}" "${cm_deployment_url}" \
    | /bin/zip > "${config_archive}"; then
    log "cloudera config backup completed"
  else
    log "ERROR: cloudera config backup failed, removing incomplete ${config_archive}" >&2
    /bin/rm -f -- "${config_archive}"
    status=1
  fi
fi

# backup file cleanup
# "-ctime N" only matches files that are exactly N days old, so archives that
# slipped past that window (e.g. after a skipped run) would stay forever.
# "+(N-1)" matches everything that is N days old or older.
echo "removing backup files:"
find "${backup_dir}/" -type f \( -name "*.sql.zip" -o -name "*.scm.zip" \) \
  -ctime "+$(( retention_days - 1 ))" \
  -exec ls -l {} \; -exec /bin/rm -vf {} \;

exit "${status}"

Updated on August 7, 2025