Scripts to dump and restore PostgreSQL databases

2021-12-14 06:35:56 +09:00
commit 52ebf9671c
3 changed files with 1550 additions and 0 deletions

661
pg_db_dump_file.sh Executable file

@@ -0,0 +1,661 @@
#!/bin/bash
set -e -u -o pipefail
# dumps all the databases in compressed (custom) format
# EXCLUDE: space separated list of database names to be skipped
# KEEP: how many files to keep, eg 3 means keep 3 days + today's backup
# the file format includes the database name, owner and encoding:
# <database name>.<owner>.<encoding>.<database type>-<version>_<host>_<port>_<date in YYYYMMDD>_<time in HHMM>_<sequence in two digits with leading 0>.c.sql
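# Example (hypothetical values): the first dump today of database "shopdb",
# owned by "webuser" with UTF8 encoding, taken from PostgreSQL 13 over the
# local socket on port 5432 on 2021-12-14 at 06:35 would be named:
#   shopdb.webuser.UTF8.pgsql-13_local_5432_20211214_0635_01.c.sql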
function usage ()
{
cat <<- EOT
Usage: ${0##/*/} [-t] [-s] [-g] [-c] [-r|-a] [-k <number to keep>] [-n] [-b <path>] [-i <postgreSQL version>] [-d <dump database name> [-d ...]] [-e <exclude dump> [-e ...]] [-u <db user>] [-h <db host>] [-p <db port>] [-l <db password>] [-L <log path>] [-m]
-t: test usage, no real backup is done
-s: turn ON ssl mode, default mode is off
-g: turn OFF dumping globals data, default is dumping globals data
-c: run clean up of old files before data is dumped. Default is run after data dump.
-k <number>: keep how many backups, default is 3 days/files
-n: keep files in numbers not in days
-b <path>: backup target path, if not set, /mnt/backup/db_dumps_fc/ is used
-i <version>: override automatically set database version
-d <name>: database name to dump, option can be given multiple times, if not set all databases are dumped
-e <name>: exclude database from dump, option can be given multiple times, if not set none are excluded
-u <db user>: default is 'postgres'
-h <db host>: default is none
-p <db port>: default port is '5432'
-l <db password>: default password is empty
-r: use redhat base paths instead of debian
-a: use amazon base paths instead of debian
-L <log path>: where to put the dump log file, if not set tries to use the PostgreSQL log folder
-m: show this help message
EOT
}
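# Example calls (hypothetical paths and names):
#   pg_db_dump_file.sh -k 7 -b /mnt/backup/db_dumps_fc/ -e bigdb -e testdb
#     keep 7 days of backups, dump everything except "bigdb" and "testdb"
#   pg_db_dump_file.sh -n -k 5 -d shopdb -u postgres -p 5433
#     keep only the 5 newest files, dump just "shopdb" from the cluster on port 5433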
TEST=0; # if set to 1 will run script without doing anything
SSLMODE='disable';
GLOBALS=1; # if set to 0 will not dump globals
# in days, keeps KEEP+1 files, today + KEEP days before
# or keep number of files if CLEAN_NUMBER is true, then it can't be 0
KEEP=3;
CLEAN_NUMBER=0;
BACKUPDIR='';
DB_VERSION='';
DB_USER='';
DB_PASSWD='';
DB_HOST='';
DB_PORT='';
EXCLUDE=''; # space separated list of database names
INCLUDE=''; # space separated list of database names
BC='/usr/bin/bc';
PRE_RUN_CLEAN_UP=0;
SET_IDENT=0;
PORT_REGEX="^[0-9]{4,5}$";
OPTARG_REGEX="^-";
# log path
LOG_PATH='';
# base path for PostgreSQL binary
DBPATH_BASE='';
# defaults
_BACKUPDIR='/mnt/backup/db_dumps_fc/';
_DB_VERSION=$(pgv=$(pg_dump --version| grep "pg_dump" | cut -d " " -f 3); if [[ $(echo "${pgv}" | cut -d "." -f 1) -ge 10 ]]; then echo "${pgv}" | cut -d "." -f 1; else echo "${pgv}" | cut -d "." -f 1,2; fi );
_DB_USER='postgres';
_DB_PASSWD='';
_DB_HOST='';
_DB_PORT=5432;
_EXCLUDE=''; # space separated list of database names
_INCLUDE=''; # space separated list of database names
REDHAT=0;
AMAZON=0;
CONN_DB_HOST='';
ERROR=0;
# set options
while getopts ":ctsgnk:b:i:d:e:u:h:p:l:L:ram" opt; do
# pre test for unfilled
if [ "${opt}" = ":" ] || [[ "${OPTARG-}" =~ ${OPTARG_REGEX} ]]; then
if [ "${opt}" = ":" ]; then
CHECK_OPT=${OPTARG};
else
CHECK_OPT=${opt};
fi;
case ${CHECK_OPT} in
k)
echo "-k needs a number";
ERROR=1;
;;
b)
echo "-b needs a path";
ERROR=1;
;;
i)
echo "-i needs an ident";
ERROR=1;
;;
u)
echo "-u needs a user name";
ERROR=1;
;;
h)
echo "-h needs a host name";
ERROR=1;
;;
p)
echo "-p needs a port number";
ERROR=1;
;;
l)
echo "-l needs a login password";
ERROR=1;
;;
d)
echo "-d needs a database name";
ERROR=1;
;;
e)
echo "-e needs a database name";
ERROR=1;
;;
L)
echo "-L needs a log path";
ERROR=1;
;;
esac
fi;
# set options
case ${opt} in
t|test)
TEST=1;
;;
g|globals)
GLOBALS=0;
;;
c|clean-up-before)
PRE_RUN_CLEAN_UP=1;
;;
s|sslmode)
SSLMODE='require'; # PGSSLMODE has no 'enable' value; 'require' turns SSL on
;;
k|keep)
KEEP=${OPTARG};
;;
n|number-keep)
CLEAN_NUMBER=1;
;;
b|backuppath)
if [ -z "${BACKUPDIR}" ]; then
BACKUPDIR="${OPTARG}";
fi;
;;
i|ident)
if [ -z "${DB_VERSION}" ]; then
DB_VERSION=${OPTARG};
SET_IDENT=1;
fi;
;;
u|user)
if [ -z "${DB_USER}" ]; then
DB_USER=${OPTARG};
fi;
;;
h|hostname)
if [ -z "${DB_HOST}" ]; then
DB_HOST=${OPTARG};
fi;
;;
p|port)
if [ -z "${DB_PORT}" ]; then
DB_PORT=${OPTARG};
fi;
;;
l|login)
if [ -z "${DB_PASSWD}" ]; then
DB_PASSWD=${OPTARG};
fi;
;;
d|database)
if [ ! -z "${INCLUDE}" ]; then
INCLUDE=${INCLUDE}" ";
fi;
INCLUDE=${INCLUDE}${OPTARG};
;;
e|exclude)
if [ ! -z "${EXCLUDE}" ]; then
EXCLUDE=${EXCLUDE}" ";
fi;
EXCLUDE=${EXCLUDE}${OPTARG};
;;
r|redhat)
REDHAT=1;
;;
a|amazon)
AMAZON=1;
;;
L|logpath)
if [ ! -z "{$LOG_PATH}" ]; then
LOG_PATH="${OPTARG}";
fi;
;;
m|manual)
usage;
exit 0;
;;
:)
echo "Option -$OPTARG requires an argument."
;;
\?)
echo -e "\n Option does not exist: ${OPTARG}\n";
usage;
exit 1;
;;
esac;
done;
if [ ${ERROR} -eq 1 ]; then
exit 1;
fi;
if [ "${REDHAT}" -eq 1 ] && [ "${AMAZON}" -eq 1 ]; then
echo "You cannot set the -a and -r flag at the same time";
exit 1;
fi;
# if we have numeric keep and keep number is set to 0 abort
if [ ${CLEAN_NUMBER} -eq 1 ] && [ ${KEEP} -lt 1 ]; then
echo "If keep in numbers is on, keep must be at least 1 or higher";
exit 1;
fi;
# set the defaults
for name in BACKUPDIR DB_VERSION DB_USER DB_PASSWD DB_HOST DB_PORT EXCLUDE INCLUDE; do
# assign it to the real name if the real name is empty
if [ -z "${!name}" ]; then
# add the _ for the default name
default="_"${name};
eval ${name}=\${!default};
fi;
done;
# check base paths depending on installation and set the default log path
if [ "${REDHAT}" -eq 1 ]; then
# Redhat base path (official PGDG packages install to '/usr/pgsql-<version>')
# This is also used for Amazon with PostgreSQL NEWER than 9.6
PG_BASE_PATH="/usr/pgsql-";
# assume the default data directory log location
_LOG_PATH="/var/lib/pgsql/${DB_VERSION}/data/log/";
elif [ "${AMAZON}" -eq 1 ]; then
# only for 9.6 or older
PG_BASE_PATH="/usr/lib64/pgsql";
# LOG PATH, will be attached to DB VERSION
_LOG_PATH="/var/lib/pgsql${DB_VERSION}/data/pg_log/";
else
# Debian base path
PG_BASE_PATH="/usr/lib/postgresql/";
_LOG_PATH="/var/log/postgresql/";
fi;
# only use the default log path if none was set with -L
if [ -z "${LOG_PATH}" ]; then
LOG_PATH=${_LOG_PATH};
fi;
# setup log before everything else
LOG="${LOG_PATH}pg_db_dump_file.log";
# if we cannot write to the log file abort
if [[ -f "${LOG}" && ! -w "${LOG}" ]] || [[ ! -f "${LOG}" && ! -w "${LOG_PATH}" ]]; then
echo "Cannot write to ${LOG} or create a new log file in ${LOG_PATH}";
exit 1;
fi;
# log to file and also write to stdout
exec &> >(tee -a "${LOG}");
# check DB port is valid number
if ! [[ "${DB_PORT}" =~ ${PORT_REGEX} ]]; then
echo "The port needs to be a valid number: ${_port}";
exit 0;
fi;
# check if we have the 'bc' command available or not
if [ -f "${BC}" ]; then
BC_OK=1;
else
BC_OK=0;
fi;
# if DB_HOST is set, we need to add -h to the command line
# if nothing is set, DB_HOST is set to local so we know this is a "port" connection for later automatic restore
if [ -z "${DB_HOST}" ]; then
DB_HOST='local';
else
CONN_DB_HOST='-h '${DB_HOST};
fi;
# set the binaries we need
PG_PATH=${PG_BASE_PATH}${DB_VERSION}'/bin/';
PG_PSQL=${PG_PATH}'psql';
PG_DUMP=${PG_PATH}'pg_dump';
PG_DUMPALL=${PG_PATH}'pg_dumpall';
DB_TYPE='pgsql';
db='';
# abort if one of the core binaries cannot be found
if [ ! -f ${PG_PSQL} ] || [ ! -f ${PG_DUMP} ] || [ ! -f ${PG_DUMPALL} ]; then
echo "One of the core binaries (psql, pg_dump, pg_dumpall) could not be found.";
echo "Search Path: ${PG_PATH}";
echo "Perhaps a manual version set with -i is necessary";
echo "Backup aborted";
exit 1;
fi;
if [ ! -d "${BACKUPDIR}" ]; then
if ! mkdir "${BACKUPDIR}"; then
echo "Cannot create backup directory: ${BACKUPDIR}";
exit 1;
fi;
fi;
# check if we can write into that folder
touch "${BACKUPDIR}/tmpfile" || echo "[!] touch failed";
if [ ! -f "${BACKUPDIR}/tmpfile" ]; then
echo "Cannot write to ${BACKUPDIR}";
exit 1;
else
rm -f "${BACKUPDIR}/tmpfile";
fi;
# if backupdir is "." rewrite to pwd
if [ "${BACKUPDIR}" == '.' ]; then
BACKUPDIR=$(pwd);
fi;
# check if we can connect to the template1 database, if not we abort here
connect=$(${PG_PSQL} -U "${DB_USER}" ${CONN_DB_HOST} -p ${DB_PORT} -d template1 -t -A -F "," -X -q -c "SELECT datname FROM pg_catalog.pg_database WHERE datname = 'template1';") || echo "[!] pgsql connect error";
if [ "${connect}" != "template1" ]; then
echo "Failed to connect to template1 with user '${DB_USER}' at host '${DB_HOST}' on port '${DB_PORT}'";
exit 1;
fi;
# if we have an ident override set, set a different DUMP VERSION here than the automatic one
if [ "${SET_IDENT}" -eq 1 ]; then
DUMP_DB_VERSION=$(pgv=$(${PG_DUMP} --version| grep "pg_dump" | cut -d " " -f 3); if [[ $(echo "${pgv}" | cut -d "." -f 1) -ge 10 ]]; then echo "${pgv}" | cut -d "." -f 1; else echo "${pgv}" | cut -d "." -f 1,2; fi );
else
DUMP_DB_VERSION=${DB_VERSION};
fi;
# export the SSL mode (default is 'disable', use -s to turn it on)
export PGSSLMODE=${SSLMODE};
# METHOD: convert_time
# PARAMS: timestamp in seconds or with milliseconds (nnnn.nnnn)
# RETURN: formatted string with human readable time (d/h/m/s)
# CALL : var=$(convert_time $timestamp);
# DESC : converts a timestamp or a timestamp with float milliseconds to a human readable format
# output is in days/hours/minutes/seconds
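# EXAMPLE: convert_time 90061.5000 prints "1d 1h 1m 1s 5000ms"
#          (90061s = 1 day 1 hour 1 minute 1 second; the four fractional
#          digits are reported as the ms part)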
function convert_time
{
timestamp=${1};
# round to four digits for ms
timestamp=$(printf "%1.4f" $timestamp);
# get the ms part and remove any leading 0
ms=$(echo ${timestamp} | cut -d "." -f 2 | sed -e 's/^0*//');
timestamp=$(echo ${timestamp} | cut -d "." -f 1);
timegroups=(86400 3600 60 1); # day, hour, min, sec
timenames=("d" "h" "m" "s"); # day, hour, min, sec
output=( );
time_string='';
for timeslice in ${timegroups[@]}; do
# floor for the division, push to output
if [ ${BC_OK} -eq 1 ]; then
output[${#output[*]}]=$(echo "${timestamp}/${timeslice}" | bc);
timestamp=$(echo "${timestamp}%${timeslice}" | bc);
else
output[${#output[*]}]=$(awk "BEGIN {printf \"%d\", ${timestamp}/${timeslice}}");
timestamp=$(awk "BEGIN {printf \"%d\", ${timestamp}%${timeslice}}");
fi;
done;
for ((i=0; i<${#output[@]}; i++)); do
if [ ${output[$i]} -gt 0 ] || [ ! -z "$time_string" ]; then
if [ ! -z "${time_string}" ]; then
time_string=${time_string}" ";
fi;
time_string=${time_string}${output[$i]}${timenames[$i]};
fi;
done;
# milliseconds must be filled, but we also check that it is not the "nan" string
# that can appear in the original value
if [ ! -z ${ms} ] && [ "${ms}" != "nan" ]; then
if [ ${ms} -gt 0 ]; then
time_string=${time_string}" "${ms}"ms";
fi;
fi;
# just in case the time is 0
if [ -z "${time_string}" ]; then
time_string="0s";
fi;
echo -n "${time_string}";
}
# METHOD: convert_bytes
# PARAMS: size in bytes
# RETURN: human readable byte data in TB/GB/MB/KB/etc
# CALL : size=$(convert_bytes $bytes);
# DESC : converts bytes into human readable format with 2 decimals
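# EXAMPLE: convert_bytes 1536000 prints roughly "1.46MB"
#          (1536000 / 1024 / 1024 is about 1.46)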
function convert_bytes
{
bytes=${1};
# use awk to calc it
echo -n $(echo ${bytes} | awk 'function human(x) {
s=" B KB MB GB TB EB PB YB ZB"
while (x>=1024 && length(s)>1)
{x/=1024; s=substr(s,4)}
s=substr(s,1,4)
xf=(s==" B ")?"%d ":"%.2f"
return sprintf( xf"%s\n", x, s)
}
{gsub(/^[0-9]+/, human($1)); print}');
}
# METHOD: get_dump_file_name
# PARAMS: none
# RETURN: dump file name (echo)
# CALL : var=$(get_dump_file_name);
# DESC : function for getting the correct dump file
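# EXAMPLE: if shopdb.webuser.UTF8.pgsql-13_local_5432_20211214_0635_01.c.sql
#          already exists for the current minute, the next call returns the
#          same name with sequence "02" (hypothetical file name)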
function get_dump_file_name
{
# set base search for the files
sequence="*";
if [ -n "${db}" ]; then
db_name=${db}"."${owner}"."${encoding}".";
else
db_name="pg_globals."${DB_USER}".NONE.";
fi;
file=${BACKUPDIR}"/"${db_name}${DB_TYPE}"-"${DUMP_DB_VERSION}"_"${DB_HOST}"_"${DB_PORT}"_"$(date +%Y%m%d)"_"$(date +%H%M)"_"${sequence}".c.sql";
seq='';
# we need to find the next sequence number
for i in $(ls -1 ${file} 2>/dev/null); do
# strip the ".c.sql" suffix, take the last "_" part (the sequence)
# and cut the leading 0 so we can run +1 on it
seq=$(echo "${i%.c.sql}" | awk -F'_' '{print $NF}' | sed -e "s/^0//");
done;
if [ ! -z "${seq}" ]; then
# add +1 and if < 10 prefix with 0
let seq=${seq}+1;
if [ ${seq} -lt 10 ]; then
sequence="0"${seq};
else
sequence=${seq};
fi;
else
sequence="01";
fi;
# now build correct file name
filename=${BACKUPDIR}"/"${db_name}${DB_TYPE}"-"${DUMP_DB_VERSION}"_"${DB_HOST}"_"${DB_PORT}"_"$(date +%Y%m%d)"_"$(date +%H%M)"_"${sequence}".c.sql";
echo "${filename}";
}
# METHOD: get_dump_databases
# PARAMS: none
# RETURN: none
# CALLS : var=$(get_dump_databases)
# DESC : this is needed only for the clean up run if the clean up is run before
# the actual database dump
# fills up the global search names array
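# EXAMPLE: with globals dumping on and the databases "shopdb" and "statsdb"
#          selected, search_names ends up as ("pg_globals.*" "shopdb.*" "statsdb.*")
#          (hypothetical database names)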
function get_dump_databases
{
search_names=();
if [ ${GLOBALS} -eq 1 ]; then
search_names+=("pg_globals.*");
fi;
for owner_db in $(${PG_PSQL} -U ${DB_USER} ${CONN_DB_HOST} -p ${DB_PORT} -d template1 -t -A -F "," -X -q -c "SELECT pg_catalog.pg_get_userbyid(datdba) AS owner, datname, pg_catalog.pg_encoding_to_char(encoding) FROM pg_catalog.pg_database WHERE datname "\!"~ 'template(0|1)';"); do
db=$(echo ${owner_db} | cut -d "," -f 2);
# check if we exclude this db
exclude=0;
include=0;
for excl_db in ${EXCLUDE}; do
if [ "${db}" = "${excl_db}" ]; then
exclude=1;
break;
fi;
done;
if [ ! -z "${INCLUDE}" ]; then
for incl_db in ${INCLUDE}; do
if [ "${db}" = "${incl_db}" ]; then
include=1;
break;
fi;
done;
else
include=1;
fi;
if [ ${exclude} -eq 0 ] && [ ${include} -eq 1 ]; then
search_names+=("${db}.*");
fi;
done;
}
# METHOD: clean_up
# PARAMS: none
# RETURN: none
# CALL : $(clean_up);
# DESC : checks for older files than given keep time/amount and removes them
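# EXAMPLE: in day based mode with KEEP=3 this ends up running a find call
#          like the following (hypothetical pattern):
#          find /mnt/backup/db_dumps_fc/ -mtime +3 -name "shopdb.*pgsql*.sql" -type f -delete -print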
function clean_up
{
if [ -d ${BACKUPDIR} ]; then
if [ ${CLEAN_NUMBER} -eq 0 ]; then
echo "Cleanup older than ${KEEP} days backup in ${BACKUPDIR}";
else
echo "Cleanup up, keep only ${KEEP} backups in ${BACKUPDIR}";
# if we run clean before backup, we need to clean up +1
# so if we keep one, we must remove all data before running new
if [ ${PRE_RUN_CLEAN_UP} -eq 1 ]; then
let KEEP=${KEEP}-1;
fi;
fi;
# build the find argument list based on the search names pattern
find_args=();
for name in "${search_names[@]}"; do
# for not number based, we build the find string here
# else we do the delete here already
if [ ${CLEAN_NUMBER} -eq 0 ]; then
if [ ${#find_args[@]} -gt 0 ]; then
find_args+=(-o);
fi;
# keep the pattern as one quoted argument so the shell does not expand it
find_args+=(-mtime +${KEEP} -name "${name}${DB_TYPE}*.sql" -type f -delete -print);
echo "- Remove old backups for '${name}'";
else
# if we do number based delete of old data, but only if the number of
# files is bigger than the keep number or equal if we do PRE_RUN_CLEAN_UP
# this can be error, but we allow it -> script should not abort here
# note we have a wildcard in the name, so we can't put that into ""
count=$(ls "${BACKUPDIR}/"${name}${DB_TYPE}*.sql 2>/dev/null | wc -l) || true;
if [ ${PRE_RUN_CLEAN_UP} -eq 1 ]; then
let count=${count}+1;
fi;
if [ ${count} -gt ${KEEP} ]; then
# calculate the amount to delete
# eg if we want to keep 1, and we have 3 files then we need to delete 2
# keep is always +1 (to include the backup that is about to be written).
# count is +1 if we do a pre-run cleanup
# grouped by db name, db type
let TO_DELETE=${count}-${KEEP};
echo "- Remove old backups for '${name}', found ${count}, will delete ${TO_DELETE}";
if [ ${TEST} -eq 0 ]; then
ls -tr "${BACKUPDIR}/"${name}${DB_TYPE}*.sql 2>/dev/null | head -n ${TO_DELETE} | xargs rm;
else
echo "ls -tr ${BACKUPDIR}/${name}${DB_TYPE}*.sql 2>/dev/null | head -n ${TO_DELETE} | xargs rm";
fi;
fi;
fi;
done;
# if we do find (day based) delete of old data
if [ ${CLEAN_NUMBER} -eq 0 ] && [ ${#find_args[@]} -gt 0 ]; then
if [ ${TEST} -eq 0 ]; then
find "${BACKUPDIR}" "${find_args[@]}";
else
echo "find ${BACKUPDIR} ${find_args[*]}";
fi;
fi;
fi
}
if [ ! -z "${DB_PASSWD}" ]; then
export PGPASSWORD=${DB_PASSWD};
fi;
START=$(date "+%s");
echo "=======================================================================>";
printf "Starting at %s\n" "$(date '+%Y-%m-%d %H:%M:%S')";
echo "Target dump directory is: ${BACKUPDIR}";
echo "Keep ${KEEP} backups";
# if flag is set, do pre run clean up
if [ ${PRE_RUN_CLEAN_UP} -eq 1 ]; then
get_dump_databases;
clean_up;
fi;
echo "Backing up databases:";
# reset search name list for actual dump
search_names=();
# dump globals
if [ ${GLOBALS} -eq 1 ]; then
echo -e -n "+ Dumping globals ... "
# reset any previous set db name from deletes so the correct global file name is set
db='';
filename=$(get_dump_file_name);
search_names+=("pg_globals.*"); # this is used for the find/delete part
if [ ${TEST} -eq 0 ]; then
${PG_DUMPALL} -U ${DB_USER} ${CONN_DB_HOST} -p ${DB_PORT} --globals-only > "${filename}";
else
echo "${PG_DUMPALL} -U ${DB_USER} ${CONN_DB_HOST} -p ${DB_PORT} --globals-only > ${filename}";
fi;
echo "done";
else
echo "- Skip dumping globals";
fi;
echo -n "(+) Dump databases: ";
if [ -z "${INCLUDE}" ]; then
echo "All";
else
echo ${INCLUDE};
fi;
echo -n "(-) Exclude databases: ";
if [ -z "${EXCLUDE}" ]; then
echo "None";
else
echo ${EXCLUDE};
fi;
filesize_sum=0;
for owner_db in $(${PG_PSQL} -U ${DB_USER} ${CONN_DB_HOST} -p ${DB_PORT} -d template1 -t -A -F "," -X -q -c "SELECT pg_catalog.pg_get_userbyid(datdba) AS owner, datname, pg_catalog.pg_encoding_to_char(encoding) AS encoding FROM pg_catalog.pg_database WHERE datname "\!"~ 'template(0|1)' ORDER BY datname;"); do
# get the user who owns the DB too
owner=$(echo ${owner_db} | cut -d "," -f 1);
db=$(echo ${owner_db} | cut -d "," -f 2);
encoding=$(echo ${owner_db} | cut -d "," -f 3);
# check if we exclude this db
exclude=0;
include=0;
for excl_db in ${EXCLUDE}; do
if [ "${db}" = "${excl_db}" ]; then
exclude=1;
break;
fi;
done;
if [ ! -z "${INCLUDE}" ]; then
for incl_db in ${INCLUDE}; do
if [ "${db}" = "${incl_db}" ]; then
include=1;
break;
fi;
done;
else
include=1;
fi;
if [ ${exclude} -eq 0 ] && [ ${include} -eq 1 ]; then
printf "+ Dumping database: %35s ... " "${db}";
filename=$(get_dump_file_name);
search_names+=("${db}.*");
SUBSTART=$(date "+%s");
if [ ${TEST} -eq 0 ]; then
${PG_DUMP} -U ${DB_USER} ${CONN_DB_HOST} -p ${DB_PORT} -c --format=c ${db} > "${filename}";
else
echo "${PG_DUMP} -U ${DB_USER} ${CONN_DB_HOST} -p ${DB_PORT} -c --format=c ${db} > ${filename}";
fi;
# get the file size for the dumped file and convert it to a human readable format
filesize=0;
if [ -f "${filename}" ]; then
filesize=$(wc -c "${filename}" | cut -f 1 -d ' ');
filesize_sum=$((filesize + filesize_sum));
fi;
DURATION=$(($(date "+%s") - SUBSTART));
printf "done (%s and %s)\n" "$(convert_time ${DURATION})" "$(convert_bytes ${filesize})";
else
printf -- "- Exclude database: %35s\n" "${db}";
fi;
done
printf "Backup ended at %s\n" "$(date '+%Y-%m-%d %H:%M:%S')";
if [ ! -z "${DB_PASSWD}" ]; then
unset DB_PASSWD PGPASSWORD;
fi;
if [ ${PRE_RUN_CLEAN_UP} -eq 0 ]; then
clean_up;
fi;
DURATION=$(($(date "+%s") - START));
printf "Cleanup ended at %s\n" "$(date '+%Y-%m-%d %H:%M:%S')";
printf "Finished backup in %s with %s\n" "$(convert_time ${DURATION})" "$(convert_bytes ${filesize_sum})";
## END

435
pg_drop_restore.sh Executable file

@@ -0,0 +1,435 @@
#!/bin/bash
# Author: Clemens Schwaighofer
# Description:
# Drop and restore one database
function usage ()
{
cat <<- EOT
Restores a single database dump to a database
Usage: ${0##/*/} -o <DB OWNER> -d <DB NAME> -f <FILE NAME> [-h <DB HOST>] [-p <DB PORT>] [-e <ENCODING>] [-i <POSTGRES VERSION>] [-j <JOBS>] [-s] [-q] [-r|-a] [-n]
-o <DB OWNER>: The user who will be owner of the database to be restored
-d <DB NAME>: The database to restore the file to
-f <FILE NAME>: the data that should be loaded
-h <DB HOST>: optional hostname, if not given 'localhost' is used. Use 'local' to use unix socket
-p <DB PORT>: optional port number, if not given '5432' is used
-e <ENCODING>: optional encoding name, if not given 'UTF8' is used
-i <POSTGRES VERSION>: optional postgresql version in the format X.Y, if not given the default is used (current active)
-j <JOBS>: run this many restore jobs in parallel. If not set, the number of CPUs is used
-q: quiet, do not ask for confirmation before the drop and restore
-s: Restore only schema, no data
-r: use redhat base paths instead of debian
-a: use amazon base paths instead of debian
-n: dry run, do not do anything, just test flow
EOT
}
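# Example calls (hypothetical file and database names):
#   pg_drop_restore.sh -f shopdb.webuser.UTF8.pgsql-13_local_5432_20211214_0635_01.c.sql
#     owner, database, encoding, version, host and port are parsed from the file name
#   pg_drop_restore.sh -o webuser -d shopdb_copy -f shopdb.webuser.UTF8.pgsql-13_local_5432_20211214_0635_01.c.sql -j 4
#     restore the same dump into "shopdb_copy" with 4 parallel jobs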
_port=5432;
_host='local';
_encoding='UTF8';
role='';
schema='';
owner='';
database='';
encoding='';
file='';
host='';
port='';
ident='';
ERROR=0;
NO_ASK=0;
TEMPLATEDB='template0';
SCHEMA_ONLY=0;
REDHAT=0;
AMAZON=0;
DRY_RUN=0;
BC='/usr/bin/bc';
PORT_REGEX="^[0-9]{4,5}$";
OPTARG_REGEX="^-";
MAX_JOBS='';
# if we have options, set them and then ignore anything below
while getopts ":o:d:h:f:p:e:i:j:raqnms" opt; do
# pre test for unfilled
if [ "${opt}" = ":" ] || [[ "${OPTARG-}" =~ ${OPTARG_REGEX} ]]; then
if [ "${opt}" = ":" ]; then
CHECK_OPT=${OPTARG};
else
CHECK_OPT=${opt};
fi;
case ${CHECK_OPT} in
o)
echo "-o needs an owner name";
ERROR=1;
;;
d)
echo "-d needs a database name";
ERROR=1;
;;
h)
echo "-h needs a host name";
ERROR=1;
;;
f)
echo "-f needs a file name";
ERROR=1;
;;
p)
echo "-h needs a port number";
ERROR=1;
;;
e)
echo "-e needs an encoding";
ERROR=1;
;;
i)
echo "-i needs a postgresql version";
ERROR=1;
;;
j)
echo "-j needs a numeric value for parallel jobs";
ERROR=1;
;;
esac
fi;
case $opt in
o|owner)
if [ -z "$owner" ]; then
owner=$OPTARG;
# if not standard user we need to set restore role
# so tables/etc get set to new user
role="--no-owner --role $owner";
fi;
;;
d|database)
if [ -z "$database" ]; then
database=$OPTARG;
fi;
;;
e|encoding)
if [ -z "$encoding" ]; then
encoding=$OPTARG;
fi;
;;
f|file)
if [ -z "$file" ]; then
file=$OPTARG;
fi;
;;
h|hostname)
if [ -z "$host" ]; then
# if local it is socket
if [ "$OPTARG" != "local" ]; then
host='-h '$OPTARG;
else
host='';
fi;
_host=$OPTARG;
fi;
;;
p|port)
if [ -z "$port" ]; then
port='-p '$OPTARG;
_port=$OPTARG;
fi;
;;
i|ident)
if [ -z "$ident" ]; then
ident=$OPTARG;
fi;
;;
j|jobs)
MAX_JOBS=${OPTARG};
;;
q|quiet)
NO_ASK=1;
;;
r|redhat)
REDHAT=1;
;;
a|amazon)
AMAZON=1;
;;
n|dry-run)
DRY_RUN=1;
;;
s|schema-only)
SCHEMA_ONLY=1
schema='-s';
;;
m|help)
usage;
exit 0;
;;
\?)
echo -e "\n Option does not exist: $OPTARG\n";
usage;
exit 1;
;;
esac;
done;
if [ "$REDHAT" -eq 1 ] && [ "$AMAZON" -eq 1 ]; then
echo "You cannot set the -a and -r flag at the same time";
exit 1;
fi;
# check that the port is a valid number
if ! [[ "$_port" =~ $PORT_REGEX ]]; then
echo "The port needs to be a valid number: $_port";
exit 1;
fi;
NUMBER_REGEX="^[0-9]{1,}$";
# find the max allowed jobs based on the cpu count
# because setting more than this is not recommended
cpu=$(cat /proc/cpuinfo | grep processor | tail -n 1);
_max_jobs=$(( ${cpu##*: } + 1 )); # +1 because the cpu numbering starts with 0
# abort if MAX_JOBS is not a number, is smaller than 1, or is greater than _max_jobs
if [ ! -z "${MAX_JOBS}" ]; then
# check that it is a valid number
if ! [[ "$MAX_JOBS" =~ $NUMBER_REGEX ]]; then
echo "Please enter a number for the -j option";
exit 1;
fi;
if [ "${MAX_JOBS}" -lt 1 ] || [ "${MAX_JOBS}" -gt ${_max_jobs} ]; then
echo "The value for the jobs option -j cannot be smaller than 1 or bigger than ${_max_jobs}";
exit 1;
fi;
else
# auto set the MAX_JOBS based on the cpu count
MAX_JOBS=${_max_jobs};
fi;
# check if we have the 'bc' command available or not
if [ -f "${BC}" ]; then
BC_OK=1;
else
BC_OK=0;
fi;
if [ ! -f "${file}" ]; then
echo "File name needs to be provided or file could not be found";
exit 1;
fi;
# METHOD: convert_time
# PARAMS: timestamp in seconds or with milliseconds (nnnn.nnnn)
# RETURN: formatted string with human readable time (d/h/m/s)
# CALL : var=$(convert_time $timestamp);
# DESC : converts a timestamp or a timestamp with float milliseconds
# to a human readable format
# output is in days/hours/minutes/seconds
function convert_time
{
timestamp=${1};
# round to four digits for ms
timestamp=$(printf "%1.4f" $timestamp);
# get the ms part and remove any leading 0
ms=$(echo ${timestamp} | cut -d "." -f 2 | sed -e 's/^0*//');
timestamp=$(echo ${timestamp} | cut -d "." -f 1);
timegroups=(86400 3600 60 1); # day, hour, min, sec
timenames=("d" "h" "m" "s"); # day, hour, min, sec
output=( );
time_string=;
for timeslice in ${timegroups[@]}; do
# floor for the division, push to output
if [ ${BC_OK} -eq 1 ]; then
output[${#output[*]}]=$(echo "${timestamp}/${timeslice}" | bc);
timestamp=$(echo "${timestamp}%${timeslice}" | bc);
else
output[${#output[*]}]=$(awk "BEGIN {printf \"%d\", ${timestamp}/${timeslice}}");
timestamp=$(awk "BEGIN {printf \"%d\", ${timestamp}%${timeslice}}");
fi;
done;
for ((i=0; i<${#output[@]}; i++)); do
if [ ${output[$i]} -gt 0 ] || [ ! -z "$time_string" ]; then
if [ ! -z "${time_string}" ]; then
time_string=${time_string}" ";
fi;
time_string=${time_string}${output[$i]}${timenames[$i]};
fi;
done;
if [ ! -z "${ms}" ] && [ "${ms}" != "nan" ] && [ "${ms}" -gt 0 ]; then
time_string=${time_string}" "${ms}"ms";
fi;
# just in case the time is 0
if [ -z "${time_string}" ]; then
time_string="0s";
fi;
echo -n "${time_string}";
}
# for the auto find, we need to get only the filename, and therefore remove all path info
db_file=`basename $file`;
# if file is set and exists, but no owner or database is given, use the file name data to get user & database
if [ -r "$file" ] && ( [ ! "$owner" ] || [ ! "$database" ] || [ ! "$encoding" ] ); then
# file name format is
# <database>.<owner>.<encoding>.<db type>-<version>_<host>_<port>_<date>_<time>_<sequence>
# we are mainly interested in the first three
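# EXAMPLE: shopdb.webuser.UTF8.pgsql-13_local_5432_20211214_0635_01.c.sql
#          parses to database "shopdb", owner "webuser", encoding "UTF8",
#          version "13", host "local" and port "5432" (hypothetical values)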
_database=`echo $db_file | cut -d "." -f 1`;
_owner=`echo $db_file | cut -d "." -f 2`;
__encoding=`echo $db_file | cut -d "." -f 3`;
# set the others as optional
_ident=`echo $db_file | cut -d "." -f 4 | cut -d "-" -f 2 | cut -d "_" -f 1`; # db version, first part
# versions before 10 have a second version part after the "."
if [ "$_ident" -lt 10 ]; then
_ident=$_ident'.'`echo $db_file | cut -d "." -f 5 | cut -d "_" -f 1`; # db version, second part (after .)
__host=`echo $db_file | cut -d "." -f 5 | cut -d "_" -f 2`;
__port=`echo $db_file | cut -d "." -f 5 | cut -d "_" -f 3`;
else
__host=`echo $db_file | cut -d "." -f 4 | cut -d "_" -f 2`;
__port=`echo $db_file | cut -d "." -f 4 | cut -d "_" -f 3`;
fi;
# if any of those are not set, override by the file name settings
if [ ! "$owner" ]; then
owner=$_owner;
fi;
if [ ! "$database" ]; then
database=$_database;
fi;
# port has to be a valid number, at least 4 digits long and maximum 5 digits
if [ ! "$port" ] && [[ $__port =~ $PORT_REGEX ]] ; then
port='-p '$__port;
_port=$__port;
fi;
# unless it is local and no command line option is set, set the target connection host
if [ ! "$host" ] && [ "$__host" != "local" ] && [ "$_host" != "local" ]; then
host='-h '$__host;
_host=$__host;
fi;
if [ ! "$encoding" ]; then
if [ ! -z "$__encoding" ]; then
encoding=$__encoding;
else
encoding=$_encoding;
fi;
fi;
if [ ! "$ident" ]; then
ident=$_ident;
fi;
fi;
# if the file is not set or does not exist, abort
if [ ! "$file" ] || [ ! -f "$file" ]; then
echo "The file has not been set or the file given could not be found.";
exit 1;
fi;
if [ ! "$owner" ] || [ ! "$encoding" ] || [ ! "$database" ]; then
echo "The Owner, database name and encoding could not be set automatically, the have to be given as command line options.";
exit 1;
fi;
if [ "$REDHAT" -eq 1 ]; then
# Debian base path
PG_BASE_PATH='/usr/pgsql-';
elif [ "$AMAZON" -eq 1 ]; then
PG_BASE_PATH='/usr/lib64/pgsql';
else
# Redhat base path (for non official ones would be '/usr/pgsql-'
PG_BASE_PATH='/usr/lib/postgresql/';
fi;
# if no ident is given, try to find the default one, if not fall back to pre set one
if [ ! -z "$ident" ]; then
PG_PATH=$PG_BASE_PATH$ident'/bin/';
if [ ! -d "$PG_PATH" ]; then
ident='';
fi;
fi;
if [ -z "$ident" ]; then
# try to run psql from default path and get the version number
ident=$(pgv=$(pg_dump --version| grep "pg_dump" | cut -d " " -f 3); if [[ $(echo "${pgv}" | cut -d "." -f 1) -ge 10 ]]; then echo "${pgv}" | cut -d "." -f 1; else echo "${pgv}" | cut -d "." -f 1,2; fi );
if [ ! -z "$ident" ]; then
PG_PATH=$PG_BASE_PATH$ident'/bin/';
else
# hard setting
ident='9.6';
PG_PATH=$PG_BASE_PATH'9.6/bin/';
fi;
fi;
PG_DROPDB=$PG_PATH"dropdb";
PG_CREATEDB=$PG_PATH"createdb";
PG_CREATELANG=$PG_PATH"createlang";
PG_RESTORE=$PG_PATH"pg_restore";
PG_PSQL=$PG_PATH"psql";
TEMP_FILE="temp";
LOG_FILE_EXT=$database.`date +"%Y%m%d_%H%M%S"`".log";
# core abort if no core files found
if [ ! -f $PG_PSQL ] || [ ! -f $PG_DROPDB ] || [ ! -f $PG_CREATEDB ] || [ ! -f $PG_RESTORE ]; then
echo "One of the core binaries (psql, dropdb, createdb, pg_restore) could not be found.";
echo "Search Path: ${PG_PATH}";
echo "Perhaps a manual version set with -i is necessary";
echo "Restore aborted";
exit 1;
fi;
# check if port / host settings are OK
# if I cannot connect with user postgres to template1, the restore won't work
output=`echo "SELECT version();" | $PG_PSQL -U postgres $host $port template1 -q -t -X -A -F "," 2>&1`;
found=`echo "$output" | grep "PostgreSQL"`;
# if the output does not have the PG version string, we have an error and abort
if [ -z "$found" ]; then
echo "Cannot connect to the database: $output";
exit 1;
fi;
echo "Will drop database '$database' on host '$_host:$_port' and load file '$file' with user '$owner', set encoding '$encoding' and use database version '$ident'";
if [ $SCHEMA_ONLY -eq 1 ]; then
echo "!!!!!!! WILL ONLY RESTORE SCHEMA, NO DATA !!!!!!!";
fi;
if [ $NO_ASK -eq 1 ]; then
go='yes';
else
echo "Continue? type 'yes'";
read go;
fi;
if [ "$go" != 'yes' ]; then
echo "Aborted";
exit;
else
start_time=`date +"%F %T"`;
START=`date +'%s'`;
echo "Drop DB $database [$_host:$_port] @ $start_time";
# DROP DATABASE
if [ $DRY_RUN -eq 0 ]; then
$PG_DROPDB -U postgres $host $port $database;
else
echo $PG_DROPDB -U postgres $host $port $database;
fi;
# CREATE DATABASE
echo "Create DB $database with $owner and encoding $encoding on [$_host:$_port] @ `date +"%F %T"`";
if [ $DRY_RUN -eq 0 ]; then
$PG_CREATEDB -U postgres -O $owner -E $encoding -T $TEMPLATEDB $host $port $database;
else
echo $PG_CREATEDB -U postgres -O $owner -E $encoding -T $TEMPLATEDB $host $port $database;
fi;
# CREATE plpgsql LANG
if [ -f $PG_CREATELANG ]; then
echo "Create plpgsql lang in DB $database on [$_host:$_port] @ `date +"%F %T"`";
if [ $DRY_RUN -eq 0 ]; then
$PG_CREATELANG -U postgres plpgsql $host $port $database;
else
echo $PG_CREATELANG -U postgres plpgsql $host $port $database;
fi;
fi;
# RESTORE DATA
echo "Restore data from $file to DB $database on [$_host:$_port] with Jobs $MAX_JOBS @ `date +"%F %T"`";
if [ $DRY_RUN -eq 0 ]; then
$PG_RESTORE -U postgres -d $database -F c -v -c $schema -j $MAX_JOBS $host $port $role $file 2>restore_errors.$LOG_FILE_EXT;
else
echo "$PG_RESTORE -U postgres -d $database -F c -v -c $schema -j $MAX_JOBS $host $port $role $file 2>restore_errors.$LOG_FILE_EXT";
fi;
# BUG FIX FOR POSTGRESQL 9.6.2 pg_dump
# it does not dump the default public ACL, so the owner of the DB cannot access the data; check if the ACL entry is missing and restore the basic grants
if [ -z "$($PG_RESTORE -l $file | grep -- "ACL - public postgres")" ]; then
echo "Fixing missing basic public schema ACLs from DB $database [$_host:$_port] @ `date +"%F %T"`";
# grant usage on schema public to public;
# grant create on schema public to public;
if [ $DRY_RUN -eq 0 ]; then
echo "GRANT USAGE ON SCHEMA public TO public;" | $PG_PSQL -U postgres -Atq $host $port $database;
echo "GRANT CREATE ON SCHEMA public TO public;" | $PG_PSQL -U postgres -Atq $host $port $database;
fi;
fi;
# SEQUENCE RESET DATA COLLECTION
echo "Resetting all sequences from DB $database [$_host:$_port] @ `date +"%F %T"`";
reset_query="SELECT 'SELECT SETVAL(' ||quote_literal(S.relname)|| ', MAX(' ||quote_ident(C.attname)|| ') ) FROM ' ||quote_ident(T.relname)|| ';' FROM pg_class AS S, pg_depend AS D, pg_class AS T, pg_attribute AS C WHERE S.relkind = 'S' AND S.oid = D.objid AND D.refobjid = T.oid AND D.refobjid = C.attrelid AND D.refobjsubid = C.attnum ORDER BY S.relname;";
if [ $DRY_RUN -eq 0 ]; then
echo "${reset_query}" | $PG_PSQL -U postgres -Atq $host $port -o $TEMP_FILE $database
$PG_PSQL -U postgres $host $port -e -f $TEMP_FILE $database 1>output_sequence.$LOG_FILE_EXT 2>errors_sequence.$database.$LOG_FILE_EXT;
rm $TEMP_FILE;
else
echo "${reset_query}";
echo "$PG_PSQL -U postgres $host $port -e -f $TEMP_FILE $database 1>output_sequence.$LOG_FILE_EXT 2>errors_sequence.$database.$LOG_FILE_EXT";
fi;
echo "Restore of data $file for DB $database [$_host:$_port] finished";
DURATION=$(( $(date +'%s') - START ));
echo "Start at $start_time and end at `date +"%F %T"` and ran for $(convert_time ${DURATION})";
echo "=== END RESTORE" >>restore_errors.$LOG_FILE_EXT;
fi;

454
pg_restore_db_file.sh Executable file

@@ -0,0 +1,454 @@
#!/bin/bash
function usage ()
{
cat <<- EOT
Restores a list of database dumps from a folder to a database server
Usage: ${0##/*/} -f <dump folder> [-j <JOBS>] [-e <ENCODING>] [-h <HOST>] [-p <PORT>] [-r|-a] [-g] [-n]
-e <ENCODING>: override global encoding, will be overruled by per file encoding
-p <PORT>: override default port from file.
-h <HOST>: override default host from file.
-f: dump folder source. Where the database dump files are located. This option must be set
-j <JOBS>: run this many restore jobs in parallel. If not set, the number of CPUs is used
-r: use redhat base paths instead of debian
-a: use amazon base paths instead of debian
-g: do not import globals file
-n: dry run, do not import or change anything
EOT
}
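# Example calls (hypothetical folder):
#   pg_restore_db_file.sh -f /mnt/backup/db_dumps_fc/ -j 4
#     restore every *.sql dump in the folder with 4 parallel jobs, globals first
#   pg_restore_db_file.sh -f /mnt/backup/db_dumps_fc/ -g -n
#     dry run without the globals file, only prints what would be executed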
_port=5432
PORT='';
_host='local';
HOST='';
_encoding='UTF8';
set_encoding='';
REDHAT=0;
AMAZON=0;
IMPORT_GLOBALS=1;
TEMPLATEDB='template0'; # truly empty for restore
DUMP_FOLDER='';
MAX_JOBS='';
BC='/usr/bin/bc';
PORT_REGEX="^[0-9]{4,5}$";
OPTARG_REGEX="^-";
DRY_RUN=0;
ERROR=0;
encoding='';
host='';
port='';
# options check
while getopts ":f:j:h:p:e:granm" opt; do
# pre test for unfilled
if [ "${opt}" = ":" ] || [[ "${OPTARG-}" =~ ${OPTARG_REGEX} ]]; then
if [ "${opt}" = ":" ]; then
CHECK_OPT=${OPTARG};
else
CHECK_OPT=${opt};
fi;
case ${CHECK_OPT} in
h)
echo "-h needs a host name";
ERROR=1;
;;
f)
echo "-f needs a folder name";
ERROR=1;
;;
p)
echo "-h needs a port number";
ERROR=1;
;;
e)
echo "-e needs an encoding";
ERROR=1;
;;
j)
echo "-j needs a numeric value for parallel jobs";
ERROR=1;
;;
esac
fi;
case $opt in
f|file)
DUMP_FOLDER=$OPTARG;
;;
j|jobs)
MAX_JOBS=${OPTARG};
;;
e|encoding)
if [ -z "$encoding" ]; then
encoding=$OPTARG;
fi;
;;
h|hostname)
if [ -z "$host" ]; then
host='-h '$OPTARG;
_host=$OPTARG;
HOST=$OPTARG;
fi;
;;
p|port)
if [ -z "$port" ]; then
port='-p '$OPTARG;
_port=$OPTARG;
PORT=$OPTARG;
fi;
;;
g|globals)
IMPORT_GLOBALS=0;
;;
r|redhat)
REDHAT=1;
;;
a|amazon)
AMAZON=1;
;;
n|dry-run)
DRY_RUN=1;
;;
m|help)
usage;
exit 0;
;;
\?)
echo -e "\n Option does not exist: $OPTARG\n";
usage;
exit 1;
;;
esac;
done;
if [ "$REDHAT" -eq 1 ] && [ "$AMAZON" -eq 1 ]; then
echo "You cannot set the -a and -r flag at the same time";
fi;
if [ "$REDHAT" -eq 1 ]; then
# Redhat base path (for non official ones would be '/usr/pgsql-'
DBPATH_BASE='/usr/pgsql-'
elif [ "$AMAZON" -eq 1 ]; then
# Amazon paths (lib64 default amazon package)
DBPATH_BASE='/usr/lib64/pgsql';
else
# Debian base path
DBPATH_BASE='/usr/lib/postgresql/';
fi;
# check that the port is a valid number
if ! [[ "$_port" =~ $PORT_REGEX ]]; then
echo "The port needs to be a valid number: $_port";
exit 1;
fi;
NUMBER_REGEX="^[0-9]{1,}$";
# find the max allowed jobs based on the cpu count
# because setting more than this is not recommended
# note this can be off on vmware hosts where random cpu counts are assigned
cpu=$(cat /proc/cpuinfo | grep "processor" | wc -l);
_max_jobs=${cpu};
# abort if MAX_JOBS is not a number, is smaller than 1, or is greater than _max_jobs
if [ ! -z "${MAX_JOBS}" ]; then
# check that it is a valid number
if [[ ! ${MAX_JOBS} =~ ${NUMBER_REGEX} ]]; then
echo "Please enter a number for the -j option";
exit 1;
fi;
if [ ${MAX_JOBS} -lt 1 ] || [ ${MAX_JOBS} -gt ${_max_jobs} ]; then
echo "The value for the jobs option -j cannot be smaller than 1 or bigger than ${_max_jobs}";
exit 1;
fi;
else
# auto set the MAX_JOBS based on the cpu count
MAX_JOBS=${_max_jobs};
fi;
if [ "$DUMP_FOLDER" = '' ]; then
echo "Please provide a source folder for the dump files with the -f option";
exit 1;
fi;
# check that source folder is there
if [ ! -d "$DUMP_FOLDER" ]; then
echo "Folder '$DUMP_FOLDER' does not exist";
exit 1;
fi;
LOGS=$DUMP_FOLDER'/logs/';
# create logs folder if missing
if [ ! -d "$LOGS" ]; then
echo "Creating '$LOGS' folder";
mkdir -p "$LOGS";
if [ ! -d "$LOGS" ]; then
echo "Creation of '$LOGS' folder failed";
exit 1;
fi;
fi;
# check if we have the 'bc' command available or not
if [ -f "${BC}" ]; then
BC_OK=1;
else
BC_OK=0;
fi;
# METHOD: convert_time
# PARAMS: timestamp in seconds or with milliseconds (nnnn.nnnn)
# RETURN: formatted string with human readable time (d/h/m/s)
# CALL : var=$(convert_time $timestamp);
# DESC : converts a timestamp or a timestamp with float milliseconds to a human readable format
# output is in days/hours/minutes/seconds
function convert_time
{
timestamp=${1};
# round to four digits for ms
timestamp=$(printf "%1.4f" $timestamp);
# get the ms part and remove any leading 0
ms=$(echo ${timestamp} | cut -d "." -f 2 | sed -e 's/^0*//');
timestamp=$(echo ${timestamp} | cut -d "." -f 1);
timegroups=(86400 3600 60 1); # day, hour, min, sec
timenames=("d" "h" "m" "s"); # day, hour, min, sec
output=( );
time_string='';
for timeslice in ${timegroups[@]}; do
# floor for the division, push to output
if [ ${BC_OK} -eq 1 ]; then
output[${#output[*]}]=$(echo "${timestamp}/${timeslice}" | bc);
timestamp=$(echo "${timestamp}%${timeslice}" | bc);
else
output[${#output[*]}]=$(awk "BEGIN {printf \"%d\", ${timestamp}/${timeslice}}");
timestamp=$(awk "BEGIN {printf \"%d\", ${timestamp}%${timeslice}}");
fi;
done;
for ((i=0; i<${#output[@]}; i++)); do
if [ ${output[$i]} -gt 0 ] || [ ! -z "$time_string" ]; then
if [ ! -z "${time_string}" ]; then
time_string=${time_string}" ";
fi;
time_string=${time_string}${output[$i]}${timenames[$i]};
fi;
done;
if [ ! -z "${ms}" ] && [ "${ms}" != "nan" ]; then
if [ "${ms}" -gt 0 ]; then
time_string=${time_string}" "${ms}"ms";
fi;
fi;
# just in case the time is 0
if [ -z "${time_string}" ]; then
time_string="0s";
fi;
echo -n "${time_string}";
}
# default version (for folder)
DBPATH_VERSION='9.6/';
# if amazon remove "." from version
if [ "${AMAZON}" -eq 1 ]; then
DBPATH_VERSION=$(echo "${DBPATH_VERSION}" | sed -e 's/\.//');
fi;
DBPATH_BIN='bin/';
# postgresql binaries
DROPDB="dropdb";
CREATEDB="createdb";
CREATELANG="createlang";
PGRESTORE="pg_restore";
CREATEUSER="createuser";
PSQL="psql";
# default port and host
EXCLUDE_LIST="pg_globals"; # space separated
LOGFILE="tee -a $LOGS/PG_RESTORE_DB_FILE.`date +"%Y%m%d_%H%M%S"`.log";
# get the count for DBs to import
db_count=`find $DUMP_FOLDER -name "*.sql" -print | wc -l`;
# start info
if [ "${DUMP_FOLDER}" = "." ]; then
_DUMP_FOLDER="[current folder]";
else
_DUMP_FOLDER=${DUMP_FOLDER};
fi;
if [ -z "${HOST}" ]; then
_HOST="[auto host]";
else
_HOST=${HOST};
fi;
if [ -z "${PORT}" ]; then
_PORT="[auto port]";
else
_PORT=${PORT};
fi;
echo "= Will import $db_count databases from $_DUMP_FOLDER" | $LOGFILE;
echo "= into the DB server $_HOST:$_PORT" | $LOGFILE;
echo "= running $MAX_JOBS jobs" | $LOGFILE;
echo "= import logs: $LOGS" | $LOGFILE;
echo "" | $LOGFILE;
pos=1;
# go through all the files and import them into the database
MASTERSTART=`date +'%s'`;
master_start_time=`date +"%F %T"`;
# first import the pg_globals file if this is requested, default is yes
if [ "$IMPORT_GLOBALS" -eq 1 ]; then
start_time=`date +"%F %T"`;
START=`date +'%s'`;
# get the pg_globals file
echo "=[Globals Restore]=START=[$start_time]==================================================>" | $LOGFILE;
# get newest and only the first one
file=`ls -1t $DUMP_FOLDER/pg_global* 2>/dev/null | head -1`;
filename=`basename $file`;
# the last _ is for version 10 or higher
version=`echo $filename | cut -d "." -f 4 | cut -d "-" -f 2 | cut -d "_" -f 1`; # db version, without prefix of DB type
# if this is < 10 then we need the second part too
if [ ${version} -lt 10 ]; then
version=$version'.'`echo $filename | cut -d "." -f 5 | cut -d "_" -f 1`; # db version, second part (after .)
fi;
# if amazon remove "." from version
if [ "${AMAZON}" -eq 1 ]; then
version=$(echo "${version}" | sed -e 's/\.//');
fi;
__host=`echo $filename | cut -d "." -f 5 | cut -d "_" -f 2`; # hostname of original DB, can be used as target host too
__port=`echo $filename | cut -d "." -f 5 | cut -d "_" -f 3`; # port of original DB, can be used as target port too
# override the default port with the file port if no -p was given, it differs and is valid
if [ -z "$PORT" ] && [ "$__port" != "$_port" ] && [[ $__port =~ $PORT_REGEX ]] ; then
_port=$__port;
port='-p '$_port;
fi;
if [ -z "$HOST" ] && [ "$__host" != "local" ]; then
_host=$__host;
host='-h '$_host;
fi;
# create the path to the DB from the DB version in the backup file
if [ ! -z "$version" ]; then
DBPATH_VERSION_LOCAL=$version'/';
else
DBPATH_VERSION_LOCAL=$DBPATH_VERSION;
fi;
DBPATH=$DBPATH_BASE$DBPATH_VERSION_LOCAL$DBPATH_BIN;
echo "+ Restore globals file: $filename to [$_host:$_port] @ `date +"%F %T"`" | $LOGFILE;
if [ ${DRY_RUN} -eq 0 ]; then
$DBPATH$PSQL -U postgres $host $port -f $file -e -q -X template1 | $LOGFILE;
else
echo "$DBPATH$PSQL -U postgres $host $port -f $file -e -q -X template1" | $LOGFILE;
fi;
DURATION=$[ `date +'%s'`-$START ];
printf "=[Globals Restore]=END===[%s]========================================================>\n" "$(convert_time ${DURATION})" | $LOGFILE;
fi;
for file in $DUMP_FOLDER/*.sql; do
start_time=`date +"%F %T"`;
START=`date +'%s'`;
echo "=[$pos/$db_count]=START=[$start_time]==================================================>" | $LOGFILE;
# the encoding
set_encoding='';
# get the filename
filename=`basename $file`;
# get the database and user
# default file name is <database>.<owner>.<encoding>.<type>-<version>_<host>_<port>_<date>_<time>_<sequence>
database=`echo $filename | cut -d "." -f 1`;
owner=`echo $filename | cut -d "." -f 2`;
__encoding=`echo $filename | cut -d "." -f 3`;
# the last _ part is for version 10 or higher
version=`echo $filename | cut -d "." -f 4 | cut -d "-" -f 2 | cut -d "_" -f 1`; # db version, without prefix of DB type
# if this is < 10 then we need the second part too
if [ ${version} -lt 10 ]; then
version=$version'.'`echo $filename | cut -d "." -f 5 | cut -d "_" -f 1`; # db version, second part (after .)
fi;
# if amazon remove "." from version
if [ "${AMAZON}" -eq 1 ]; then
version=$(echo "${version}" | sed -e 's/\.//');
fi;
__host=`echo $filename | cut -d "." -f 5 | cut -d "_" -f 2`; # hostname of original DB, can be used as target host too
__port=`echo $filename | cut -d "." -f 5 | cut -d "_" -f 3`; # port of original DB, can be used as target port too
other=`echo $filename | cut -d "." -f 5 | cut -d "_" -f 2-`; # backup date and time, plus sequence
# override the default port with the file port if no -p was given, it differs and is valid
if [ -z "$PORT" ] && [ "$__port" != "$_port" ] && [[ $__port =~ $PORT_REGEX ]] ; then
_port=$__port;
port='-p '$_port;
fi;
if [ -z "$HOST" ] && [ "$__host" != "local" ]; then
_host=$__host;
host='-h '$_host;
fi;
# override encoding (dangerous)
# check if we have a master override
if [ "$encoding" ]; then
set_encoding=$encoding;
fi;
# if no override encoding set first from file, then from global
if [ ! "$set_encoding" ]; then
if [ ! -z "$__encoding" ]; then
set_encoding=$__encoding;
else
set_encoding=$_encoding;
fi;
fi;
# create the path to the DB from the DB version in the backup file
if [ ! -z "$version" ]; then
DBPATH_VERSION_LOCAL=$version'/';
else
DBPATH_VERSION_LOCAL=$DBPATH_VERSION;
fi;
DBPATH=$DBPATH_BASE$DBPATH_VERSION_LOCAL$DBPATH_BIN;
# check this is skip or not
exclude=0;
for exclude_db in $EXCLUDE_LIST; do
if [ "$exclude_db" = "$database" ]; then
exclude=1;
fi;
done;
if [ $exclude -eq 0 ]; then
# create user if not exist yet
# check query for user
user_oid=`echo "SELECT oid FROM pg_roles WHERE rolname = '$owner';" | $DBPATH$PSQL -U postgres $host $port -A -F "," -t -q -X template1`;
if [ -z "$user_oid" ]; then
echo "+ Create USER '$owner' for DB '$database' [$_host:$_port] @ `date +"%F %T"`" | $LOGFILE;
if [ ${DRY_RUN} -eq 0 ]; then
$DBPATH$CREATEUSER -U postgres -D -R -S $host $port $owner;
else
echo "$DBPATH$CREATEUSER -U postgres -D -R -S $host $port $owner";
fi;
fi;
# before importing the data, drop this database
echo "- Drop DB '$database' [$_host:$_port] @ `date +"%F %T"`" | $LOGFILE;
if [ ${DRY_RUN} -eq 0 ]; then
$DBPATH$DROPDB -U postgres $host $port $database;
else
echo "$DBPATH$DROPDB -U postgres $host $port $database";
fi;
echo "+ Create DB '$database' with '$owner' [$_host:$_port] @ `date +"%F %T"`" | $LOGFILE;
if [ ${DRY_RUN} -eq 0 ]; then
$DBPATH$CREATEDB -U postgres -O $owner -E $set_encoding -T $TEMPLATEDB $host $port $database;
else
echo "$DBPATH$CREATEDB -U postgres -O $owner -E $set_encoding -T $TEMPLATEDB $host $port $database";
fi;
if [ -f $DBPATH$CREATELANG ]; then
echo "+ Create plpgsql lang in DB '$database' [$_host:$_port] @ `date +"%F %T"`" | $LOGFILE;
if [ ${DRY_RUN} -eq 0 ]; then
$DBPATH$CREATELANG -U postgres plpgsql $host $port $database;
else
echo "$DBPATH$CREATELANG -U postgres plpgsql $host $port $database";
fi;
fi;
echo "% Restore data from '$filename' to DB '$database' using $MAX_JOBS jobs [$_host:$_port] @ `date +"%F %T"`" | $LOGFILE;
if [ ${DRY_RUN} -eq 0 ]; then
$DBPATH$PGRESTORE -U postgres -d $database -F c -v -c -j $MAX_JOBS $host $port $file 2>$LOGS'/errors.'$database'.'$(date +"%Y%m%d_%H%M%S").log;
else
echo "$DBPATH$PGRESTORE -U postgres -d $database -F c -v -c -j $MAX_JOBS $host $port $file 2>$LOGS/errors.$database.$(date +"%Y%m%d_%H%M%S").log";
fi;
# BUG FIX FOR POSTGRESQL 9.6.2 pg_dump
# it does not dump the default public ACL, so the owner of the DB cannot access the data; check if the ACL entry is missing and restore the basic grants
if [ -z "$($DBPATH$PGRESTORE -l $file | grep -- "ACL - public postgres")" ]; then
echo "? Fixing missing basic public schema ACLs from DB $database [$_host:$_port] @ `date +"%F %T"`" | $LOGFILE;
# grant usage on schema public to public;
# grant create on schema public to public;
if [ ${DRY_RUN} -eq 0 ]; then
echo "GRANT USAGE ON SCHEMA public TO public;" | $DBPATH$PSQL -U postgres -Atq $host $port $database;
echo "GRANT CREATE ON SCHEMA public TO public;" | $DBPATH$PSQL -U postgres -Atq $host $port $database;
fi;
fi;
echo "$ Restore of data '$filename' for DB '$database' [$_host:$_port] finished" | $LOGFILE;
DURATION=$(( $(date +'%s') - START ));
echo "* Start at $start_time and end at `date +"%F %T"` and ran for $(convert_time ${DURATION}) seconds" | $LOGFILE;
else
DURATION=0;
echo "# Skipped DB '$database'" | $LOGFILE;
fi;
printf "=[$pos/$db_count]=END===[%s]========================================================>\n" "$(convert_time ${DURATION})" | $LOGFILE;
pos=$(( pos + 1 ));
done;
DURATION=$(( $(date +'%s') - MASTERSTART ));
echo "" | $LOGFILE;
echo "= Start at $master_start_time and end at `date +"%F %T"` and ran for $(convert_time ${DURATION}) seconds. Imported $db_count databases." | $LOGFILE;