Files
DumpDatabasePostgreSQL/pg_restore_db_file.sh
Clemens Schwaighofer b979cdd51f Update with commands in array for calls, logging for single file restore
All commands are now run through a single array variable
All logs are in logs folder for the restore which is a sub folder to where the SQL file is located
On restore abort if the basic database creation failed or skip to the next database in block restore
2025-10-17 10:39:55 +09:00

549 lines
17 KiB
Bash
Executable File

#!/bin/bash
# Author: Clemens Schwaighofer
# Description:
# Drop and restore databases from a folder for dump files created by pg_db_dump_file.sh
function usage ()
{
cat <<- EOT
Restores a list of database dumps from a folder to a database server
Usage: ${0##/*/} -f <dump folder> [-j <JOBS>] [-e <ENCODING>] [-h <HOST>] [-r] [-g] [-n]
-e <ENCODING>: override global encoding, will be overruled by per file encoding
-p <PORT>: override default port from file.
-h <HOST>: override default host from file.
-f: dump folder source. Where the database dump files are located. This is a must set option
-j <JOBS>: Run how many jobs Parallel. If not set, 2 jobs are run parallel
-r: use redhat base paths instead of debian
-g: do not import globals file
-n: dry run, do not import or change anything
EOT
}
_port=5432
PORT='';
_host='local';
HOST='';
_encoding='UTF8';
set_encoding='';
ERROR=0;
REDHAT=0;
IMPORT_GLOBALS=1;
TEMPLATEDB='template0'; # truly empty for restore
DUMP_FOLDER='';
MAX_JOBS='';
BC='/usr/bin/bc';
PORT_REGEX="^[0-9]{4,5}$";
OPTARG_REGEX="^-";
DRY_RUN=0;
PG_PARAM_HOST=();
PG_PARAM_PORT=();
# options check
while getopts ":f:j:h:p:e:grnm" opt; do
# pre test for unfilled
if [ "${opt}" = ":" ] || [[ "${OPTARG-}" =~ ${OPTARG_REGEX} ]]; then
if [ "${opt}" = ":" ]; then
CHECK_OPT=${OPTARG};
else
CHECK_OPT=${opt};
fi;
case ${CHECK_OPT} in
h)
echo "-h needs a host name";
ERROR=1;
;;
f)
echo "-f needs a folder name";
ERROR=1;
;;
p)
echo "-h needs a port number";
ERROR=1;
;;
e)
echo "-e needs an encoding";
ERROR=1;
;;
j)
echo "-j needs a numeric value for parallel jobs";
ERROR=1;
;;
esac
fi;
case $opt in
# f|file)
f)
DUMP_FOLDER=$OPTARG;
;;
# j|jobs)
j)
MAX_JOBS=${OPTARG};
;;
# e|encoding)
e)
if [ -z "$encoding" ]; then
encoding=$OPTARG;
fi;
;;
# h|hostname)
h)
if [ -z "$host" ]; then
# do not set if local name (uses socket)
if [ "$OPTARG" != "local" ]; then
PG_PARAM_HOST=("-h" "${OPTARG}");
fi;
_host=$OPTARG;
HOST=$OPRTARG;
fi;
;;
# p|port)
p)
if [ -z "$port" ]; then
PG_PARAM_PORT=("-p" "${OPTARG}");
_port=$OPTARG;
PORT=$OPTARG;
fi;
;;
# g|globals)
g)
IMPORT_GLOBALS=0;
;;
# r|redhat)
r)
REDHAT=1;
;;
# n|dry-run)
n)
DRY_RUN=1;
;;
# m|help)
m)
usage;
exit 0;
;;
\?)
echo -e "\n Option does not exist: $OPTARG\n";
usage;
exit 1;
;;
esac;
done;
if [ "${ERROR}" -eq 1 ]; then
exit 0;
fi;
if [ "$REDHAT" -eq 1 ]; then
# Redhat base path (for non official ones would be '/usr/pgsql-'
PG_BASE_PATH='/usr/pgsql-'
else
# Debian base path
PG_BASE_PATH='/usr/lib/postgresql/';
fi;
# check that the port is a valid number
if ! [[ "$_port" =~ $PORT_REGEX ]]; then
echo "The port needs to be a valid number: $_port";
exit 1;
fi;
NUMBER_REGEX="^[0-9]{1,}$";
# find the max allowed jobs based on the cpu count
# because setting more than this is not recommended
# so this fails in vmware hosts were we have random cpus assigned
_max_jobs=$(nproc --all);
# if the MAX_JOBS is not number or smaller 1 or greate _max_jobs
if [ -n "${MAX_JOBS}" ]; then
# check that it is a valid number
if [[ ! ${MAX_JOBS} =~ ${NUMBER_REGEX} ]]; then
echo "Please enter a number for the -j option";
exit 1;
fi;
if [ "${MAX_JOBS}" -lt 1 ] || [ "${MAX_JOBS}" -gt "${_max_jobs}" ]; then
echo "The value for the jobs option -j cannot be smaller than 1 or bigger than ${_max_jobs}";
exit 1;
fi;
else
# auto set the MAX_JOBS based on the cpu count
MAX_JOBS=${_max_jobs};
fi;
if [ "$DUMP_FOLDER" = '' ]; then
echo "Please provide a source folder for the dump files with the -f option";
exit;
fi;
# check that source folder is there
if [ ! -d "$DUMP_FOLDER" ]; then
echo "Folder '$DUMP_FOLDER' does not exist";
exit;
fi;
LOG_PATH=$DUMP_FOLDER'/logs/';
# create logs folder if missing
if [ ! -d "$LOG_PATH" ]; then
echo "+ Creating '$LOG_PATH' folder";
mkdir -p "$LOG_PATH";
if [ ! -d "$LOG_PATH" ]; then
echo "[!] Creation of '$LOG_PATH' folder failed";
exit 1;
fi;
fi;
# check if we have the 'bc' command available or not
if [ -f "${BC}" ]; then
BC_OK=1;
else
BC_OK=0;
fi;
# METHOD: convert_time
# PARAMS: timestamp in seconds or with milliseconds (nnnn.nnnn)
# RETURN: formated string with human readable time (d/h/m/s)
# CALL : var=$(convert_time $timestamp);
# DESC : converts a timestamp or a timestamp with float milliseconds to a human readable format
# output is in days/hours/minutes/seconds
function convert_time
{
timestamp=${1};
# round to four digits for ms
timestamp=$(printf "%1.4f" "$timestamp");
# get the ms part and remove any leading 0
ms=$(echo "${timestamp}" | cut -d "." -f 2 | sed -e 's/^0*//');
timestamp=$(echo "${timestamp}" | cut -d "." -f 1);
timegroups=(86400 3600 60 1); # day, hour, min, sec
timenames=("d" "h" "m" "s"); # day, hour, min, sec
output=( );
time_string='';
for timeslice in "${timegroups[@]}"; do
# floor for the division, push to output
if [ ${BC_OK} -eq 1 ]; then
output[${#output[*]}]=$(echo "${timestamp}/${timeslice}" | bc);
timestamp=$(echo "${timestamp}%${timeslice}" | bc);
else
output[${#output[*]}]=$(awk "BEGIN {printf \"%d\", ${timestamp}/${timeslice}}");
timestamp=$(awk "BEGIN {printf \"%d\", ${timestamp}%${timeslice}}");
fi;
done;
for ((i=0; i<${#output[@]}; i++)); do
if [ "${output[$i]}" -gt 0 ] || [ -n "$time_string" ]; then
if [ -n "${time_string}" ]; then
time_string=${time_string}" ";
fi;
time_string=${time_string}${output[$i]}${timenames[$i]};
fi;
done;
# milliseconds must be filled, but we also check that they are non "nan" string
# that can appear in the original value
if [ -n "${ms}" ] && [ "${ms}" != "nan" ]; then
if [ "${ms}" -gt 0 ]; then
time_string="${time_string} ${ms}ms";
fi;
fi;
# just in case the time is 0
if [ -z "${time_string}" ]; then
time_string="0s";
fi;
echo -n "${time_string}";
}
# default version (for folder)
PG_PATH_VERSION='15/';
PG_PATH_BIN='bin/';
# postgresql binaries
PG_DROPDB="dropdb";
PG_CREATEDB="createdb";
PG_CREATELANG="createlang";
PG_RESTORE="pg_restore";
PG_CREATEUSER="createuser";
PG_PSQL="psql";
# default port and host
EXCLUDE_LIST="pg_globals"; # space separated
LOG_FILE="tee -a "$LOG_PATH/PG_RESTORE_DB_FILE.$(date +"%Y%m%d_%H%M%S").log"";
# get the count for DBs to import
db_count=$(find "${DUMP_FOLDER}" -name "*.sql" -print | wc -l);
# start info
if [ "${DUMP_FOLDER}" = "." ]; then
_DUMP_FOLDER="[current folder]";
else
_DUMP_FOLDER=${DUMP_FOLDER};
fi;
if [ -z "${HOST}" ]; then
_HOST="[auto host]";
else
_HOST=${HOST};
fi;
if [ -z "${PORT}" ]; then
_PORT="[auto port]";
else
_PORT=${PORT};
fi;
if [ ${DRY_RUN} ]; then
echo "**** [DRY RUN] ****";
fi;
echo "= Will import $db_count databases from $_DUMP_FOLDER" | $LOG_FILE;
echo "= into the DB server $_HOST:$_PORT" | $LOG_FILE;
echo "= running $MAX_JOBS jobs" | $LOG_FILE;
echo "= import logs: $LOG_PATH" | $LOG_FILE;
echo "" | $LOG_FILE;
pos=1;
# go through all the files an import them into the database
MASTERSTART=$(date +"%s");
master_start_time=$(date +"%F %T");
# first import the pg_globals file if this is requested, default is yes
if [ "$IMPORT_GLOBALS" -eq 1 ]; then
start_time=$(date +"%F %T");
START=$(date +"%s");
# get the pg_globals file
echo "=[Globals Restore]=START=[$start_time]==================================================>" | $LOG_FILE;
# get newest and only the first one
file=$(find "$DUMP_FOLDER" -name "pg_global*" -type f -printf "%Ts\t%p\n" | sort -nr | head -1);
filename=$(basename "$file");
# the last _ is for version 10 or higher
# db version, without prefix of DB type
version=$(echo "$filename" | cut -d "." -f 4 | cut -d "-" -f 2 | cut -d "_" -f 1);
cut_pos=4;
# if this is < 10 then we need the second part too
if [ "${version}" -lt 10 ]; then
# db version, second part (after .)
version=$version'.'$(echo "$filename" | cut -d "." -f 5 | cut -d "_" -f 1);
cut_pos=5;
fi;
# hostname of original DB, can be used as target host too
__host=$(echo "$filename" | cut -d "." -f ${cut_pos} | cut -d "_" -f 2);
# port of original DB, can be used as target port too
__port=$(echo "$filename" | cut -d "." -f ${cut_pos} | cut -d "_" -f 3);
# override file port over given port if it differs and is valid
if [ -z "$_port" ] && [ "$__port" != "$_port" ] && [[ "$__port" =~ $PORT_REGEX ]] ; then
_port=$__port;
PG_PARAM_PORT=("-p" "$_port");
fi;
if [ -z "$_host" ] && [ "$__host" != "local" ]; then
_host=$__host;
PG_PARAM_HOST=("-h" "${_host}");
fi;
# create the path to the DB from the DB version in the backup file
if [ -n "$version" ]; then
PG_PATH_VERSION_LOCAL="${version}/";
else
PG_PATH_VERSION_LOCAL="${PG_PATH_VERSION}";
fi;
PG_PATH="${PG_BASE_PATH}${PG_PATH_VERSION_LOCAL}${PG_PATH_BIN}";
echo "+ Restore globals file: $filename to [$_host:$_port] @ $(date +"%F %T")" | $LOG_FILE;
_PG_PARAMS=("-U" "postgres");
_PG_PARAMS+=("${PG_PARAM_HOST[@]}");
_PG_PARAMS+=("${PG_PARAM_PORT[@]}");
_PG_PARAMS+=("-f" "$file" "-e" "-q" "-X" "template1");
PG_COMMAND=("${PG_PATH}${PG_PSQL}" "${_PG_PARAMS[@]}");
if [ ${DRY_RUN} -eq 0 ]; then
"${PG_COMMAND[@]}" | $LOG_FILE;
else
echo "${PG_COMMAND[*]}" | $LOG_FILE;
fi;
DURATION=$(($(date +"%s")-START));
printf "=[Globals Restore]=END===[%s]========================================================>\n" "$(convert_time ${DURATION})" | $LOG_FILE;
fi;
for file in "$DUMP_FOLDER/"*.sql; do
start_time=$(date +"%F %T");
START=$(date +"%s");
echo "=[$pos/$db_count]=START=[$start_time]==================================================>" | $LOG_FILE;
# the encoding
set_encoding='';
# get the filename
filename=$(basename "$file");
# get the databse, user
# default file name is <database>.<owner>.<encoding>.<type>-<version>_<host>_<port>_<date>_<time>_<sequence>
database=$(echo "$filename" | cut -d "." -f 1);
# check this is skip or not
exclude=0;
for exclude_db in $EXCLUDE_LIST; do
if [ "$exclude_db" = "$database" ]; then
exclude=1;
break;
fi;
done;
if [ $exclude -eq 1 ]; then
DURATION=0;
echo "# Skipped DB '$database'" | $LOG_FILE;
printf "=[$pos/$db_count]=END===[%s]========================================================>\n" "$(convert_time ${DURATION})" | $LOG_FILE;
pos=$((pos+1));
continue;
fi;
# restore DB
owner=$(echo "$filename" | cut -d "." -f 2);
__encoding=$(echo "$filename" | cut -d "." -f 3);
# the last _ part if for version 10
# db version, without prefix of DB type
version=$(echo "$filename" | cut -d "." -f 4 | cut -d "-" -f 2 | cut -d "_" -f 1);
# if this is < 10 then we need the second part too
if [ "${version}" -lt 10 ]; then
# db version, second part (after .)
version=$version'.'$(echo "$filename" | cut -d "." -f 5 | cut -d "_" -f 1);
fi;
# hostname of original DB, can be used as target host too
__host=$(echo "$filename" | cut -d "." -f 5 | cut -d "_" -f 2);
# port of original DB, can be used as target port too
__port=$(echo "$filename" | cut -d "." -f 5 | cut -d "_" -f 3);
# backup date and time, plus sequence
# other=$(echo "$filename" | cut -d "." -f 5 | cut -d "_" -f 2-);
# override file port over given port if it differs and is valid
if [ -z "$_port" ] && [ "$__port" != "$_port" ] && [[ "$__port" =~ $PORT_REGEX ]] ; then
_port=$__port;
PG_PARAM_PORT=("-p" "$_port");
fi;
if [ -z "$_host" ] && [ "$__host" != "local" ]; then
_host=$__host;
PG_PARAM_HOST=("-h" "${_host}");
fi;
# override encoding (dangerous)
# check if we have a master override
if [ ! "$encoding" ]; then
set_encoding=$encoding;
fi;
# if no override encoding set first from file, then from global
if [ ! "$set_encoding" ]; then
if [ -n "$__encoding" ]; then
set_encoding=$__encoding;
else
set_encoding=$_encoding;
fi;
fi;
# create the path to the DB from the DB version in the backup file
if [ -n "$version" ]; then
PG_PATH_VERSION_LOCAL="${version}/";
else
PG_PATH_VERSION_LOCAL="${PG_PATH_VERSION}";
fi;
PG_PATH="${PG_BASE_PATH}${PG_PATH_VERSION_LOCAL}${PG_PATH_BIN}";
# create user if not exist yet
# check query for user
# for all calls
_PG_PARAMS_ALL=("-U" "postgres");
_PG_PARAMS_ALL+=("${PG_PARAM_HOST[@]}");
_PG_PARAMS_ALL+=("${PG_PARAM_PORT[@]}");
# for the call
_PG_PARAMS=("${_PG_PARAMS_ALL[@]}");
_PG_PARAMS+=("-A" "-F" "," "-t" "-q" "-X" "-c" "SELECT oid FROM pg_roles WHERE rolname = '$owner';" "template1");
user_oid=$("$PG_PSQL" "${_PG_PARAMS[@]}");
if [ -z "$user_oid" ]; then
echo "+ Create USER '$owner' for DB '$database' [$_host:$_port] @ $(date +"%F %T")" | $LOG_FILE;
_PG_PARAMS=("${_PG_PARAMS_ALL[@]}");
_PG_PARAMS+=("-D" "-R" "-S" "$owner");
PG_COMMAND=("${PG_PATH}${PG_CREATEUSER}" "${_PG_PARAMS[@]}");
if [ ${DRY_RUN} -eq 0 ]; then
"${PG_COMMAND[@]}";
RETURN_CODE=$?;
if [ $RETURN_CODE -ne 0 ]; then
echo "[!] Creation of user '$owner' failed, skipping database '$database'";
printf "=[$pos/$db_count]=END===[%s]========================================================>\n" "$(convert_time 0)" | $LOG_FILE;
pos=$((pos+1));
continue;
fi;
else
echo "${PG_COMMAND[*]}";
fi;
fi;
# before importing the data, drop this database
echo "- Drop DB '$database' [$_host:$_port] @ $(date +"%F %T")" | $LOG_FILE;
_PG_PARAMS=("${_PG_PARAMS_ALL[@]}");
_PG_PARAMS+=("$database");
PG_COMMAND=("${PG_PATH}${PG_DROPDB}" "${_PG_PARAMS[@]}");
if [ ${DRY_RUN} -eq 0 ]; then
"${PG_COMMAND[@]}";
RETURN_CODE=$?;
if [ $RETURN_CODE -ne 0 ]; then
echo "[!] Could not drop database, skipping database '$database'";
printf "=[$pos/$db_count]=END===[%s]========================================================>\n" "$(convert_time 0)" | $LOG_FILE;
pos=$((pos+1));
continue;
fi;
else
echo "${PG_COMMAND[*]}";
fi;
echo "+ Create DB '$database' with '$owner' [$_host:$_port] @ $(date +"%F %T")" | $LOG_FILE;
_PG_PARAMS=("${_PG_PARAMS_ALL[@]}");
_PG_PARAMS+=("-O" "$owner" "-E" "$set_encoding" "-T" "$TEMPLATEDB" "$database");
PG_COMMAND=("${PG_PATH}${PG_CREATEDB}" "${_PG_PARAMS[@]}");
if [ ${DRY_RUN} -eq 0 ]; then
"${PG_COMMAND[@]}";
RETURN_CODE=$?;
if [ $RETURN_CODE -ne 0 ]; then
echo "[!] Could not create database, skipping database '$database'";
printf "=[$pos/$db_count]=END===[%s]========================================================>\n" "$(convert_time 0)" | $LOG_FILE;
pos=$((pos+1));
continue;
fi;
else
echo "${PG_COMMAND[*]}";
fi;
if [ -f "${PG_PATH}${PG_CREATELANG}" ]; then
echo "+ Create plpgsql lang in DB '$database' [$_host:$_port] @ $(date +"%F %T")" | $LOG_FILE;
_PG_PARAMS=("${_PG_PARAMS_ALL[@]}");
_PG_PARAMS+=("plpgsql" "$database");
PG_COMMAND=("${PG_PATH}${PG_CREATELANG}" "${_PG_PARAMS[@]}");
if [ ${DRY_RUN} -eq 0 ]; then
"${PG_COMMAND[@]}";
RETURN_CODE=$?;
if [ $RETURN_CODE -ne 0 ]; then
echo "[!] Could not create plpgsql language, skipping database '$database'";
printf "=[$pos/$db_count]=END===[%s]========================================================>\n" "$(convert_time 0)" | $LOG_FILE;
pos=$((pos+1));
continue;
fi;
else
echo "${PG_COMMAND[*]}";
fi;
fi;
echo "% Restore data from '$filename' to DB '$database' using $MAX_JOBS jobs [$_host:$_port] @ $(date +"%F %T")" | $LOG_FILE;
_PG_PARAMS=("${_PG_PARAMS_ALL[@]}");
_PG_PARAMS+=("-d" "$database" "-F" "c" "-v" "-c" "-j" "$MAX_JOBS" "$file");
PG_COMMAND=("${PG_PATH}${PG_RESTORE}" "${_PG_PARAMS[@]}");
LOG_ERROR_FILE="$LOG_PATH/errors.${database}.$(date +"%Y%m%d_%H%M%S").log";
if [ ${DRY_RUN} -eq 0 ]; then
"${PG_COMMAND[@]}" 2>"${LOG_ERROR_FILE}";
RETURN_CODE=$?;
if [ $RETURN_CODE -ne 0 ]; then
echo "[!] Restore of database '$database' failed, see ${LOG_ERROR_FILE} for details";
fi;
else
echo "${PG_COMMAND[*]} 2>${LOG_ERROR_FILE}";
fi;
# BUG FIX FOR POSTGRESQL 9.6.2 db_dump
# it does not dump the default public ACL so the owner of the DB cannot access the data,
# check if the ACL dump is missing and do a basic restore
if ! "${PG_PATH}${PG_RESTORE}" -l "$file" | grep -q -- "ACL - public postgres"; then
echo "? Fixing missing basic public schema ACLs from DB $database [$_host:$_port] @ $(date +"%F %T")";
# grant usage on schema public to public;
_PG_PARAMS=("${_PG_PARAMS_ALL[@]}");
_PG_PARAMS+=("-AtqX" "-c" "GRANT USAGE ON SCHEMA public TO public;" "${database}");
PG_COMMAND=("${PG_PSQL}" "${_PG_PARAMS[@]}");
if [ ${DRY_RUN} -eq 0 ]; then
"${PG_COMMAND[@]}";
else
echo "${PG_COMMAND[*]}";
fi;
# grant create on schema public to public;
_PG_PARAMS=("${_PG_PARAMS_ALL[@]}");
_PG_PARAMS+=("-AtqX" "-c" "GRANT CREATE ON SCHEMA public TO public;" "${database}");
PG_COMMAND=("${PG_PSQL}" "${_PG_PARAMS[@]}");
if [ ${DRY_RUN} -eq 0 ]; then
"${PG_COMMAND[@]}";
else
echo "${PG_COMMAND[*]}";
fi;
fi;
echo "$ Restore of data '$filename' for DB '$database' [$_host:$_port] finished" | $LOG_FILE;
DURATION=$(($(date "+%s")-START));
echo "* Start at $start_time and end at $(date +"%F %T") and ran for $(convert_time ${DURATION}) seconds" | $LOG_FILE;
printf "=[$pos/$db_count]=END===[%s]========================================================>\n" "$(convert_time ${DURATION})" | $LOG_FILE;
pos=$((pos+1));
done;
DURATION=$(($(date "+%s")-MASTERSTART));
echo "" | $LOG_FILE;
echo "= Start at $master_start_time and end at $(date +"%F %T") and ran for $(convert_time ${DURATION}) seconds. Imported $db_count databases." | $LOG_FILE;