#!/bin/bash
# Author: Clemens Schwaighofer
# Description:
# Drop and restore databases from a folder for dump files created by
# pg_db_dump_file.sh

# Print command line usage help.
function usage () {
	cat <<- EOT
	Restores a list of database dumps from a folder to a database server
	Usage: ${0##*/} -f <folder> [-j <jobs>] [-e <encoding>] [-h <host>] [-p <port>] [-r] [-g] [-n]
	-e <encoding>: override global encoding, will be overruled by per file encoding
	-p <port>: override default port from file.
	-h <host>: override default host from file.
	-f <folder>: dump folder source. Where the database dump files are located. This is a must set option
	-j <jobs>: Run how many jobs Parallel. If not set, 2 jobs are run parallel
	-r: use redhat base paths instead of debian
	-g: do not import globals file
	-n: dry run, do not import or change anything
	EOT
}

# -- defaults
_port=5432;
PORT='';           # set only when the user passes -p
_host='local';
HOST='';           # set only when the user passes -h
_encoding='UTF8';
ENCODING_SET='';   # flag: non empty once the user passed -e
set_encoding='';
ERROR=0;
REDHAT=0;
IMPORT_GLOBALS=1;
TEMPLATEDB='template0'; # truly empty for restore
DUMP_FOLDER='';
MAX_JOBS='';
BC='/usr/bin/bc';
PORT_REGEX="^[0-9]{4,5}$";
OPTARG_REGEX="^-";
DRY_RUN=0;
PG_PARAM_HOST=();
PG_PARAM_PORT=();

# options check
while getopts ":f:j:h:p:e:grnm" opt; do
	# pre test for an unfilled option argument, or an argument that is
	# actually the next option flag (starts with "-")
	if [ "${opt}" = ":" ] || [[ "${OPTARG-}" =~ ${OPTARG_REGEX} ]]; then
		if [ "${opt}" = ":" ]; then
			CHECK_OPT=${OPTARG};
		else
			CHECK_OPT=${opt};
		fi;
		case ${CHECK_OPT} in
			h)
				echo "-h needs a host name";
				ERROR=1;
				;;
			f)
				echo "-f needs a folder name";
				ERROR=1;
				;;
			p)
				# fixed: message previously said "-h needs a port number"
				echo "-p needs a port number";
				ERROR=1;
				;;
			e)
				echo "-e needs an encoding";
				ERROR=1;
				;;
			j)
				echo "-j needs a numeric value for parallel jobs";
				ERROR=1;
				;;
		esac
	fi;
	case $opt in
		# f|file)
		f)
			DUMP_FOLDER=$OPTARG;
			;;
		# j|jobs)
		j)
			MAX_JOBS=${OPTARG};
			;;
		# e|encoding)
		e)
			# first -e wins; fixed: previously checked/set a never used
			# "encoding" variable, so the -e option had no effect
			if [ -z "${ENCODING_SET}" ]; then
				_encoding=$OPTARG;
				ENCODING_SET=1;
			fi;
			;;
		# h|hostname)
		h)
			# first -h wins; fixed: previously checked the never set "$host"
			if [ -z "$HOST" ]; then
				# do not set if local name (uses socket)
				if [ "$OPTARG" != "local" ]; then
					PG_PARAM_HOST=("-h" "${OPTARG}");
				fi;
				_host=$OPTARG;
				HOST=$OPTARG; # fixed typo: was $OPRTARG (so HOST stayed empty)
			fi;
			;;
		# p|port)
		p)
			# first -p wins; fixed: previously checked the never set "$port"
			if [ -z "$PORT" ]; then
				PG_PARAM_PORT=("-p" "${OPTARG}");
				_port=$OPTARG;
				PORT=$OPTARG;
			fi;
			;;
		# g|globals)
		g)
			IMPORT_GLOBALS=0;
			;;
		# r|redhat)
		r)
			REDHAT=1;
			;;
		# n|dry-run)
		n)
			DRY_RUN=1;
			;;
		# m|help)
		m)
			usage;
			exit 0;
			;;
		\?)
			echo -e "\n Option does not exist: $OPTARG\n";
			usage;
			exit 1;
			;;
	esac;
done;

# abort with a failure status on any option error (was "exit 0")
if [ "${ERROR}" -eq 1 ]; then
	exit 1;
fi;

if [ "$REDHAT" -eq 1 ]; then
	# Redhat base path (official packages install below '/usr/pgsql-<version>')
	PG_BASE_PATH='/usr/pgsql-';
else
	# Debian base path
	PG_BASE_PATH='/usr/lib/postgresql/';
fi;

# check that the port is a valid number
if ! [[ "$_port" =~ $PORT_REGEX ]]; then
	echo "The port needs to be a valid number: $_port";
	exit 1;
fi;

NUMBER_REGEX="^[0-9]{1,}$";
# find the max allowed jobs based on the cpu count
# because setting more than this is not recommended
# so this fails in vmware hosts where we have random cpus assigned
_max_jobs=$(nproc --all);
# reject MAX_JOBS if it is not a number, smaller than 1 or greater than _max_jobs
if [ -n "${MAX_JOBS}" ]; then
	# check that it is a valid number
	if [[ ! ${MAX_JOBS} =~ ${NUMBER_REGEX} ]]; then
		echo "Please enter a number for the -j option";
		exit 1;
	fi;
	if [ "${MAX_JOBS}" -lt 1 ] || [ "${MAX_JOBS}" -gt "${_max_jobs}" ]; then
		echo "The value for the jobs option -j cannot be smaller than 1 or bigger than ${_max_jobs}";
		exit 1;
	fi;
else
	# auto set the MAX_JOBS based on the cpu count
	MAX_JOBS=${_max_jobs};
fi;

if [ "$DUMP_FOLDER" = '' ]; then
	echo "Please provide a source folder for the dump files with the -f option";
	# exit non zero so callers can detect the failure (was a bare "exit")
	exit 1;
fi;
# check that source folder is there
if [ ! -d "$DUMP_FOLDER" ]; then
	echo "Folder '$DUMP_FOLDER' does not exist";
	exit 1;
fi;

LOG_PATH=$DUMP_FOLDER'/logs/';
# create logs folder if missing
if [ ! -d "$LOG_PATH" ]; then
	echo "+ Creating '$LOG_PATH' folder";
	mkdir -p "$LOG_PATH";
	if [ ! -d "$LOG_PATH" ]; then
		echo "[!] Creation of '$LOG_PATH' folder failed";
		exit 1;
	fi;
fi;

# check if we have the 'bc' command available or not
if [ -f "${BC}" ]; then
	BC_OK=1;
else
	BC_OK=0;
fi;

# METHOD: convert_time
# PARAMS: timestamp in seconds or with milliseconds (nnnn.nnn)
# RETURN: formated string with human readable time (d/h/m/s)
# CALL  : var=$(convert_time $timestamp);
# DESC  : converts a timestamp or a timestamp with float milliseconds
#         to a human readable format
#         output is in days/hours/minutes/seconds
function convert_time {
	# every call site uses $(convert_time ...), so assignments never leaked
	# to the caller anyway; declared local to keep the function self contained
	local timestamp ms timegroups timenames output time_string timeslice i;
	timestamp=${1};
	# round to THREE digits so the fraction part is real milliseconds
	# (was four digits, which displayed tenths of milliseconds as "ms",
	# e.g. 0.5s printed as 5000ms instead of 500ms)
	timestamp=$(printf "%1.3f" "$timestamp");
	# get the ms part and remove any leading 0
	ms=$(echo "${timestamp}" | cut -d "." -f 2 | sed -e 's/^0*//');
	timestamp=$(echo "${timestamp}" | cut -d "." -f 1);
	timegroups=(86400 3600 60 1); # day, hour, min, sec
	timenames=("d" "h" "m" "s"); # day, hour, min, sec
	output=( );
	time_string='';
	for timeslice in "${timegroups[@]}"; do
		# floor for the division, push to output
		if [ ${BC_OK} -eq 1 ]; then
			output[${#output[*]}]=$(echo "${timestamp}/${timeslice}" | bc);
			timestamp=$(echo "${timestamp}%${timeslice}" | bc);
		else
			output[${#output[*]}]=$(awk "BEGIN {printf \"%d\", ${timestamp}/${timeslice}}");
			timestamp=$(awk "BEGIN {printf \"%d\", ${timestamp}%${timeslice}}");
		fi;
	done;
	for ((i=0; i<${#output[@]}; i++)); do
		if [ "${output[$i]}" -gt 0 ] || [ -n "$time_string" ]; then
			if [ -n "${time_string}" ]; then
				time_string=${time_string}" ";
			fi;
			time_string=${time_string}${output[$i]}${timenames[$i]};
		fi;
	done;
	# milliseconds must be filled, but we also check that they are not the
	# "nan" string that can appear in the original value
	if [ -n "${ms}" ] && [ "${ms}" != "nan" ]; then
		if [ "${ms}" -gt 0 ]; then
			time_string="${time_string} ${ms}ms";
		fi;
	fi;
	# just in case the time is 0
	if [ -z "${time_string}" ]; then
		time_string="0s";
	fi;
	echo -n "${time_string}";
}

# default version (for folder)
PG_PATH_VERSION='15/';
PG_PATH_BIN='bin/';
# postgresql binaries
PG_DROPDB="dropdb";
PG_CREATEDB="createdb"; PG_CREATELANG="createlang"; PG_RESTORE="pg_restore"; PG_CREATEUSER="createuser"; PG_PSQL="psql"; # default port and host EXCLUDE_LIST="pg_globals"; # space separated LOG_FILE="tee -a "$LOG_PATH/PG_RESTORE_DB_FILE.$(date +"%Y%m%d_%H%M%S").log""; # get the count for DBs to import db_count=$(find "${DUMP_FOLDER}" -name "*.sql" -print | wc -l); # start info if [ "${DUMP_FOLDER}" = "." ]; then _DUMP_FOLDER="[current folder]"; else _DUMP_FOLDER=${DUMP_FOLDER}; fi; if [ -z "${HOST}" ]; then _HOST="[auto host]"; else _HOST=${HOST}; fi; if [ -z "${PORT}" ]; then _PORT="[auto port]"; else _PORT=${PORT}; fi; if [ ${DRY_RUN} ]; then echo "**** [DRY RUN] ****"; fi; echo "= Will import $db_count databases from $_DUMP_FOLDER" | $LOG_FILE; echo "= into the DB server $_HOST:$_PORT" | $LOG_FILE; echo "= running $MAX_JOBS jobs" | $LOG_FILE; echo "= import logs: $LOG_PATH" | $LOG_FILE; echo "" | $LOG_FILE; pos=1; # go through all the files an import them into the database MASTERSTART=$(date +"%s"); master_start_time=$(date +"%F %T"); # first import the pg_globals file if this is requested, default is yes if [ "$IMPORT_GLOBALS" -eq 1 ]; then start_time=$(date +"%F %T"); START=$(date +"%s"); # get the pg_globals file echo "=[Globals Restore]=START=[$start_time]==================================================>" | $LOG_FILE; # get newest and only the first one file=$(find "$DUMP_FOLDER" -name "pg_global*" -type f -printf "%Ts\t%p\n" | sort -nr | head -1); filename=$(basename "$file"); # the last _ is for version 10 or higher # db version, without prefix of DB type version=$(echo "$filename" | cut -d "." -f 4 | cut -d "-" -f 2 | cut -d "_" -f 1); cut_pos=4; # if this is < 10 then we need the second part too if [ "${version}" -lt 10 ]; then # db version, second part (after .) version=$version'.'$(echo "$filename" | cut -d "." -f 5 | cut -d "_" -f 1); cut_pos=5; fi; # hostname of original DB, can be used as target host too __host=$(echo "$filename" | cut -d "." 
-f ${cut_pos} | cut -d "_" -f 2); # port of original DB, can be used as target port too __port=$(echo "$filename" | cut -d "." -f ${cut_pos} | cut -d "_" -f 3); # override file port over given port if it differs and is valid if [ -z "$_port" ] && [ "$__port" != "$_port" ] && [[ "$__port" =~ $PORT_REGEX ]] ; then _port=$__port; PG_PARAM_PORT=("-p" "$_port"); fi; if [ -z "$_host" ] && [ "$__host" != "local" ]; then _host=$__host; PG_PARAM_HOST=("-h" "${_host}"); fi; # create the path to the DB from the DB version in the backup file if [ -n "$version" ]; then PG_PATH_VERSION_LOCAL="${version}/"; else PG_PATH_VERSION_LOCAL="${PG_PATH_VERSION}"; fi; PG_PATH="${PG_BASE_PATH}${PG_PATH_VERSION_LOCAL}${PG_PATH_BIN}"; echo "+ Restore globals file: $filename to [$_host:$_port] @ $(date +"%F %T")" | $LOG_FILE; _PG_PARAMS=("-U" "postgres"); _PG_PARAMS+=("${PG_PARAM_HOST[@]}"); _PG_PARAMS+=("${PG_PARAM_PORT[@]}"); _PG_PARAMS+=("-f" "$file" "-e" "-q" "-X" "template1"); PG_COMMAND=("${PG_PATH}${PG_PSQL}" "${_PG_PARAMS[@]}"); if [ ${DRY_RUN} -eq 0 ]; then "${PG_COMMAND[@]}" | $LOG_FILE; else echo "${PG_COMMAND[*]}" | $LOG_FILE; fi; DURATION=$(($(date +"%s")-START)); printf "=[Globals Restore]=END===[%s]========================================================>\n" "$(convert_time ${DURATION})" | $LOG_FILE; fi; for file in "$DUMP_FOLDER/"*.sql; do start_time=$(date +"%F %T"); START=$(date +"%s"); echo "=[$pos/$db_count]=START=[$start_time]==================================================>" | $LOG_FILE; # the encoding set_encoding=''; # get the filename filename=$(basename "$file"); # get the databse, user # default file name is ...-____