2021-12-14 06:35:56 +09:00
#!/bin/bash
# Author: Clemens Schwaighofer
# Description:
# Drop and restore one database
function usage ( )
{
cat <<- EOT
Restores a single database dump to a database
Usage: ${ 0 ##/*/ } -o <DB OWNER> -d <DB NAME> -f <FILE NAME> [ -h <DB HOST>] [ -p <DB PORT>] [ -e <ENCODING>] [ -i <POSTGRES VERSION>] [ -j <JOBS>] [ -s] [ -r| -a] [ -n]
-o <DB OWNER>: The user who will be owner of the database to be restored
-d <DB NAME>: The database to restore the file to
-f <FILE NAME>: the data that should be loaded
-h <DB HOST>: optional hostname, if not given 'localhost' is used. Use 'local' to use unix socket
-p <DB PORT>: optional port number, if not given '5432' is used
-e <ENCODING>: optional encoding name, if not given 'UTF8' is used
-i <POSTGRES VERSION>: optional postgresql version in the format X.Y, if not given the default is used ( current active)
-j <JOBS>: Run how many jobs Parallel. If not set, 2 jobs are run parallel
-s: Restore only schema, no data
-r: use redhat base paths instead of debian
-a: use amazon base paths instead of debian
-n: dry run, do not do anything, just test flow
EOT
}
_port = 5432
_host = 'local' ;
_encoding = 'UTF8' ;
role = '' ;
schema = '' ;
NO_ASK = 0;
TEMPLATEDB = 'template0' ;
SCHEMA_ONLY = 0;
REDHAT = 0;
AMAZON = 0;
DRY_RUN = 0;
BC = '/usr/bin/bc' ;
PORT_REGEX = " ^[0-9]{4,5} $" ;
OPTARG_REGEX = "^-" ;
MAX_JOBS = '' ;
# if we have options, set them and then ignore anything below
while getopts ":o:d:h:f:p:e:i:j:raqnms" opt; do
# pre test for unfilled
if [ " ${ opt } " = ":" ] || [ [ " ${ OPTARG - } " = ~ ${ OPTARG_REGEX } ] ] ; then
if [ " ${ opt } " = ":" ] ; then
CHECK_OPT = ${ OPTARG } ;
else
CHECK_OPT = ${ opt } ;
fi ;
case ${ CHECK_OPT } in
o)
echo "-o needs an owner name" ;
ERROR = 1;
; ;
d)
echo "-d needs a database name" ;
ERROR = 1;
; ;
h)
echo "-h needs a host name" ;
ERROR = 1;
; ;
f)
echo "-f needs a file name" ;
ERROR = 1;
; ;
p)
echo "-h needs a port number" ;
ERROR = 1;
; ;
e)
echo "-e needs an encoding" ;
ERROR = 1;
; ;
i)
echo "-i needs a postgresql version" ;
ERROR = 1;
; ;
j)
echo "-j needs a numeric value for parallel jobs" ;
ERROR = 1;
; ;
esac
fi ;
case $opt in
o| owner)
if [ -z " $owner " ] ; then
owner = $OPTARG ;
# if not standard user we need to set restore role
# so tables/etc get set to new user
role = " --no-owner --role $owner " ;
fi ;
; ;
d| database)
if [ -z " $database " ] ; then
database = $OPTARG ;
fi ;
; ;
e| encoding)
if [ -z " $encoding " ] ; then
encoding = $OPTARG ;
fi ;
; ;
f| file)
if [ -z " $file " ] ; then
file = $OPTARG ;
fi ;
; ;
h| hostname)
2022-01-17 15:12:12 +09:00
if [ -z " $_host " ] ; then
2021-12-14 06:35:56 +09:00
# if local it is socket
if [ " $OPTARG " != "local" ] ; then
host = '-h ' $OPTARG ;
else
host = '' ;
fi ;
_host = $OPTARG ;
fi ;
; ;
p| port)
if [ -z " $port " ] ; then
port = '-p ' $OPTARG ;
_port = $OPTARG ;
fi ;
; ;
i| ident)
if [ -z " $ident " ] ; then
ident = $OPTARG ;
fi ;
; ;
j| jobs )
MAX_JOBS = ${ OPTARG } ;
; ;
q| quiet)
NO_ASK = 1;
; ;
r| redhat)
REDHAT = 1;
; ;
a| amazon)
AMAZON = 1;
; ;
n| dry-run)
DRY_RUN = 1;
; ;
s| schema-only)
SCHEMA_ONLY = 1
schema = '-s' ;
; ;
m| help )
usage;
exit 0;
; ;
\? )
echo -e " \n Option does not exist: $OPTARG \n " ;
usage;
exit 1;
; ;
esac ;
done ;
if [ " $REDHAT " -eq 1 ] && [ " $AMAZON " -eq 1 ] ; then
echo "You cannot set the -a and -r flag at the same time" ;
exit 1;
fi ;
# check that the port is a valid number
if ! [ [ " $_port " = ~ $PORT_REGEX ] ] ; then
echo " The port needs to be a valid number: $_port " ;
exit 1;
fi ;
NUMBER_REGEX = " ^[0-9]{1,} $" ;
# find the max allowed jobs based on the cpu count
# because setting more than this is not recommended
cpu = $( cat /proc/cpuinfo | grep processor | tail -n 1) ;
_max_jobs = $[ ${ cpu ##* : } +1 ] # +1 because cpu count starts with 0
# if the MAX_JOBS is not number or smaller 1 or greate _max_jobs
if [ ! -z " ${ MAX_JOBS } " ] ; then
# check that it is a valid number
if ! [ [ " $MAX_JOBS " = ~ $NUMBER_REGEX ] ] ; then
echo "Please enter a number for the -j option" ;
exit 1;
fi ;
if [ " ${ MAX_JOBS } " -lt 1 ] || [ " ${ MAX_JOBS } " -gt ${ _max_jobs } ] ; then
echo " The value for the jobs option -j cannot be smaller than 1 or bigger than ${ _max_jobs } " ;
exit 1;
fi ;
else
# auto set the MAX_JOBS based on the cpu count
MAX_JOBS = ${ _max_jobs } ;
fi ;
# check if we have the 'bc' command available or not
if [ -f " ${ BC } " ] ; then
BC_OK = 1;
else
BC_OK = 0;
fi ;
if [ ! -f " ${ file } " ] ; then
echo "File name needs to be provided or file could not be found" ;
exit 1;
fi ;
# METHOD: convert_time
# PARAMS: timestamp in seconds or with milliseconds (nnnn.nnnn)
# RETURN: formated string with human readable time (d/h/m/s)
# CALL : var=$(convert_time $timestamp);
# DESC : converts a timestamp or a timestamp with float milliseconds
# to a human readable format
# output is in days/hours/minutes/seconds
function convert_time
{
timestamp = ${ 1 } ;
# round to four digits for ms
timestamp = $( printf "%1.4f" $timestamp ) ;
# get the ms part and remove any leading 0
ms = $( echo ${ timestamp } | cut -d "." -f 2 | sed -e 's/^0*//' ) ;
timestamp = $( echo ${ timestamp } | cut -d "." -f 1) ;
timegroups = ( 86400 3600 60 1) ; # day, hour, min, sec
timenames = ( "d" "h" "m" "s" ) ; # day, hour, min, sec
output = ( ) ;
time_string = ;
for timeslice in ${ timegroups [@] } ; do
# floor for the division, push to output
if [ ${ BC_OK } -eq 1 ] ; then
output[ ${# output [*] } ] = $( echo " ${ timestamp } / ${ timeslice } " | bc) ;
timestamp = $( echo " ${ timestamp } % ${ timeslice } " | bc) ;
else
output[ ${# output [*] } ] = $( awk " BEGIN {printf \"%d\", ${ timestamp } / ${ timeslice } } " ) ;
timestamp = $( awk " BEGIN {printf \"%d\", ${ timestamp } % ${ timeslice } } " ) ;
fi ;
done ;
for ( ( i = 0; i<${# output [@] } ; i++) ) ; do
if [ ${ output [ $i ] } -gt 0 ] || [ ! -z " $time_string " ] ; then
if [ ! -z " ${ time_string } " ] ; then
time_string = ${ time_string } " " ;
fi ;
time_string = ${ time_string } ${ output [ $i ] } ${ timenames [ $i ] } ;
fi ;
done ;
if [ ! -z ${ ms } ] && [ ${ ms } -gt 0 ] ; ; then
time_string = ${ time_string } " " ${ ms } "ms" ;
fi ;
# just in case the time is 0
if [ -z " ${ time_string } " ] ; then
time_string = "0s" ;
fi ;
echo -n " ${ time_string } " ;
}
# for the auto find, we need to get only the filename, and therefore remove all path info
db_file = ` basename $file ` ;
# if file is set and exist, but no owner or database are given, use the file name data to get user & database
if [ -r " $file " ] && ( [ ! " $owner " ] || [ ! " $database " ] || [ ! " $encoding " ] ) ; then
# file name format is
# <database>.<owner>.<encoding>.<db type>-<version>_<host>_<port>_<date>_<time>_<sequence>
# we only are interested in the first two
_database = ` echo $db_file | cut -d "." -f 1` ;
_owner = ` echo $db_file | cut -d "." -f 2` ;
__encoding = ` echo $db_file | cut -d "." -f 3` ;
# set the others as optional
_ident = ` echo $db_file | cut -d "." -f 4 | cut -d "-" -f 2` ; # db version first part
_ident = $_ident '.' ` echo $db_file | cut -d "." -f 5 | cut -d "_" -f 1` ; # db version, second part (after .)
__host = ` echo $db_file | cut -d "." -f 4 | cut -d "_" -f 2` ;
__port = ` echo $db_file | cut -d "." -f 4 | cut -d "_" -f 3` ;
# if any of those are not set, override by the file name settings
if [ ! " $owner " ] ; then
owner = $_owner ;
fi ;
if [ ! " $database " ] ; then
database = $_database ;
fi ;
# port hast to be a valid number, at least 4 digits long and maximum 5 digits
if [ ! " $port " ] && [ [ $__port = ~ $PORT_REGEX ] ] ; then
port = '-p ' $__port ;
_port = $__port ;
fi ;
# unless it is local and no command line option is set, set the target connection host
if [ ! " $host " ] && [ " $__host " != "local" ] && [ " $_host " != "local" ] ; then
host = '-h ' $__host ;
_host = $__host ;
fi ;
if [ ! " $encoding " ] ; then
if [ ! -z " $__encoding " ] ; then
encoding = $__encoding ;
else
encoding = $_encoding ;
fi ;
fi ;
if [ ! " $ident " ] ; then
ident = $_ident ;
fi ;
fi ;
# if no user or database, exist
if [ ! " $file " ] || [ ! -f " $file " ] ; then
echo "The file has not been set or the file given could not be found." ;
exit 1;
fi ;
if [ ! " $owner " ] || [ ! " $encoding " ] || [ ! " $database " ] ; then
echo "The Owner, database name and encoding could not be set automatically, the have to be given as command line options." ;
exit 1;
fi ;
if [ " $REDHAT " -eq 1 ] ; then
# Debian base path
PG_BASE_PATH = '/usr/pgsql-' ;
elif [ " $AMAZON " -eq 1 ] ; then
PG_BASE_PATH = '/usr/lib64/pgsql' ;
else
# Redhat base path (for non official ones would be '/usr/pgsql-'
PG_BASE_PATH = '/usr/lib/postgresql/' ;
fi ;
# if no ident is given, try to find the default one, if not fall back to pre set one
if [ ! -z " $ident " ] ; then
PG_PATH = $PG_BASE_PATH $ident '/bin/' ;
if [ ! -d " $PG_PATH " ] ; then
ident = '' ;
fi ;
fi ;
if [ -z " $ident " ] ; then
# try to run psql from default path and get the version number
ident = $( pgv = $( pg_dump --version| grep "pg_dump" | cut -d " " -f 3) ; if [ [ $( echo " ${ pgv } " | cut -d "." -f 1) -ge 10 ] ] ; then echo " ${ pgv } " | cut -d "." -f 1; else echo " ${ pgv } " | cut -d "." -f 1,2; fi ) ;
if [ ! -z " $ident " ] ; then
PG_PATH = $PG_BASE_PATH $ident '/bin/' ;
else
# hard setting
ident = '9.6' ;
PG_PATH = $PG_BASE_PATH '9.6/bin/' ;
fi ;
fi ;
PG_DROPDB = $PG_PATH "dropdb" ;
PG_CREATEDB = $PG_PATH "createdb" ;
PG_CREATELANG = $PG_PATH "createlang" ;
PG_RESTORE = $PG_PATH "pg_restore" ;
PG_PSQL = $PG_PATH "psql" ;
TEMP_FILE = "temp" ;
LOG_FILE_EXT = $database .` date +"%Y%m%d_%H%M%S" ` ".log" ;
# core abort if no core files found
if [ ! -f $PG_PSQL ] || [ ! -f $PG_DROPDB ] || [ ! -f $PG_CREATEDB ] || [ ! -f $PG_RESTORE ] ; then
echo "One of the core binaries (psql, pg_dump, createdb, pg_restore) could not be found." ;
echo " Search Path: ${ PG_PATH } " ;
echo "Perhaps manual ident set with -i is necessary" ;
echo "Backup aborted" ;
exit 0;
fi ;
# check if port / host settings are OK
# if I cannot connect with user postgres to template1, the restore won't work
output = ` echo "SELECT version();" | $PG_PSQL -U postgres $host $port template1 -q -t -X -A -F "," 2>& 1` ;
found = ` echo " $output " | grep "PostgreSQL" ` ;
# if the output does not have the PG version string, we have an error and abort
if [ -z " $found " ] ; then
echo " Cannot connect to the database: $output " ;
exit 1;
fi ;
echo " Will drop database ' $database ' on host ' $_host : $_port ' and load file ' $file ' with user ' $owner ', set encoding ' $encoding ' and use database version ' $ident ' " ;
if [ $SCHEMA_ONLY -eq 1 ] ; then
echo "!!!!!!! WILL ONLY RESTORE SCHEMA, NO DATA !!!!!!!" ;
fi ;
if [ $NO_ASK -eq 1 ] ; then
go = 'yes' ;
else
echo "Continue? type 'yes'" ;
read go;
fi ;
if [ " $go " != 'yes' ] ; then
echo "Aborted" ;
exit;
else
start_time = ` date +"%F %T" ` ;
START = ` date +'%s' ` ;
echo " Drop DB $database [ $_host : $_port ] @ $start_time " ;
# DROP DATABASE
if [ $DRY_RUN -eq 0 ] ; then
$PG_DROPDB -U postgres $host $port $database ;
else
echo $PG_DROPDB -U postgres $host $port $database ;
fi ;
# CREATE DATABASE
echo " Create DB $database with $owner and encoding $encoding on [ $_host : $_port ] @ `date + " %F %T"`" ;
if [ $DRY_RUN -eq 0 ] ; then
$PG_CREATEDB -U postgres -O $owner -E $encoding -T $TEMPLATEDB $host $port $database ;
else
echo $PG_CREATEDB -U postgres -O $owner -E $encoding -T $TEMPLATEDB $host $port $database ;
fi ;
# CREATE plpgsql LANG
if [ -f $PG_CREATELANG ] ; then
echo " Create plpgsql lang in DB $database on [ $_host : $_port ] @ `date + " %F %T"`" ;
if [ $DRY_RUN -eq 0 ] ; then
$PG_CREATELANG -U postgres plpgsql $host $port $database ;
else
echo $PG_CREATELANG -U postgres plpgsql $host $port $database ;
fi ;
fi ;
# RESTORE DATA
echo " Restore data from $file to DB $database on [ $_host : $_port ] with Jobs $MAX_JOBS @ `date + " %F %T"`" ;
if [ $DRY_RUN -eq 0 ] ; then
$PG_RESTORE -U postgres -d $database -F c -v -c $schema -j $MAX_JOBS $host $port $role $file 2>restore_errors.$LOG_FILE_EXT ;
else
echo $PG_RESTORE -U postgres -d $database -F c -v -c $schema -j $MAX_JOBS $host $port $role $file 2>restore_errors.$LOG_FILE_EXT ;
fi ;
# BUG FIX FOR POSTGRESQL 9.6.2 db_dump
# it does not dump the default public ACL so the owner of the DB cannot access the data, check if the ACL dump is missing and do a basic restore
if [ -z $( $PG_RESTORE -l $file | grep -- "ACL - public postgres" ) ] ; then
echo " Fixing missing basic public schema ACLs from DB $database [ $_host : $_port ] @ `date + " %F %T"`" ;
# grant usage on schema public to public;
# grant create on schema public to public;
echo "GRANT USAGE ON SCHEMA public TO public;" | $PG_PSQL -U postgres -Atq $host $port $database ;
echo "GRANT CREATE ON SCHEMA public TO public;" | $PG_PSQL -U postgres -Atq $host $port $database ;
fi ;
# SEQUENCE RESET DATA COLLECTION
echo " Resetting all sequences from DB $database [ $_host : $_port ] @ `date + " %F %T"`" ;
reset_query = "SELECT 'SELECT SETVAL(' ||quote_literal(S.relname)|| ', MAX(' ||quote_ident(C.attname)|| ') ) FROM ' ||quote_ident(T.relname)|| ';' FROM pg_class AS S, pg_depend AS D, pg_class AS T, pg_attribute AS C WHERE S.relkind = 'S' AND S.oid = D.objid AND D.refobjid = T.oid AND D.refobjid = C.attrelid AND D.refobjsubid = C.attnum ORDER BY S.relname;" ;
if [ $DRY_RUN -eq 0 ] ; then
echo " ${ reset_query } " | $PG_PSQL -U postgres -Atq $host $port -o $TEMP_FILE $database
$PG_PSQL -U postgres $host $port -e -f $TEMP_FILE $database 1>output_sequence.$LOG_FILE_EXT 2>errors_sequence.$database .$LOG_FILE_EXT ;
rm $TEMP_FILE ;
else
echo " ${ reset_query } " ;
echo $PG_PSQL -U postgres $host $port -e -f $TEMP_FILE $database 1>output_sequence.$LOG_FILE_EXT 2>errors_sequence.$database .$LOG_FILE_EXT ;
fi ;
echo " Restore of data $file for DB $database [ $_host : $_port ] finished " ;
DURATION = $[ ` date +'%s' ` -$START ] ;
echo " Start at $start_time and end at `date + " %F %T" ` and ran for $( convert_time ${ DURATION } ) " ;
echo "=== END RESTORE" >>restore_errors.$LOG_FILE_EXT ;
fi ;