#!/bin/sh
#
# Freshmeat release: 2004-09-21
#
# =head1 NAME
#
# histbackup - backup a directory using an incremental technique
#
# =head1 SYNOPSIS
#
# histbackup [options...] [host:]source backupdir [rsyncopts...]
#
# histbackup [options...] --inplace parent [rsyncopts...]
#
# histbackup [options...] --no-rsync parent
#
# =head1 DESCRIPTION
#
# I<histbackup> makes incremental backups of a directory tree
# in a set of directories named by dates, or date-time if the date already exists.
# The new backup is prepopulated by hardlinks of each file from the previous backup
# and then updated with rsync(1).
# This arranges that the only new content of each new backup is fresh copies of the changed files.
# In this way each backup directory is a full copy of the source directory
# but the disc space cost is that of an incremental backup.
# Because it uses rsync,
# it is possible to efficiently back up remote directories in this manner.
# The companion script I<histbackup-prune> keeps the growth under control.
#
# =head1 SETUP AND USE
#
# Before deployment, decide who will own and do the backups.
# Do all the local (backup end) setup as that user.
# If you're not a sysadmin then that user is probably yourself.
#
# Generally, setup is as follows:
#
# =over 4
#
# =item Create the backup area.
#
# Decide where the backup data will be. Make an empty directory to hold it:
#
#       mkdir /path/to/backup/area
#
# =item Arrange access to the source data
#
# If the data to backup are local to this machine
# you merely need read access to it.
# If the data are remote
# you will need ssh(1) access to the remote host as a user with read access to the data there.
# Test it:
#
#       ssh user@remote ls -l /path/to/source/directory
#
# =item Perform a test backup
#
# Histbackup does the right thing when the backup area is empty
# so just:
#
#       histbackup -x user@remote:/path/to/source/directory /path/to/backup/area
#
# =item Schedule your backups
#
# If these backups are to happen regularly
# a crontab(5) entry should be made to do so.
# Use "C<crontab -e>" to make this.
# Example:
#
#       MAILTO=backup-admin@yoursite
#       PATH=$PATH:/opt/css/bin
#       0 0 * * * histbackup -x user@remote:/path/to/source/directory /path/to/backup/area
#
# That will run a I<histbackup> every day at midnight,
# delivering the output of the run by email to B<backup-admin@yoursite>.
#
# Note that cron's environment is very spartan
# (it does not source C</etc/profile> or C<~/.profile>)
# and so you may need to arrange that the CSS package's environment is present.
# Also,
# because cron is a batch situation,
# your ssh access will almost certainly require a special purpose passphraseless key
# to access the remote host.
#
# =back
#

# Treat use of an unset variable as an error.
# Set here rather than on the #! line so it also applies under "sh histbackup".
set -u

trace=            # command prefix used to trace important commands (-x)
symlink=LATEST    # name of the "latest backup" symlink; empty for none
src=              # source directory, possibly host:path
dest=             # backup area holding the dated subdirectories
mono=             # nonempty: do not pass --delete to rsync
inplace=          # nonempty: source is $dest/current
linkonly=         # nonempty: make the link tree only, skip rsync
rrsync=rsync      # remote rsync command, passed via --rsync-path
rsopts=           # extra rsync options gathered from our own options
verbose=
[ -t 1 ] && verbose=1

cmd=$(basename "$0")
usage="Usage: $cmd [options...] [host:]source backupdir [rsyncopts...]
       $cmd [options...] --inplace parent [rsyncopts...]
       $cmd [options...] --no-rsync parent
  -x    Trace execution. Recite linktree and rsync invocations.
  -v, --verbose
        Verbose. Passed to rsync.
  -P    Passed to rsync.
  --inplace
        In addition to the dated subdirectories in the backupdir,
        take the source to be backupdir/current.
        Turns off --symlink.
  --no-delete, --mono
        Monotonic growth - don't pass --delete to the rsync pass.
  --delete, --delete-excluded
        Passed to rsync.
  --no-rsync, --linkonly
        Don't do the rsync pass - just set up the new link tree
        and report its basename.
  --rsync-path rrsync
        Execute the command \"rrsync\" as the remote rsync.
        Passed to rsync.
  --symlink name
        Make a symlink named \"name\" pointing at the latest backup.
        Default: $symlink
        Use the empty string for no symlink."

badopts=

# have "cp -rpl"? otherwise rely on my linktree script
# (grep -c rather than "grep | wc -l": some wc implementations pad the count)
if [ "$(cp --help 2>&1 | grep -c ' -[rpl][, ]')" = 3 ]
then
  mklinktree(){ $trace cp -rpl "$1" "$2"; }
  cpr(){ $trace cp -rp "$1" "$2"; }
else
  mklinktree(){ $trace linktree "$1" "$2"; }
  cpr(){ $trace cpdir "$1" "$2"; }
fi

daycode(){ date '+%Y-%m-%d'; }
datecode(){ date '+%Y-%m-%d-%H:%M:%S'; }

# rsync capabilities at local end
# (rsync_has_link_dest is recorded here; nothing below consults it yet)
rsync_has_link_dest=
rsv=$(rsync --version | sed -n 's/^rsync *version \([1-9]\)\.\([0-9][0-9]*\)\.\([0-9][0-9]*\) .*/\1 \2 \3/p')
# $rsv is deliberately unquoted: split "major minor patch" into three words
if [ -n "$rsv" ] && rsvn=$(set -- $rsv; printf "%d%03d%03d\n" "$@")
then
  [ "$rsvn" -ge 2005006 ] && rsync_has_link_dest=1
  ## other end needs proto 29 also
  ## [ "$rsvn" -ge 2006004 ] && rsopts="$rsopts --fuzzy"
fi

# =head1 OPTIONS
#
# =over 4
#
while [ $# -gt 0 ]
do
  case $1 in

    # =item --inplace
    #
    # Specify the parent of the source directory;
    # backs up I<parent>/current to I<parent>/I<date>.
    #
    --inplace)
      inplace=1
      symlink=
      ;;

    # =item --no-delete, --mono
    #
    # Monotonic growth.
    # Suppresses passing the B<--delete> option to rsync.
    #
    --no-delete|--mono)
      mono=1
      ;;

    # =item --delete
    #
    # Passed to rsync.
    #
    # =item --delete-excluded
    #
    # Passed to rsync.
    #
    --delete|--delete-excluded)
      mono=
      rsopts="$rsopts $1"
      ;;

    # =item --no-rsync, --linkonly
    #
    # Specify the parent backup directory only and omit the rsync.
    # Emits the backup directory linktree subdirectory name on standard output.
    #
    --no-rsync|--linkonly)
      # Redirect only once: repeating the exec would point fd 3 at stderr.
      if [ -z "$linkonly" ]
      then
        linkonly=1
        exec 3>&1 1>&2      # ensure clean stdout
      fi
      ;;

    # =item --rsync-path rrsync
    #
    # Passed to rsync.
    # Specifies the pathname to the remote rsync executable.
    #
    --rsync-path)
      if [ $# -lt 2 ]
      then
        echo "$cmd: $1: missing rrsync argument" >&2
        badopts=1
      else
        rrsync=$2
        shift
      fi
      ;;

    # =item --symlink name
    #
    # After the backup is done,
    # make a symbolic link named I<name> pointing at the latest backup directory.
    # This is useful so that other programs may trivially reference the latest
    # backup without having to figure out its name.
    # Eg: B<--symlink LATEST>.
    #
    --symlink)
      if [ $# -lt 2 ]
      then
        echo "$cmd: $1: missing name argument" >&2
        badopts=1
      else
        symlink=$2
        shift
      fi
      ;;

    # =item -v, --verbose
    #
    # Turn on verbose mode.
    # Also passes B<-v> to rsync.
    #
    -v|--verbose)
      verbose=1
      rsopts="$rsopts -v"
      ;;

    # =item -P
    #
    # Passes B<-P> to rsync.
    #
    -P)
      rsopts="$rsopts -P"
      ;;

    # =item -x
    #
    # Trace execution of important commands (rsync, linktree etc).
    #
    -x)
      trace=set-x
      ;;

    --)
      shift
      break
      ;;
    -*)
      echo "$cmd: unrecognised option: $1" >&2
      badopts=1
      ;;
    *)
      break
      ;;
  esac
  shift
done
#
# =back
#

# Collect the positional arguments for the selected mode.
# argsok is set only when the required operands were actually supplied,
# so the directory checks below are not run against missing values.
argsok=
if [ -n "$linkonly" ]
then
  if [ $# -lt 1 ]
  then
    echo "$cmd: missing parent" >&2
    badopts=1
  else
    dest=$1
    shift
    argsok=1
  fi
elif [ -n "$inplace" ]
then
  # src is the "current" subdir of the supplied dest
  if [ $# -lt 1 ]
  then
    echo "$cmd: missing parent" >&2
    badopts=1
  else
    src=$1/current
    dest=$1
    shift
    argsok=1
  fi
else
  if [ $# -lt 2 ]
  then
    echo "$cmd: missing source or backupdir" >&2
    badopts=1
  else
    src=$1
    dest=$2
    shift; shift
    argsok=1
  fi
fi

if [ -n "$linkonly" ] && [ $# -gt 0 ]
then
  echo "$cmd: --linkonly: extra arguments after parent: $*" >&2
  badopts=1
fi

if [ -n "$argsok" ]
then
  # An empty name must be rejected explicitly: "$dest/." would otherwise
  # be "/." and quietly select the filesystem root.
  if [ -z "$dest" ] || [ ! -d "$dest/." ]
  then
    echo "$cmd: backup dir $dest: not a directory" >&2
    badopts=1
  fi

  # --linkonly takes no source, so there is nothing to check in that mode.
  if [ -z "$linkonly" ]
  then
    case "$src" in
      *:*)
        # remote - don't check
        ;;
      *)
        if [ -z "$src" ] || [ ! -d "$src/." ]
        then
          echo "$cmd: source dir $src: not a directory" >&2
          badopts=1
        fi
        ;;
    esac
  fi
fi

[ -n "$badopts" ] && { echo "$usage" >&2; exit 2; }

# Progress messages go through $echo: real echo when verbose, no-op otherwise.
if [ -n "$verbose" ]
then
  echo=echo
else
  echo=:
fi

# =head1 CUSTOMISATION FILE
#
# If the file I<backupdir>/F<.hbinclude> is present
# the option B<--include-from=>I<backupdir>/F<.hbinclude>
# will be added to the I<rsync> command invocation.
# In this way the content backed up may be tuned.
# See rsync(1).
#
hbinclude=$dest/.hbinclude
[ -s "$hbinclude" ] && set -- ${1+"$@"} "--include-from=$hbinclude"

# Locate the most recent backup, if any.
# The glob expands in sorted order, so the last directory seen is the newest.
# Entries that are not directories are skipped, and an unmatched pattern
# (left as literal text by the shell) fails the -d test too.
lastbackup=
for backup in "$dest"/[0-9][0-9][0-9][0-9]-[0-9][0-9]-[0-9][0-9]*
do
  [ -d "$backup/." ] && lastbackup=${backup##*/}
done

# Name the new backup by date, or by date-time if today's already exists.
today=$(daycode) || exit 1
thisbackup=$today
if [ -d "$dest/$thisbackup/." ]
then
  now=$(datecode) || exit 1
  thisbackup=$now
  if [ -d "$dest/$thisbackup/." ]
  then
    echo "$cmd: $dest/$today and $dest/$now already exist, aborting" >&2
    exit 1
  fi
fi

if [ -n "$mono" ]
then
  delete=
else
  delete=--delete
fi

xit=0

if [ -n "$lastbackup" ]
then
  $echo "existing backup: linking $lastbackup to $thisbackup ..."
  # A failed link pass is only a warning; the rsync pass below still runs.
  (
    cd "$dest" || exit 1
    mklinktree "$lastbackup" "$thisbackup"
  ) || echo "$cmd: warning: mklinktree $dest/$lastbackup $dest/$thisbackup fails" >&2

  if [ -n "$linkonly" ]
  then
    # fd 3 is the caller's original stdout, saved by --linkonly
    echo "$thisbackup" >&3
  else
    $echo "updating $thisbackup from $src ..."
    # $rsopts and $delete are deliberately unquoted: each holds zero or more options
    $trace rsync $rsopts -aH $delete --rsync-path "$rrsync" ${1+"$@"} "$src/." "$dest/$thisbackup/."
    xit=$?
  fi
else
  if [ -n "$linkonly" ]
  then
    echo "$cmd: $dest: no subdirectory to link from" >&2
    ls -la "$dest" >&2
    exit 1
  fi

  $echo "fresh backup, just copying source ..."
  use_rsync=
  case "$src" in
    *:*)
      # probably remote directory - use rsync
      use_rsync=1
      ;;
    *)
      # check if they supplied rsync options
      [ $# = 0 ] || use_rsync=1
      ;;
  esac

  if [ -n "$use_rsync" ]
  then
    $trace mkdir "$dest/$thisbackup" || exit 1
    $trace rsync $rsopts -aH --rsync-path "$rrsync" ${1+"$@"} "$src/." "$dest/$thisbackup/."
    xit=$?
  else
    cpr "$src" "$dest/$thisbackup"
    xit=$?
  fi
fi

# Point the symlink at the new backup, if one was requested and the backup exists.
if [ -d "$dest/$thisbackup/." ] && [ -n "$symlink" ]
then
  (
    cd "$dest" || exit 1
    rm -f "$symlink"
    ln -s "$thisbackup" "$symlink"
  )
fi

exit $xit

# =head1 AUTHOR
#
# Cameron Simpson 03apr2000
#
# =head1 SEE ALSO
#
# histbackup-prune(1cs), linktree(1cs), rsync(1), ssh(1), cp(1), cpdir(1cs)
#