Changeset c0e6494


Ignore:
Timestamp:
Sep 12, 2013 7:08:56 AM (7 months ago)
Author:
Kris Moore <kris@…>
Branches:
master, 9.2-release, releng/10.0, releng/10.0.1
Children:
03c14f8
Parents:
651bd28
Message:

Update to lpreserver backend:

Added a new "replicate init <dataset>" option, which re-initializes the remote side of the replication server, cleaning up any corrupt datasets and preparing it for a fresh sync.

Also added a check during replication: we won't try to kick off a second replication task if a previous one is still running, to avoid collisions. In addition, we will not delete any snapshots until the initial replication is finished, in order to prevent undercutting the active replication.

Location:
src-sh/lpreserver
Files:
3 edited

Legend:

Unmodified
Added
Removed
  • src-sh/lpreserver/backend/functions.sh

    r59f367e rc0e6494  
    257257  if [ "$2" = "sync" -a "$REPTIME" != "sync" ] ; then return 0; fi 
    258258 
     259  # Doing a replication task, check if one is in progress 
     260  export pidFile="${DBDIR}/.reptask-`echo ${LDATA} | sed 's|/|-|g'`" 
     261  if [ -e "${pidFile}" ] ; then 
     262     pgrep -F ${pidFile} >/dev/null 2>/dev/null 
     263     if [ $? -eq 0 ] ; then 
     264        echo_log "Skipped replication on $LDATA, previous replication is still running." 
     265        return 0 
     266     else 
     267        rm ${pidFile} 
     268     fi 
     269  fi 
     270 
     271  # Save this PID 
     272  echo "$$" > ${pidFile} 
     273 
    259274  # Is this a sync-task we do at the time of a snapshot? 
    260275  if [ "$2" = "sync" -a "$REPTIME" = "sync" ] ; then 
     
    286301  if [ "$lastSEND" = "$lastSNAP" ] ; then 
    287302     queue_msg "`date`: Last snapshot $lastSNAP is already marked as replicated!" 
     303     rm ${pidFile} 
    288304     return 1 
    289305  fi 
     
    332348  fi 
    333349 
     350  rm ${pidFile} 
    334351  return $zStatus 
    335352} 
     
    552569   exit $? 
    553570} 
     571 
     572init_rep_task() { 
     573 
     574  LDATA="$1" 
     575 
     576  repLine=`cat ${REPCONF} | grep "^${LDATA}:"` 
     577  if [ -z "$repLine" ] ; then return 0; fi 
     578  
     579  # We have a replication task for this set, get some vars 
     580  hName=`hostname` 
     581  REPHOST=`echo $repLine | cut -d ':' -f 3` 
     582  REPUSER=`echo $repLine | cut -d ':' -f 4` 
     583  REPPORT=`echo $repLine | cut -d ':' -f 5` 
     584  REPRDATA=`echo $repLine | cut -d ':' -f 6` 
     585 
     586  # First check if we even have a dataset on the remote 
     587  ssh -p ${REPPORT} ${REPUSER}@${REPHOST} zfs list ${REPRDATA}/${hName} 2>/dev/null >/dev/null 
     588  if [ $? -eq 0 ] ; then 
     589     # Lets cleanup the remote side 
     590     echo "Removing remote dataset: ${REPRDATA}/${hName}" 
     591     ssh -p ${REPPORT} ${REPUSER}@${REPHOST} zfs destroy -r ${REPRDATA}/${hName} 
     592     if [ $? -ne 0 ] ; then 
     593        echo "Warning: Could not delete remote dataset ${REPRDATA}/${hName}" 
     594     fi 
     595  fi 
     596 
     597  # Now lets mark none of our datasets as replicated 
     598  lastSEND=`zfs get -r backup:lpreserver ${LDATA} | grep LATEST | awk '{$1=$1}1' OFS=" " | tail -1 | cut -d '@' -f 2 | cut -d ' ' -f 1` 
     599  if [ -n "$lastSEND" ] ; then 
     600     zfs set backup:lpreserver=' ' ${LDATA}@$lastSEND 
     601  fi 
     602 
     603} 
  • src-sh/lpreserver/backend/runsnap.sh

    rac48c86 rc0e6494  
    1818fi 
    1919 
     20# Make sure this is a valid DATASET 
     21zfs list ${DATASET} >/dev/null 2>/dev/null 
     22if [ $? -ne 0 ] ; then 
     23   exit_err "Invalid dataset specified ${DATASET}" 
     24fi 
     25 
    2026# Create the snapshot now with the "auto-" tag 
    2127echo_log "Creating snapshot on ${DATASET}" 
     
    2329if [ $? -ne 0 ] ; then 
    2430  echo_log "ERROR: Failed creating snapshot on ${DATASET}" 
    25   queue_msg "Snapshot ERROR" "ERROR: Failed creating snapshot on ${DATASET} @ `date`\n\r`cat $CMDLOG`" 
     31  queue_msg "ERROR: Failed creating snapshot on ${DATASET} @ `date`\n\r`cat $CMDLOG`" 
    2632  snapStat=1 
    2733else 
     
    3844done 
    3945 
    40 # Do any pruning 
    41 num=0 
    42 for snap in $rSnaps 
    43 do 
    44    # Only remove snapshots which are auto-created, so we don't delete one the user 
    45    # made specifically 
    46    cur="`echo $snap | cut -d '-' -f 1`"  
    47    if [ "$cur" != "auto" ] ; then 
    48      continue; 
    49    fi 
     46# Before we start pruning, check if any replication is running 
     47skipPrune=0 
     48export pidFile="${DBDIR}/.reptask-`echo ${DATASET} | sed 's|/|-|g'`" 
     49if [ -e "${pidFile}" ] ; then 
     50   pgrep -F ${pidFile} >/dev/null 2>/dev/null 
     51   if [ $? -eq 0 ] ; then skipPrune=1; fi 
     52fi 
    5053 
    51    num=`expr $num + 1` 
    52    if [ $num -gt $KEEP ] ; then 
    53       echo_log "Pruning old snapshot: $snap" 
    54       rmZFSSnap "${DATASET}" "$snap" 
    55       if [ $? -ne 0 ] ; then 
    56         echo_log "ERROR: Failed pruning snapshot $snap on ${DATASET}" 
    57         queue_msg "Snapshot ERROR" "ERROR: Failed pruning snapshot $snap on ${DATASET} @ `date`\n\r`cat $CMDLOG`" 
    58         snapStat=1 
    59       else 
    60         queue_msg "Success pruning snapshot $snap on ${DATASET} @ `date`\n\r`cat $CMDLOG`" 
     54if [ $skipPrune -eq 1 ] ; then 
     55  # No pruning since replication is currently running 
     56  echo_log "WARNING: Skipped pruning snapshots on ${DATASET} while replication is running." 
     57  queue_msg "WARNING: Skipped pruning snapshots on ${DATASET} while replication is running." 
     58 
     59else 
     60  # Do any pruning 
     61  num=0 
     62  for snap in $rSnaps 
     63  do 
     64     # Only remove snapshots which are auto-created, so we don't delete one the user 
     65     # made specifically 
     66     cur="`echo $snap | cut -d '-' -f 1`"  
     67     if [ "$cur" != "auto" ] ; then 
     68       continue; 
     69     fi 
     70 
     71     num=`expr $num + 1` 
     72     if [ $num -gt $KEEP ] ; then 
     73        echo_log "Pruning old snapshot: $snap" 
     74        rmZFSSnap "${DATASET}" "$snap" 
     75        if [ $? -ne 0 ] ; then 
     76          echo_log "ERROR: Failed pruning snapshot $snap on ${DATASET}" 
     77          queue_msg "ERROR: Failed pruning snapshot $snap on ${DATASET} @ `date`\n\r`cat $CMDLOG`" 
     78          snapStat=1 
     79        else 
     80          queue_msg "Success pruning snapshot $snap on ${DATASET} @ `date`\n\r`cat $CMDLOG`" 
     81        fi 
    6182      fi 
    62     fi 
    63 done 
     83  done 
     84fi 
    6485 
    6586# If we failed at any point, send out a notice 
  • src-sh/lpreserver/lpreserver

    r7c0c657 rc0e6494  
    146146 
    147147Available Flags: 
     148 
     149         add - Add a new replication target 
     150        init - Initialize the remote side again 
    148151        list - List replication targets 
    149          add - Add a new replication target 
    150152      remove - Remove a replication target 
    151153 
     
    165167 
    166168        Will schedule replication of tank1 to tankbackup/backups at 10PM, notated in 24hour time 
     169 
     170 
     171Init Options: 
     172 
     173        init <localdataset/zpool>  
     174 
     175        Will re-init the remote side of the replication. This can be useful 
     176        when your replication gets stuck. Doing this option will remove 
     177        all the data on the remote side, and require a full re-sync again. 
    167178 
    168179Remove Options: 
     
    486497            case ${1} in 
    487498                add) add_rep_task "$2" "$3" "$4" "$5" "$6" "$7" ;; 
     499                init) init_rep_task "$2" ;; 
    488500                list) list_rep_task ;; 
    489501                remove) cat ${REPCONF} | grep -q "^${2}:"  
Note: See TracChangeset for help on using the changeset viewer.