Changeset b5a7675


Ignore:
Timestamp:
Sep 11, 2013 8:55:41 PM (10 months ago)
Author:
Ken Moore <ken@…>
Branches:
master, 9.2-release, releng/10.0, releng/10.0.1, releng/10.0.2
Children:
4f9718b
Parents:
f698804
Message:

Add the ability to detect the results of a scrub on the system, and set up the framework for detecting pool errors and any resilvering. The parsing for active (running) tasks, as well as for disk errors, still needs to be finished.

Location:
src-qt4/life-preserver
Files:
3 edited

Legend:

Unmodified
Added
Removed
  • src-qt4/life-preserver/LPBackend.h

    r284b216 rb5a7675  
    4444        //Mirroring Management 
    4545 
    46          
    47 private: 
     46        //General utility functions 
    4847        static QStringList getCmdOutput(QString); 
    49         static int runCmd(QString); 
     48        static int runCmd(QString);      
     49 
    5050}; 
    5151#endif 
  • src-qt4/life-preserver/LPWatcher.cpp

    rf698804 rb5a7675  
    77    "critical" status: 30-39 
    88    "mirror" status: 40-49 
     9    "resilvering" status: 50-59 
     10    "scrub" status: 60-69 
    911  Within each set: 
    1012    *0 = ID Code (for internal identification as necessary) 
    1113    *1 = dataset (example: tank1) 
    12     *2 = summary (shortened version of the message - good for titles) 
     14    *2 = summary (shortened version of the message - tooltips) 
    1315    *3 = message (full message) 
    1416    *4 = timestamp (full date/time timestamp in readable format) 
     
    3436    connect(watcher, SIGNAL(fileChanged(QString)),this,SLOT(fileChanged(QString)) ); 
    3537  timer = new QTimer(); 
    36     timer->setInterval( 600000 ); //10 minute check time 
    37     connect(timer, SIGNAL(timeout()), this, SLOT(checkErrorFile()) ); 
     38    timer->setInterval( 300000 ); //5 minute check time 
     39    connect(timer, SIGNAL(timeout()), this, SLOT(checkPoolStatus()) ); 
    3840  //initialize the log file reader 
    3941  logfile = new QFile(FILE_LOG, this); 
     
    6769  watcher->addPath(FILE_LOG); 
    6870  //Now check for any current errors in the LPbackend 
    69   checkErrorFile(); 
     71  checkPoolStatus(); 
    7072  //And start up the error file watcher 
    7173  timer->start(); 
     
    8890  else if(type=="critical"){base=30;} 
    8991  else if(type=="mirror"){base=40;} 
     92  else if(type=="resilvering"){base=50;} 
     93  else if(type=="scrub"){base=60;} 
    9094  else{ return output; } //invalid input type 
    9195  //Now fill the output array based upon requested outputs 
     
    111115  if(LOGS.contains(32) && LOGS.contains(34)){ output << LOGS[34]+" -- "+LOGS[32]; } 
    112116  if(LOGS.contains(42) && LOGS.contains(44)){ output << LOGS[44]+" -- "+LOGS[42]; } 
     117  if(LOGS.contains(52) && LOGS.contains(54)){ output << LOGS[54]+" -- "+LOGS[52]; } 
     118  if(LOGS.contains(62) && LOGS.contains(64)){ output << LOGS[64]+" -- "+LOGS[62]; } 
    113119  return output; 
    114120} 
     
    117123  if(LOGS.value(20) == "STARTED" || LOGS.value(20) == "RUNNING"){ return true; } 
    118124  else if(LOGS.value(40) == "STARTED" || LOGS.value(40) == "RUNNING"){ return true; } 
     125  else if(LOGS.value(50) == "STARTED" || LOGS.value(50) == "RUNNING"){ return true; } 
     126  else if(LOGS.value(60) == "STARTED" || LOGS.value(60) == "RUNNING"){ return true; } 
    119127  else{ return false; } 
    120128} 
    121129 
    122130bool LPWatcher::hasError(){ 
    123   return (LOGS.value(20)=="ERROR" || LOGS.contains(30) || LOGS.value(40)=="ERROR"); 
     131  return (LOGS.value(20)=="ERROR" || LOGS.contains(30) || LOGS.value(40)=="ERROR" || LOGS.value(50)=="ERROR" || LOGS.value(60)=="ERROR"); 
    124132} 
    125133 
     
    137145    QString dev = log.section(":",4,4).simplified(); //dataset/snapshot/nothing 
    138146    //Now decide what to do/show because of the log message 
    139     qDebug() << "New Log Message:" << log; 
     147    //qDebug() << "New Log Message:" << log; 
    140148    if(message.contains("creating snapshot")){ 
    141149      dev = message.section(" ",-1).simplified(); 
     
    143151      LOGS.insert(10,"SNAPCREATED"); 
    144152      LOGS.insert(11,dev); //dataset 
    145       LOGS.insert(12, tr("New Snapshot") ); //summary 
     153      LOGS.insert(12, QString(tr("New snapshot of %1")).arg(dev) ); //summary 
    146154      LOGS.insert(13, QString(tr("Creating snapshot for %1")).arg(dev) ); 
    147155      LOGS.insert(14, timestamp); //full timestamp 
     
    275283} 
    276284 
    277 void LPWatcher::checkErrorFile(){ 
    278   return; 
    279   if(QFile::exists(FILE_ERROR)){ 
    280     //Read the file to determine the cause of the error 
    281     QString msg, id, summary, timestamp, time, dataset; 
    282     QFile file(FILE_ERROR); 
    283       file.open(QIODevice::ReadOnly | QIODevice::Text); 
    284       QTextStream in(&file); 
    285       qDebug() << "Error File Parsing not implemented yet. \n - File Contents:"; 
    286       while(!in.atEnd()){ 
    287         QString line = in.readLine(); 
    288         //Now look for key information on this line 
    289         qDebug() << line; 
     285void LPWatcher::checkPoolStatus(){ 
     286  //Now check zpool status for bad/running statuses 
     287  QStringList zstat = LPBackend::getCmdOutput("zpool status"); 
     288    //parse the output 
     289    QString pool, state, timestamp; 
     290    qDebug() << "-----zpool status------"; 
     291    bool newresilver = false; 
     292    for(int i=0; i<zstat.length(); i++){ 
     293      zstat[i] = zstat[i].simplified(); 
     294      if(zstat[i].isEmpty()){ continue; } 
     295      qDebug() << zstat[i]; 
     296      if(zstat[i].startsWith("pool:")){ pool = zstat[i].section(":",1,10).simplified(); } 
     297      else if(zstat[i].startsWith("state:")){ state = zstat[i].section(":",1,10).simplified(); } 
     298      else if(zstat[i].startsWith("scan:")){ 
     299        //check for scrubs/resilvering progress 
     300        bool isnew = false; 
     301        // ------ SCRUB ------ 
     302        if(zstat[i].contains("scrub")){ 
     303          //Setup the latest/running scrub info 
     304          if(zstat[i].contains(" scrub repaired ")){ 
     305            zstat[i]  = zstat[i].replace("\t"," ").simplified(); 
     306            timestamp = zstat[i].section(" ",10,14,QString::SectionSkipEmpty); 
     307            QString numFixed = zstat[i].section(" ",3,3,QString::SectionSkipEmpty); 
     308            QString numErr = zstat[i].section(" ",7,7,QString::SectionSkipEmpty); 
     309            QString timeRun = zstat[i].section(" ",5,5,QString::SectionSkipEmpty); 
     310            //Scrub finished previously 
     311            if(numFixed.toInt() > 0){  
     312              if(LOGS.value(60)!="ERROR"){ isnew=true; } 
     313              LOGS.insert(60, "ERROR");  
     314              LOGS.insert(62, QString(tr("Scrub repaired %1 bad blocks")).arg(numFixed) ); 
     315              LOGS.insert(63, QString(tr("Scrub repaired %1 blocks in %2 with %3 errors")).arg(numFixed, timeRun, numErr) ); 
     316            }else{  
     317              if(LOGS.value(60)!= " " && LOGS.value(60)!="FINISHED"){ isnew=true; } 
     318              LOGS.insert(60,"FINISHED");  
     319              LOGS.insert(62, tr("Scrub completed") ); 
     320              LOGS.insert(63, tr("Scrub completed without needing repairs") ); 
     321            } 
     322            LOGS.insert(61,pool); 
     323            LOGS.insert(64, timestamp); 
     324            LOGS.insert(65, timestamp.section(" ",3,3) ); 
     325          }else{ 
     326            //Scrub is running - parse the line 
     327            timestamp = "??"; 
     328            QString percent = "??"; 
     329            QString remain = "??"; 
     330            if(LOGS.value(60) != "RUNNING"){isnew=true;} 
     331            LOGS.insert(60,"RUNNING"); 
     332            LOGS.insert(61,pool); 
     333            LOGS.insert(62, QString(tr("Resilvering: %1")).arg(percent) ); 
     334            LOGS.insert(63, QString(tr("Resilvering: %1 (%2 remaining)")).arg(percent, remain) ); 
     335            LOGS.insert(64, timestamp); 
     336            LOGS.insert(65, timestamp.section(" ",3,3) ); 
     337            qDebug() << "***Running Scrub: line needs parsing"; 
     338          } 
     339          if(isnew){ emit MessageAvailable("scrub"); } 
     340          if(LOGS.value(50) == "RUNNING"){ 
     341            //Resilvering is done - remove the info and send a ping 
     342            LOGS.insert(50,"FINISHED"); 
     343            LOGS.insert(51,pool); 
     344            LOGS.insert(52, tr("Resilvering complete")); 
     345            LOGS.insert(53, tr("Resilvering completed successfully")); 
     346            LOGS.insert(54, timestamp); 
     347            LOGS.insert(55, timestamp.section(" ",3,3) ); 
     348            emit MessageAvailable("resilvering"); 
     349          } 
     350        // --------- RESILVERING ------- 
     351        }else if(zstat[i].contains("resilver")){ 
     352          //Setup the running re-silvering progress 
     353          if(LOGS.value(50)!= " " && LOGS.value(50)!="RUNNING"){newresilver=true; } 
     354          LOGS.insert(50, "RUNNING"); 
     355          // 51 - need to put the actual device in here (not available on this line) 
     356          LOGS.insert(52, tr("Resilvering in progress")); 
     357          if(newresilver){ LOGS.insert(53, tr("Resilvering started") ); } 
     358          else{ LOGS.insert(53, tr("Resilvering in progress")); } 
     359          LOGS.insert(54, timestamp); 
     360          LOGS.insert(55, timestamp.section(" ",3,3) ); 
     361          if(isnew){ emit MessageAvailable("resilvering"); } 
     362        } 
     363      }else if(zstat[i].startsWith("errors:")){ 
     364        if(zstat[i] != "errors: No known data errors"){ 
     365          qDebug() << "New zpool status error line that needs parsing:" << zstat[i]; 
     366        } 
     367      }else if( state != "ONLINE" ){ 
     368        //Check for state/resilvering of all real devices 
     369        if(zstat[i].contains("NAME\tSTATE\tREAD")){continue;} //nothing on this header line 
     370        else if(zstat[i].contains("(resilvering)")){ LOGS.insert(51, zstat[i].section("\t",0,0,QString::SectionSkipEmpty) ); } 
     371        else if(zstat[i].contains("ONLINE")){continue;} //do nothing for this device - it is good 
     372        else if(zstat[i].contains("OFFLINE")){ } 
     373        else if(zstat[i].contains("DEGRADED")){ } 
     374        else if(zstat[i].contains("FAULTED")){ } 
     375        else if(zstat[i].contains("REMOVED")){ } 
     376        else if(zstat[i].contains("UNAVAIL")){ } 
    290377      } 
    291     //Now set the status and emit the signal 
    292     LOGS.insert(30, id); 
    293     LOGS.insert(31, dataset); //dataset 
    294     LOGS.insert(32, summary ); //summary 
    295     LOGS.insert(33, msg ); //message 
    296     LOGS.insert(34, timestamp); //full timestamp 
    297     LOGS.insert(35, time); // time only     
    298     emit MessageAvailable("critical"); 
    299   } 
    300 } 
     378    } //end of loop over zpool status lines 
     379  if(newresilver){ emit MessageAvailable("resilvering"); } 
     380} 
  • src-qt4/life-preserver/LPWatcher.h

    rf698804 rb5a7675  
    1111#include <QTextStream> 
    1212#include <QDebug> 
     13 
     14#include "LPBackend.h" 
    1315 
    1416class LPWatcher : public QObject{ 
     
    5052private slots: 
    5153        void fileChanged(QString); //file system watcher saw a change 
    52         void checkErrorFile(); //check for serious system error file 
     54        void checkPoolStatus(); //check for serious system errors 
    5355 
    5456signals: 
Note: See TracChangeset for help on using the changeset viewer.