source: src-qt4/life-preserver/LPWatcher.cpp @ b5a7675c

9.2-releasereleng/10.0releng/10.0.1releng/10.0.2releng/10.0.3releng/10.1
Last change on this file since b5a7675c was b5a7675c, checked in by Ken Moore <ken@…>, 16 months ago

Add the ability to detect the results of a scrub on the system, as well as set up the system for detecting pool errors, and any resilvering. Just need to finish adding the parsing for the active tasks (as well as the disk errors).

  • Property mode set to 100644
File size: 15.2 KB
Line 
1#include "LPWatcher.h"
2
3/* ------ HASH NUMBERING NOTE -----
4  Each set of 10 is a different type of status
5    "message" status: 10-19
6    "replication" status: 20-29
7    "critical" status: 30-39
8    "mirror" status: 40-49
9    "resilvering" status: 50-59
10    "scrub" status: 60-69
11  Within each set:
12    *0 = ID Code (for internal identification as necessary)
13    *1 = dataset (example: tank1)
14    *2 = summary (shortened version of the message - tooltips)
15    *3 = message (full message)
16    *4 = timestamp (full date/time timestamp in readable format)
17    *5 = short timestamp (just time in readable format)
18
19  Valid Internal ID's:
20    SNAPCREATED -> new snapshot created
21    STARTED     -> Task started
22    RUNNING     -> Task running (I.E. status update)
23    FINISHED    -> Task finished
24    ERROR               -> Task failed
25   
26*/
27
28LPWatcher::LPWatcher() : QObject(){
29  //Initialize the path variables
30  FILE_LOG = "/var/log/lpreserver/lpreserver.log";
31  FILE_ERROR="/var/log/lpreserver/error.log";
32  FILE_REPLICATION=""; //this is set automatically based on the log file outputs
33
34  //initialize the watcher and timer
35  watcher = new QFileSystemWatcher(this);
36    connect(watcher, SIGNAL(fileChanged(QString)),this,SLOT(fileChanged(QString)) );
37  timer = new QTimer();
38    timer->setInterval( 300000 ); //5 minute check time
39    connect(timer, SIGNAL(timeout()), this, SLOT(checkPoolStatus()) );
40  //initialize the log file reader
41  logfile = new QFile(FILE_LOG, this);
42  LFSTREAM = new QTextStream(logfile);
43  //initialize the replication file reader
44  repfile = new QFile(this);
45}
46
47LPWatcher::~LPWatcher(){
48  //clean up the internal objects
49  delete watcher;
50  delete timer;
51  delete logfile;
52  delete LFSTREAM;
53}
54
55// -----------------------------------
56//    PUBLIC FUNCTIONS
57// -----------------------------------
58void LPWatcher::start(){
59  if(!logfile->exists()){
60    QString dir = FILE_LOG;
61          dir.chop( dir.section("/",-1).count()+1 );
62    if(!QFile::exists(dir)){ system( QString("mkdir -p "+dir).toUtf8() ); }
63    system( QString("touch "+FILE_LOG).toUtf8() );
64  }
65  //Read the current state of the log file
66  logfile->open(QIODevice::ReadOnly | QIODevice::Text);
67  readLogFile(true); //do this quietly the first time through
68  //Now start up the log file watcher
69  watcher->addPath(FILE_LOG);
70  //Now check for any current errors in the LPbackend
71  checkPoolStatus();
72  //And start up the error file watcher
73  timer->start();
74}
75
76void LPWatcher::stop(){
77  watcher->removePaths(watcher->files());
78  logfile->close();
79  timer->stop();
80}
81
82QStringList LPWatcher::getMessages(QString type, QStringList msgList){
83  QStringList output;
84  type = type.toLower();
85  //Valid types - "critical"/"running"/"message"
86  //Valid messages - "dataset","message","summary","id", "timestamp", "time"
87  unsigned int base;
88  if(type=="message"){base=10;}
89  else if(type=="replicate"){base=20;}
90  else if(type=="critical"){base=30;}
91  else if(type=="mirror"){base=40;}
92  else if(type=="resilvering"){base=50;}
93  else if(type=="scrub"){base=60;}
94  else{ return output; } //invalid input type
95  //Now fill the output array based upon requested outputs
96  for(int i=0; i<msgList.length(); i++){
97    msgList[i] = msgList[i].toLower();
98    if(msgList[i]=="id" && LOGS.contains(base)){ output << LOGS[base]; }
99    else if(msgList[i]=="dataset" && LOGS.contains(base+1)){ output << LOGS[base+1]; }
100    else if(msgList[i]=="summary" && LOGS.contains(base+2)){ output << LOGS[base+2]; }
101    else if(msgList[i]=="message" && LOGS.contains(base+3)){ output << LOGS[base+3]; }
102    else if(msgList[i]=="timestamp" && LOGS.contains(base+4)){ output << LOGS[base+4]; }
103    else if(msgList[i]=="time" && LOGS.contains(base+5)){ output << LOGS[base+5]; }
104    else{ output << ""; }
105  }
106  //Return the output list
107  return output;
108}
109
110QStringList LPWatcher::getAllCurrentMessages(){
111  //Useful for quickly displaying all latest messages in a tooltip or summary
112  QStringList output;
113  if(LOGS.contains(12) && LOGS.contains(14)){ output << LOGS[14]+" -- "+LOGS[12]; }
114  if(LOGS.contains(22) && LOGS.contains(24)){ output << LOGS[24]+" -- "+LOGS[22]; }
115  if(LOGS.contains(32) && LOGS.contains(34)){ output << LOGS[34]+" -- "+LOGS[32]; }
116  if(LOGS.contains(42) && LOGS.contains(44)){ output << LOGS[44]+" -- "+LOGS[42]; }
117  if(LOGS.contains(52) && LOGS.contains(54)){ output << LOGS[54]+" -- "+LOGS[52]; }
118  if(LOGS.contains(62) && LOGS.contains(64)){ output << LOGS[64]+" -- "+LOGS[62]; }
119  return output;
120}
121
122bool LPWatcher::isRunning(){
123  if(LOGS.value(20) == "STARTED" || LOGS.value(20) == "RUNNING"){ return true; }
124  else if(LOGS.value(40) == "STARTED" || LOGS.value(40) == "RUNNING"){ return true; }
125  else if(LOGS.value(50) == "STARTED" || LOGS.value(50) == "RUNNING"){ return true; }
126  else if(LOGS.value(60) == "STARTED" || LOGS.value(60) == "RUNNING"){ return true; }
127  else{ return false; }
128}
129
130bool LPWatcher::hasError(){
131  return (LOGS.value(20)=="ERROR" || LOGS.contains(30) || LOGS.value(40)=="ERROR" || LOGS.value(50)=="ERROR" || LOGS.value(60)=="ERROR");
132}
133
134// -------------------------------------
135//    PRIVATE FUNCTIONS
136// -------------------------------------
137void LPWatcher::readLogFile(bool quiet){
138  QTextStream in(logfile);
139  while(!LFSTREAM->atEnd()){
140    QString log = LFSTREAM->readLine();
141    //Divide up the log into it's sections
142    QString timestamp = log.section(":",0,2).simplified();
143    QString time = timestamp.section(" ",3,3).simplified();
144    QString message = log.section(":",3,3).toLower().simplified();
145    QString dev = log.section(":",4,4).simplified(); //dataset/snapshot/nothing
146    //Now decide what to do/show because of the log message
147    //qDebug() << "New Log Message:" << log;
148    if(message.contains("creating snapshot")){
149      dev = message.section(" ",-1).simplified();
150      //Setup the status of the message
151      LOGS.insert(10,"SNAPCREATED");
152      LOGS.insert(11,dev); //dataset
153      LOGS.insert(12, QString(tr("New snapshot of %1")).arg(dev) ); //summary
154      LOGS.insert(13, QString(tr("Creating snapshot for %1")).arg(dev) );
155      LOGS.insert(14, timestamp); //full timestamp
156      LOGS.insert(15, time); // time only
157      if(!quiet){ emit MessageAvailable("message"); }
158    }else if(message.contains("starting replication")){
159      //Setup the file watcher for this new log file
160      FILE_REPLICATION = dev;
161      startRepFileWatcher();
162      //Set the appropriate status variables
163      LOGS.insert(20,"REPSTARTED");
164      // 21 - dataset set on update ping
165      LOGS.insert(22, tr("Replication Started") ); //summary
166      // 23 - Full message set on update ping
167      LOGS.insert(24, timestamp); //full timestamp
168      LOGS.insert(25, time); // time only
169      if(!quiet){ emit MessageAvailable("replication"); }
170    }else if(message.contains("finished replication")){
171      stopRepFileWatcher();
172      dev = message.section(" ",-1).simplified();
173      //Now set the status of the process
174      LOGS.insert(20,"REPFINISHED");
175      LOGS.insert(21,dev); //dataset
176      LOGS.insert(22, tr("Finished Replication") ); //summary
177      LOGS.insert(23, QString(tr("Finished replication for %1")).arg(dev) );
178      LOGS.insert(24, timestamp); //full timestamp
179      LOGS.insert(25, time); // time only     
180      if(!quiet){ emit MessageAvailable("replication"); }
181    }else if( message.contains("FAILED replication") ){
182      stopRepFileWatcher();
183      //Now set the status of the process
184      dev = message.section(" ",-1).simplified();
185      QString file = log.section("LOGFILE:",1,1).simplified();
186      QString tt = QString(tr("Replication Failed for %1")).arg(dev) +"\n"+ QString(tr("Logfile available at: %1")).arg(file);
187      LOGS.insert(20,"REPERROR");
188      LOGS.insert(21,dev); //dataset
189      LOGS.insert(22, tr("Replication Failed") ); //summary
190      LOGS.insert(23, tt );
191      LOGS.insert(24, timestamp); //full timestamp
192      LOGS.insert(25, time); // time only     
193      if(!quiet){ emit MessageAvailable("replication"); }
194    }
195         
196  }
197}
198
199void LPWatcher::readReplicationFile(bool quiet){
200  QString stat;
201  while( !RFSTREAM->atEnd() ){ 
202    QString line = RFSTREAM->readLine(); 
203    if(line.contains("total estimated size")){ repTotK = line.section(" ",-1).simplified(); } //save the total size to replicate
204    else if(line.startsWith("send from ")){}
205    else if(line.startsWith("TIME ")){}
206    else{ stat = line; } //only save the relevant/latest status line
207  }
208  if(stat.isEmpty()){
209    qDebug() << "New Status Message:" << stat;
210    //Divide up the status message into sections
211    stat.replace("\t"," ");
212    QString dataset = stat.section(" ",2,2,QString::SectionSkipEmpty).section("/",0,0).simplified();
213    QString cSize = stat.section(" ",1,1,QString::SectionSkipEmpty);
214    //Now Setup the tooltip
215    if(cSize != lastSize){ //don't update the info if the same size info
216      QString percent;
217      if(!repTotK.isEmpty()){
218        //calculate the percentage
219        double tot = displayToDoubleK(repTotK);
220        double c = displayToDoubleK(cSize);
221        if( tot!=-1 & c!=-1){
222          double p = (c*100)/tot;
223          p = int(p*10)/10.0; //round to 1 decimel places
224          percent = QString::number(p) + "%";
225        }
226      }
227      if(repTotK.isEmpty()){ repTotK = "??"; }
228      //Format the info string
229      QString status = cSize+"/"+repTotK;
230      if(!percent.isEmpty()){ status.append(" ("+percent+")"); }
231      QString txt = QString(tr("Replicating %1: %2")).arg(dataset, status);
232      lastSize = cSize; //save the current size for later
233      //Now set the current process status
234      LOGS.insert(20,"RUNNING");
235      LOGS.insert(21,dataset);
236      LOGS.insert(23,txt);
237      if(!quiet){ emit MessageAvailable("replication"); }
238    }
239  }
240}
241
242void LPWatcher::startRepFileWatcher(){
243  if(FILE_REPLICATION.isEmpty()){ return; }
244  if(!QFile::exists(FILE_REPLICATION)){ system( QString("touch "+FILE_REPLICATION).toUtf8() ); }
245  repfile->setFileName(FILE_REPLICATION);
246  repfile->open(QIODevice::ReadOnly | QIODevice::Text);
247  RFSTREAM = new QTextStream(repfile);
248  watcher->addPath(FILE_REPLICATION);
249}
250
251void LPWatcher::stopRepFileWatcher(){
252  if(FILE_REPLICATION.isEmpty()){ return; }
253  watcher->removePath(FILE_REPLICATION);
254  repfile->close();
255  delete RFSTREAM;
256  FILE_REPLICATION.clear();
257  repTotK.clear();
258  lastSize.clear();
259}
260
261double LPWatcher::displayToDoubleK(QString displayNumber){
262  QStringList labels; 
263    labels << "K" << "M" << "G" << "T" << "P" << "E";
264  QString clab = displayNumber.right(1); //last character is the size label
265        displayNumber.chop(1); //remove the label from the number
266  double num = displayNumber.toDouble();
267  //Now format the number properly
268  bool ok = false;
269  for(int i=0; i<labels.length(); i++){
270    if(labels[i] == clab){ ok = true; break; }
271    else{ num = num*1024; } //get ready for the next size
272  }
273  if(!ok){ num = -1; } //could not determine the size
274  return num;
275}
276
277// ------------------------------
278//    PRIVATE SLOTS
279// ------------------------------
280void LPWatcher::fileChanged(QString file){
281  if(file == FILE_LOG){ readLogFile(); }
282  else  if(file == FILE_REPLICATION){ readReplicationFile(); }
283}
284
285void LPWatcher::checkPoolStatus(){
286  //Now check zpool status for bad/running statuses
287  QStringList zstat = LPBackend::getCmdOutput("zpool status");
288    //parse the output
289    QString pool, state, timestamp;
290    qDebug() << "-----zpool status------";
291    bool newresilver = false;
292    for(int i=0; i<zstat.length(); i++){
293      zstat[i] = zstat[i].simplified();
294      if(zstat[i].isEmpty()){ continue; }
295      qDebug() << zstat[i];
296      if(zstat[i].startsWith("pool:")){ pool = zstat[i].section(":",1,10).simplified(); }
297      else if(zstat[i].startsWith("state:")){ state = zstat[i].section(":",1,10).simplified(); }
298      else if(zstat[i].startsWith("scan:")){
299        //check for scrubs/resilvering progress
300        bool isnew = false;
301        // ------ SCRUB ------
302        if(zstat[i].contains("scrub")){
303          //Setup the latest/running scrub info
304          if(zstat[i].contains(" scrub repaired ")){
305            zstat[i]  = zstat[i].replace("\t"," ").simplified();
306            timestamp = zstat[i].section(" ",10,14,QString::SectionSkipEmpty);
307            QString numFixed = zstat[i].section(" ",3,3,QString::SectionSkipEmpty);
308            QString numErr = zstat[i].section(" ",7,7,QString::SectionSkipEmpty);
309            QString timeRun = zstat[i].section(" ",5,5,QString::SectionSkipEmpty);
310            //Scrub finished previously
311            if(numFixed.toInt() > 0){ 
312              if(LOGS.value(60)!="ERROR"){ isnew=true; }
313              LOGS.insert(60, "ERROR"); 
314              LOGS.insert(62, QString(tr("Scrub repaired %1 bad blocks")).arg(numFixed) );
315              LOGS.insert(63, QString(tr("Scrub repaired %1 blocks in %2 with %3 errors")).arg(numFixed, timeRun, numErr) );
316            }else{ 
317              if(LOGS.value(60)!= " " && LOGS.value(60)!="FINISHED"){ isnew=true; }
318              LOGS.insert(60,"FINISHED"); 
319              LOGS.insert(62, tr("Scrub completed") );
320              LOGS.insert(63, tr("Scrub completed without needing repairs") );
321            }
322            LOGS.insert(61,pool);
323            LOGS.insert(64, timestamp);
324            LOGS.insert(65, timestamp.section(" ",3,3) );
325          }else{
326            //Scrub is running - parse the line
327            timestamp = "??";
328            QString percent = "??";
329            QString remain = "??";
330            if(LOGS.value(60) != "RUNNING"){isnew=true;}
331            LOGS.insert(60,"RUNNING");
332            LOGS.insert(61,pool);
333            LOGS.insert(62, QString(tr("Resilvering: %1")).arg(percent) );
334            LOGS.insert(63, QString(tr("Resilvering: %1 (%2 remaining)")).arg(percent, remain) );
335            LOGS.insert(64, timestamp);
336            LOGS.insert(65, timestamp.section(" ",3,3) );
337            qDebug() << "***Running Scrub: line needs parsing";
338          }
339          if(isnew){ emit MessageAvailable("scrub"); }
340          if(LOGS.value(50) == "RUNNING"){
341            //Resilvering is done - remove the info and send a ping
342            LOGS.insert(50,"FINISHED");
343            LOGS.insert(51,pool);
344            LOGS.insert(52, tr("Resilvering complete"));
345            LOGS.insert(53, tr("Resilvering completed successfully"));
346            LOGS.insert(54, timestamp);
347            LOGS.insert(55, timestamp.section(" ",3,3) );
348            emit MessageAvailable("resilvering");
349          }
350        // --------- RESILVERING -------
351        }else if(zstat[i].contains("resilver")){
352          //Setup the running re-silvering progress
353          if(LOGS.value(50)!= " " && LOGS.value(50)!="RUNNING"){newresilver=true; }
354          LOGS.insert(50, "RUNNING");
355          // 51 - need to put the actual device in here (not available on this line)
356          LOGS.insert(52, tr("Resilvering in progress"));
357          if(newresilver){ LOGS.insert(53, tr("Resilvering started") ); }
358          else{ LOGS.insert(53, tr("Resilvering in progress")); }
359          LOGS.insert(54, timestamp);
360          LOGS.insert(55, timestamp.section(" ",3,3) );
361          if(isnew){ emit MessageAvailable("resilvering"); }
362        }
363      }else if(zstat[i].startsWith("errors:")){
364        if(zstat[i] != "errors: No known data errors"){
365          qDebug() << "New zpool status error line that needs parsing:" << zstat[i];
366        }
367      }else if( state != "ONLINE" ){
368        //Check for state/resilvering of all real devices
369        if(zstat[i].contains("NAME\tSTATE\tREAD")){continue;} //nothing on this header line
370        else if(zstat[i].contains("(resilvering)")){ LOGS.insert(51, zstat[i].section("\t",0,0,QString::SectionSkipEmpty) ); }
371        else if(zstat[i].contains("ONLINE")){continue;} //do nothing for this device - it is good
372        else if(zstat[i].contains("OFFLINE")){ }
373        else if(zstat[i].contains("DEGRADED")){ }
374        else if(zstat[i].contains("FAULTED")){ }
375        else if(zstat[i].contains("REMOVED")){ }
376        else if(zstat[i].contains("UNAVAIL")){ }
377      }
378    } //end of loop over zpool status lines
379  if(newresilver){ emit MessageAvailable("resilvering"); }
380}
Note: See TracBrowser for help on using the repository browser.