source: src-qt4/life-preserver/LPWatcher.cpp @ 108626c

9.2-releasereleng/10.0releng/10.0.1releng/10.0.2releng/10.0.3
Last change on this file since 108626c was 108626c, checked in by Ken Moore <ken@…>, 11 months ago

Clean up the tray icons for life-preserver so they are easier to see, and fix the constant resilvering messages when a reilver is finished.

  • Property mode set to 100644
File size: 19.1 KB
Line 
1#include "LPWatcher.h"
2
3/* ------ HASH NUMBERING NOTE -----
4  Each set of 10 is a different type of status
5    "message" status: 10-19
6    "replication" status: 20-29
7    "critical" status: 30-39
8    "mirror" status: 40-49
9    "resilver" status: 50-59
10    "scrub" status: 60-69
11  Within each set:
12    *0 = ID Code (for internal identification as necessary)
13    *1 = zpool (example: tank1)
14    *2 = summary (shortened version of the message - tooltips)
15    *3 = message (full message)
16    *4 = timestamp (full date/time timestamp in readable format)
17    *5 = short timestamp (just time in readable format)
18
19  Valid Internal ID's:
20    SNAPCREATED -> new snapshot created
21    STARTED     -> Task started
22    RUNNING     -> Task running (I.E. status update)
23    FINISHED    -> Task finished
24    ERROR               -> Task failed
25   
26*/
27
28LPWatcher::LPWatcher() : QObject(){
29  //Initialize the path variables
30  FILE_LOG = "/var/log/lpreserver/lpreserver.log";
31  FILE_ERROR="/var/log/lpreserver/error.log";
32  FILE_REPLICATION=""; //this is set automatically based on the log file outputs
33  sysCheckTime = 300000; // 5 minutes
34  //initialize the watcher and timer
35  watcher = new QFileSystemWatcher(this);
36    connect(watcher, SIGNAL(fileChanged(QString)),this,SLOT(fileChanged(QString)) );
37  timer = new QTimer();
38    connect(timer, SIGNAL(timeout()), this, SLOT(checkPoolStatus()) );
39  //initialize the log file reader
40  logfile = new QFile(FILE_LOG, this);
41  LFSTREAM = new QTextStream(logfile);
42  //initialize the replication file reader
43  repfile = new QFile(this);
44}
45
46LPWatcher::~LPWatcher(){
47  //clean up the internal objects
48  delete watcher;
49  delete timer;
50  delete logfile;
51  delete LFSTREAM;
52}
53
54// -----------------------------------
55//    PUBLIC FUNCTIONS
56// -----------------------------------
57void LPWatcher::start(){
58  if(!logfile->exists()){
59    QString dir = FILE_LOG;
60          dir.chop( dir.section("/",-1).count()+1 );
61    if(!QFile::exists(dir)){ system( QString("mkdir -p "+dir).toUtf8() ); }
62    system( QString("touch "+FILE_LOG).toUtf8() );
63  }
64  //Read the current state of the log file
65  logfile->open(QIODevice::ReadOnly | QIODevice::Text);
66  readLogFile(true); //do this quietly the first time through
67  //Now start up the log file watcher
68  watcher->addPath(FILE_LOG);
69  //Now check for any current errors in the LPbackend
70  checkPoolStatus();
71  //And start up the error file watcher
72  if(!timer->isActive()){ timer->start(sysCheckTime); }
73}
74
75void LPWatcher::stop(){
76  watcher->removePaths(watcher->files());
77  logfile->close();
78  timer->stop();
79}
80
81QStringList LPWatcher::getMessages(QString type, QStringList msgList){
82  QStringList output;
83  type = type.toLower();
84  //Valid types - "critical"/"running"/"message"
85  //Valid messages - "dataset","message","summary","id", "timestamp", "time"
86  unsigned int base;
87  if(type=="message"){base=10;}
88  else if(type=="replicate"){base=20;}
89  else if(type=="critical"){base=30;}
90  else if(type=="mirror"){base=40;}
91  else if(type=="resilver"){base=50;}
92  else if(type=="scrub"){base=60;}
93  else{ return output; } //invalid input type
94  //Now fill the output array based upon requested outputs
95  for(int i=0; i<msgList.length(); i++){
96    msgList[i] = msgList[i].toLower();
97    if(msgList[i]=="id" && LOGS.contains(base)){ output << LOGS[base]; }
98    else if(msgList[i]=="device" && LOGS.contains(base+1)){ output << LOGS[base+1]; }
99    else if(msgList[i]=="summary" && LOGS.contains(base+2)){ output << LOGS[base+2]; }
100    else if(msgList[i]=="message" && LOGS.contains(base+3)){ output << LOGS[base+3]; }
101    else if(msgList[i]=="timestamp" && LOGS.contains(base+4)){ output << LOGS[base+4]; }
102    else if(msgList[i]=="time" && LOGS.contains(base+5)){ output << LOGS[base+5]; }
103    else{ output << ""; }
104  }
105  //Return the output list
106  return output;
107}
108
109QStringList LPWatcher::getAllCurrentMessages(){
110  //Useful for quickly displaying all latest messages in a tooltip or summary
111  QStringList output;
112  if(LOGS.contains(12) && LOGS.contains(14)){ output << LOGS[14]+" -- "+LOGS[12]; }
113  if(LOGS.contains(22) && LOGS.contains(24)){ output << LOGS[24]+" -- "+LOGS[22]; }
114  if(LOGS.contains(32) && LOGS.contains(34)){ output << LOGS[34]+" -- "+LOGS[32]; }
115  if(LOGS.contains(42) && LOGS.contains(44)){ output << LOGS[44]+" -- "+LOGS[42]; }
116  if(LOGS.contains(52) && LOGS.contains(54)){ output << LOGS[54]+" -- "+LOGS[52]; }
117  if(LOGS.contains(62) && LOGS.contains(64)){ output << LOGS[64]+" -- "+LOGS[62]; }
118  return output;
119}
120
121bool LPWatcher::isRunning(){
122  if(LOGS.value(20) == "STARTED" || LOGS.value(20) == "RUNNING"){ return true; }
123  else if(LOGS.value(40) == "STARTED" || LOGS.value(40) == "RUNNING"){ return true; }
124  else if(LOGS.value(50) == "STARTED" || LOGS.value(50) == "RUNNING"){ return true; }
125  else if(LOGS.value(60) == "STARTED" || LOGS.value(60) == "RUNNING"){ return true; }
126  else{ return false; }
127}
128
129bool LPWatcher::hasError(){
130  return (LOGS.value(20)=="ERROR" || LOGS.contains(30) || LOGS.value(40)=="ERROR" || LOGS.value(50)=="ERROR" || LOGS.value(60)=="ERROR");
131}
132
133// -------------------------------------
134//    PRIVATE FUNCTIONS
135// -------------------------------------
136void LPWatcher::readLogFile(bool quiet){
137  QTextStream in(logfile);
138  while(!LFSTREAM->atEnd()){
139    QString log = LFSTREAM->readLine();
140    //Divide up the log into it's sections
141    QString timestamp = log.section(":",0,2).simplified();
142    QString time = timestamp.section(" ",3,3).simplified();
143    QString message = log.section(":",3,3).toLower().simplified();
144    QString dev = log.section(":",4,4).simplified(); //dataset/snapshot/nothing
145    //Now decide what to do/show because of the log message
146    //qDebug() << "New Log Message:" << log;
147    if(message.contains("creating snapshot")){
148      dev = message.section(" ",-1).simplified();
149      //Setup the status of the message
150      LOGS.insert(10,"SNAPCREATED");
151      LOGS.insert(11,dev); //dataset
152      LOGS.insert(12, QString(tr("New snapshot of %1")).arg(dev) ); //summary
153      LOGS.insert(13, QString(tr("Creating snapshot for %1")).arg(dev) );
154      LOGS.insert(14, timestamp); //full timestamp
155      LOGS.insert(15, time); // time only
156      if(!quiet){ emit MessageAvailable("message"); }
157    }else if(message.contains("starting replication")){
158      //Setup the file watcher for this new log file
159      FILE_REPLICATION = dev;
160      startRepFileWatcher();
161      //Set the appropriate status variables
162      LOGS.insert(20,"REPSTARTED");
163      // 21 - dataset set on update ping
164      LOGS.insert(22, tr("Replication Started") ); //summary
165      // 23 - Full message set on update ping
166      LOGS.insert(24, timestamp); //full timestamp
167      LOGS.insert(25, time); // time only
168      if(!quiet){ emit MessageAvailable("replication"); }
169    }else if(message.contains("finished replication")){
170      stopRepFileWatcher();
171      dev = message.section(" ",-1).simplified();
172      //Now set the status of the process
173      LOGS.insert(20,"REPFINISHED");
174      LOGS.insert(21,dev); //dataset
175      LOGS.insert(22, tr("Finished Replication") ); //summary
176      LOGS.insert(23, QString(tr("Finished replication for %1")).arg(dev) );
177      LOGS.insert(24, timestamp); //full timestamp
178      LOGS.insert(25, time); // time only     
179      if(!quiet){ emit MessageAvailable("replication"); }
180    }else if( message.contains("FAILED replication") ){
181      stopRepFileWatcher();
182      //Now set the status of the process
183      dev = message.section(" ",-1).simplified();
184      QString file = log.section("LOGFILE:",1,1).simplified();
185      QString tt = QString(tr("Replication Failed for %1")).arg(dev) +"\n"+ QString(tr("Logfile available at: %1")).arg(file);
186      LOGS.insert(20,"REPERROR");
187      LOGS.insert(21,dev); //dataset
188      LOGS.insert(22, tr("Replication Failed") ); //summary
189      LOGS.insert(23, tt );
190      LOGS.insert(24, timestamp); //full timestamp
191      LOGS.insert(25, time); // time only     
192      if(!quiet){ emit MessageAvailable("replication"); }
193    }
194         
195  }
196}
197
198void LPWatcher::readReplicationFile(bool quiet){
199  QString stat;
200  while( !RFSTREAM->atEnd() ){ 
201    QString line = RFSTREAM->readLine(); 
202    if(line.contains("total estimated size")){ repTotK = line.section(" ",-1).simplified(); } //save the total size to replicate
203    else if(line.startsWith("send from ")){}
204    else if(line.startsWith("TIME ")){}
205    else{ stat = line; } //only save the relevant/latest status line
206  }
207  if(stat.isEmpty()){
208    qDebug() << "New Status Message:" << stat;
209    //Divide up the status message into sections
210    stat.replace("\t"," ");
211    QString dataset = stat.section(" ",2,2,QString::SectionSkipEmpty).section("/",0,0).simplified();
212    QString cSize = stat.section(" ",1,1,QString::SectionSkipEmpty);
213    //Now Setup the tooltip
214    if(cSize != lastSize){ //don't update the info if the same size info
215      QString percent;
216      if(!repTotK.isEmpty()){
217        //calculate the percentage
218        double tot = displayToDoubleK(repTotK);
219        double c = displayToDoubleK(cSize);
220        if( tot!=-1 & c!=-1){
221          double p = (c*100)/tot;
222          p = int(p*10)/10.0; //round to 1 decimel places
223          percent = QString::number(p) + "%";
224        }
225      }
226      if(repTotK.isEmpty()){ repTotK = "??"; }
227      //Format the info string
228      QString status = cSize+"/"+repTotK;
229      if(!percent.isEmpty()){ status.append(" ("+percent+")"); }
230      QString txt = QString(tr("Replicating %1: %2")).arg(dataset, status);
231      lastSize = cSize; //save the current size for later
232      //Now set the current process status
233      LOGS.insert(20,"RUNNING");
234      LOGS.insert(21,dataset);
235      LOGS.insert(23,txt);
236      if(!quiet){ emit MessageAvailable("replication"); }
237    }
238  }
239}
240
241void LPWatcher::startRepFileWatcher(){
242  if(FILE_REPLICATION.isEmpty()){ return; }
243  if(!QFile::exists(FILE_REPLICATION)){ system( QString("touch "+FILE_REPLICATION).toUtf8() ); }
244  repfile->setFileName(FILE_REPLICATION);
245  repfile->open(QIODevice::ReadOnly | QIODevice::Text);
246  RFSTREAM = new QTextStream(repfile);
247  watcher->addPath(FILE_REPLICATION);
248}
249
250void LPWatcher::stopRepFileWatcher(){
251  if(FILE_REPLICATION.isEmpty()){ return; }
252  watcher->removePath(FILE_REPLICATION);
253  repfile->close();
254  delete RFSTREAM;
255  FILE_REPLICATION.clear();
256  repTotK.clear();
257  lastSize.clear();
258}
259
260double LPWatcher::displayToDoubleK(QString displayNumber){
261  QStringList labels; 
262    labels << "K" << "M" << "G" << "T" << "P" << "E";
263  QString clab = displayNumber.right(1); //last character is the size label
264        displayNumber.chop(1); //remove the label from the number
265  double num = displayNumber.toDouble();
266  //Now format the number properly
267  bool ok = false;
268  for(int i=0; i<labels.length(); i++){
269    if(labels[i] == clab){ ok = true; break; }
270    else{ num = num*1024; } //get ready for the next size
271  }
272  if(!ok){ num = -1; } //could not determine the size
273  return num;
274}
275
276// ------------------------------
277//    PRIVATE SLOTS
278// ------------------------------
279void LPWatcher::fileChanged(QString file){
280  if(file == FILE_LOG){ readLogFile(); }
281  else  if(file == FILE_REPLICATION){ readReplicationFile(); }
282}
283
284void LPWatcher::checkPoolStatus(){
285  //Now check zpool status for bad/running statuses
286  QStringList zstat = LPBackend::getCmdOutput("zpool status");
287    //parse the output
288    QString pool, state, timestamp;
289    QStringList cDev, cStat, cMsg, cSummary;
290    //qDebug() << "-----zpool status------";
291    bool newresilver = false; bool newscrub = false; bool newerror = false;
292    for(int i=0; i<zstat.length(); i++){
293      zstat[i] = zstat[i].simplified();
294      if(zstat[i].isEmpty()){ continue; }
295      //qDebug() << zstat[i];
296      if(zstat[i].startsWith("pool:")){ pool = zstat[i].section(":",1,10).simplified(); }
297      else if(zstat[i].startsWith("state:")){ state = zstat[i].section(":",1,10).simplified(); }
298      else if(zstat[i].startsWith("scan:")){
299        //check for scrubs/resilvering progress
300        // ------ SCRUB ------
301        if(zstat[i].contains("scrub")){
302          if(zstat[i].contains(" scrub repaired ")){
303            //Scrub Finished
304            zstat[i]  = zstat[i].replace("\t"," ").simplified();
305            timestamp = zstat[i].section(" ",10,14,QString::SectionSkipEmpty);
306            QString numFixed = zstat[i].section(" ",3,3,QString::SectionSkipEmpty);
307            QString numErr = zstat[i].section(" ",7,7,QString::SectionSkipEmpty);
308            QString timeRun = zstat[i].section(" ",5,5,QString::SectionSkipEmpty);
309            //Scrub finished previously
310            if(numFixed.toInt() > 0){ 
311              if(LOGS.value(60)!="ERROR"){ newscrub=true; }
312              LOGS.insert(60, "ERROR"); 
313              LOGS.insert(62, QString(tr("Scrub repaired %1 bad blocks")).arg(numFixed) );
314              LOGS.insert(63, QString(tr("Scrub repaired %1 blocks in %2 with %3 errors")).arg(numFixed, timeRun, numErr) );
315            }else{ 
316              if(LOGS.contains(60) && LOGS.value(60)!="FINISHED"){ newscrub=true; }
317              LOGS.insert(60,"FINISHED"); 
318              LOGS.insert(62, tr("Scrub completed") );
319              LOGS.insert(63, tr("Scrub completed without needing repairs") );
320            }
321            LOGS.insert(61,pool);
322            LOGS.insert(64, timestamp);
323            LOGS.insert(65, timestamp.section(" ",3,3) );
324            if(timer->interval() != sysCheckTime){ timer->start(sysCheckTime); }
325          }else{
326            //Scrub is running - parse the line
327            timestamp = zstat[i].section(" ",5,9,QString::SectionSkipEmpty);
328            i++; QString remain = zstat[i].section(" ",7,7,QString::SectionSkipEmpty);
329            i++; QString percent = zstat[i].section(" ",2,2,QString::SectionSkipEmpty);
330            if(LOGS.value(60) != "RUNNING"){newscrub=true;}
331            LOGS.insert(60,"RUNNING");
332            LOGS.insert(61,pool);
333            LOGS.insert(62, QString(tr("Scrubbing %1: %2 (%3 remaining)")).arg(pool, percent, remain) );
334            LOGS.insert(63, QString(tr("Scrubbing %1: %2 (%3 remaining)")).arg(pool, percent, remain) );
335            LOGS.insert(64, timestamp);
336            LOGS.insert(65, timestamp.section(" ",3,3) );
337            if(timer->interval() != 60000){ timer->start(60000); } //put the timer on a 1 minute refresh since it is running
338          }
339          if(LOGS.contains(50) ){
340            //Only resilvering OR scrub is shown at a time - so remove the resilver info
341            LOGS.remove(50);
342            LOGS.remove(51);
343            LOGS.remove(52);
344            LOGS.remove(53);
345            LOGS.remove(54);
346            LOGS.remove(55);
347          }
348        // --------- RESILVERING -------
349        }else if(zstat[i].contains("resilver in progress")){
350          //Resilvering is currently running
351          timestamp = zstat[i].section(" ",5,9,QString::SectionSkipEmpty);
352          //need info from the next two lines
353          i++; QString timeleft = zstat[i].section(" ",7,7,QString::SectionSkipEmpty);
354          i++; QString percent = zstat[i].section(" ", 2,2,QString::SectionSkipEmpty);
355          //Setup the running re-silvering progress
356          if(LOGS.value(50)!="RUNNING"){newresilver=true; }
357          LOGS.insert(50, "RUNNING");
358          // 51 - need to put the actual device in here (not available on this line)
359          LOGS.insert(52, QString(tr("Resilvering: %1 (%2 remaining)")).arg(percent, timeleft) );
360          if(newresilver){ LOGS.insert(53, QString(tr("Resilvering Started: %1 remaining ")).arg( timeleft) ); }
361          else{ LOGS.insert(53,QString(tr("Resilvering: %1 (%2 remaining)")).arg(percent, timeleft) ); }
362          LOGS.insert(54, timestamp);
363          LOGS.insert(55, timestamp.section(" ",3,3) );
364          if(LOGS.contains(60) ){
365            //Only resilvering OR scrub is shown at a time - so remove the scrub info
366            LOGS.remove(60);
367            LOGS.remove(61);
368            LOGS.remove(62);
369            LOGS.remove(63);
370            LOGS.remove(64);
371            LOGS.remove(65);
372          }
373          if(timer->interval() != 60000){ timer->start(60000); }//put the timer on a 1 minute refresh since it is running
374        }else if(zstat[i].contains("resilvered")){
375          //Resilvering is finished
376          timestamp = zstat[i].section(" ",9,13,QString::SectionSkipEmpty);
377          QString timecomplete = zstat[i].section(" ",4,4,QString::SectionSkipEmpty);
378          QString errors = zstat[i].section(" ", 6,6,QString::SectionSkipEmpty);
379          //Setup the running re-silvering progress
380          if(LOGS.value(50) != "FINISHED" && LOGS.value(50) != "ERROR" ){newresilver=true; } //don't display message for first run
381          if(errors.toInt() > 0){ 
382            LOGS.insert(50, "ERROR");
383            LOGS.insert(52, QString(tr("Resilver completed in &1 with %2 errors")).arg(timecomplete, errors) );
384            LOGS.insert(53, QString(tr("Resilver completed in &1 with %2 errors")).arg(timecomplete, errors) );
385          }else{
386            LOGS.insert(50, "FINISHED");
387            LOGS.insert(52, QString(tr("Resilver completed successfully in &1")).arg(timecomplete) );
388            LOGS.insert(53, QString(tr("Resilver completed successfully in &1")).arg(timecomplete) ); 
389          }
390          // 51 - need to put the actual device in here (not available on this line)
391          LOGS.insert(54, timestamp);
392          LOGS.insert(55, timestamp.section(" ",3,3) );
393          if(LOGS.contains(60) ){
394            //Only resilvering OR scrub is shown at a time - so remove the scrub info
395            LOGS.remove(60);
396            LOGS.remove(61);
397            LOGS.remove(62);
398            LOGS.remove(63);
399            LOGS.remove(64);
400            LOGS.remove(65);
401          }
402          if(timer->interval() != sysCheckTime){ timer->start(sysCheckTime); }
403        }
404      }else if(zstat[i].startsWith("errors:")){
405        if(zstat[i] != "errors: No known data errors"){
406          qDebug() << "New zpool status error line that needs parsing:" << zstat[i];
407        }
408      }else if( state != "ONLINE" || !LOGS.value(50).isEmpty() ){
409        //Check for state/resilvering of all real devices
410        QString msg, summary, status;
411        QString device = zstat[i].section(" ",0,0,QString::SectionSkipEmpty);
412        if(zstat[i].contains("NAME STATE READ")){continue;} //nothing on this header line
413        else if(zstat[i].contains("(resilvering)")){ LOGS.insert(51, device ); continue;}
414        else if(zstat[i].contains("ONLINE")){continue;} //do nothing for this device - it is good
415        else if(zstat[i].contains("OFFLINE")){ continue; } //do nothing - this status must be set manually - it is not a "random" status
416        else if(zstat[i].contains("DEGRADED")){ 
417          cStat << "DEGRADED";
418          cMsg << tr("The device is in a degraded state, and should be replaced soon.");
419          cSummary << QString(tr("%1 is degraded.")).arg(device);
420          cDev << device;
421        }else if(zstat[i].contains("FAULTED")){ 
422          cStat << "FAULTED";
423          cMsg << tr("The device is faulty, and should be replaced.");
424          cSummary << QString(tr("%1 is faulty.")).arg(device);
425          cDev << device;
426        }else if(zstat[i].contains("REMOVED")){ 
427          cStat << "REMOVED";
428          cMsg << tr("The device was removed, and should be either be re-attached or replaced.");
429          cSummary << QString(tr("%1 was removed.")).arg(device);
430          cDev << device;
431        }else if(zstat[i].contains("UNAVAIL")){ 
432          cStat << "UNAVAILABLE";
433          cMsg << tr("The device is unavailable and should be re-added to the pool.");
434          cSummary << QString(tr("%1 is unavailable.")).arg(device);
435          cDev << device;
436        }
437      }
438    } //end of loop over zpool status lines
439   
440  //Add the critical messages to the hash
441  if(cStat.isEmpty()){
442    if(LOGS.contains(30)){
443      LOGS.remove(30);
444      LOGS.remove(31);
445      LOGS.remove(32);
446      LOGS.remove(33);
447      LOGS.remove(34);
448      LOGS.remove(35);
449    }
450  }else{
451    if(LOGS.value(30) != cStat.join(":::") ){ newerror = true; }
452    LOGS.insert(30, cStat.join(":::") );
453    LOGS.insert(31, cDev.join(":::") );
454    LOGS.insert(32, cSummary.join(":::") );
455    LOGS.insert(33, cMsg.join(":::") );
456    LOGS.insert(34, timestamp);
457    LOGS.insert(35, timestamp.section(" ",3,3) );
458  }
459   
460  //Now emit the appropriate signal
461  if(newerror){ emit MessageAvailable("critical"); }
462  else if(newresilver){ emit MessageAvailable("resilver"); }
463  else if(newscrub){ emit MessageAvailable("scrub"); }
464  else{ emit StatusUpdated(); }
465}
Note: See TracBrowser for help on using the repository browser.