source: src-qt4/life-preserver/LPWatcher.cpp @ 8c6a630

9.2-releasereleng/10.0releng/10.0.1releng/10.0.2releng/10.0.3
Last change on this file since 8c6a630 was 8c6a630, checked in by Ken Moore <ken@…>, 11 months ago

Completely finish cleaning up the replication functionality in the tray watcher it works great now.

  • Property mode set to 100644
File size: 20.2 KB
Line 
1#include "LPWatcher.h"
2
3/* ------ HASH NUMBERING NOTE -----
4  Each set of 10 is a different type of status
5    "message" status: 10-19
6    "replication" status: 20-29
7    "critical" status: 30-39
8    "mirror" status: 40-49
9    "resilver" status: 50-59
10    "scrub" status: 60-69
11  Within each set:
12    *0 = ID Code (for internal identification as necessary)
13    *1 = zpool (example: tank1)
14    *2 = summary (shortened version of the message - tooltips)
15    *3 = message (full message)
16    *4 = timestamp (full date/time timestamp in readable format)
17    *5 = short timestamp (just time in readable format)
18
19  Valid Internal ID's:
20    SNAPCREATED -> new snapshot created
21    STARTED     -> Task started
22    RUNNING     -> Task running (I.E. status update)
23    FINISHED    -> Task finished
24    ERROR               -> Task failed
25   
26*/
27
28LPWatcher::LPWatcher() : QObject(){
29  //Initialize the path variables
30  FILE_LOG = "/var/log/lpreserver/lpreserver.log";
31  FILE_ERROR="/var/log/lpreserver/error.log";
32  FILE_REPLICATION=""; //this is set automatically based on the log file outputs
33  sysCheckTime = 300000; // 5 minutes
34  //initialize the watcher and timer
35  watcher = new QFileSystemWatcher(this);
36    connect(watcher, SIGNAL(fileChanged(QString)),this,SLOT(fileChanged(QString)) );
37  timer = new QTimer();
38    connect(timer, SIGNAL(timeout()), this, SLOT(checkPoolStatus()) );
39  //initialize the log file reader
40  logfile = new QFile(FILE_LOG, this);
41  LFSTREAM = new QTextStream(logfile);
42  //initialize the replication file reader
43  repfile = new QFile(this);
44}
45
46LPWatcher::~LPWatcher(){
47  //clean up the internal objects
48  delete watcher;
49  delete timer;
50  delete logfile;
51  delete LFSTREAM;
52}
53
54// -----------------------------------
55//    PUBLIC FUNCTIONS
56// -----------------------------------
57void LPWatcher::start(){
58  if(!logfile->exists()){
59    QString dir = FILE_LOG;
60          dir.chop( dir.section("/",-1).count()+1 );
61    if(!QFile::exists(dir)){ system( QString("mkdir -p "+dir).toUtf8() ); }
62    system( QString("touch "+FILE_LOG).toUtf8() );
63  }
64  //Read the current state of the log file
65  logfile->open(QIODevice::ReadOnly | QIODevice::Text);
66  readLogFile(true); //do this quietly the first time through
67  //Now start up the log file watcher
68  watcher->addPath(FILE_LOG);
69  //Now check for any current errors in the LPbackend
70  checkPoolStatus();
71  //And start up the error file watcher
72  if(!timer->isActive()){ timer->start(sysCheckTime); }
73}
74
75void LPWatcher::stop(){
76  watcher->removePaths(watcher->files());
77  logfile->close();
78  timer->stop();
79}
80
81QStringList LPWatcher::getMessages(QString type, QStringList msgList){
82  QStringList output;
83  type = type.toLower();
84  //Valid types - "critical"/"running"/"message"
85  //Valid messages - "dataset","message","summary","id", "timestamp", "time"
86  unsigned int base;
87  if(type=="message"){base=10;}
88  else if(type=="replication"){base=20;}
89  else if(type=="critical"){base=30;}
90  else if(type=="mirror"){base=40;}
91  else if(type=="resilver"){base=50;}
92  else if(type=="scrub"){base=60;}
93  else{ return output; } //invalid input type
94  //Now fill the output array based upon requested outputs
95  for(int i=0; i<msgList.length(); i++){
96    msgList[i] = msgList[i].toLower();
97    if(msgList[i]=="id" && LOGS.contains(base)){ output << LOGS[base]; }
98    else if(msgList[i]=="device" && LOGS.contains(base+1)){ output << LOGS[base+1]; }
99    else if(msgList[i]=="summary" && LOGS.contains(base+2)){ output << LOGS[base+2]; }
100    else if(msgList[i]=="message" && LOGS.contains(base+3)){ output << LOGS[base+3]; }
101    else if(msgList[i]=="timestamp" && LOGS.contains(base+4)){ output << LOGS[base+4]; }
102    else if(msgList[i]=="time" && LOGS.contains(base+5)){ output << LOGS[base+5]; }
103    else{ output << ""; }
104  }
105  //Return the output list
106  return output;
107}
108
109QStringList LPWatcher::getAllCurrentMessages(){
110  //Useful for quickly displaying all latest messages in a tooltip or summary
111  QStringList output;
112  if(LOGS.contains(12) && LOGS.contains(14)){ output << LOGS[14]+" -- "+LOGS[12]; }
113  if(LOGS.contains(22) && LOGS.contains(24)){ output << LOGS[24]+" -- "+LOGS[22]; }
114  if(LOGS.contains(32) && LOGS.contains(34)){ output << LOGS[34]+" -- "+LOGS[32]; }
115  if(LOGS.contains(42) && LOGS.contains(44)){ output << LOGS[44]+" -- "+LOGS[42]; }
116  if(LOGS.contains(52) && LOGS.contains(54)){ output << LOGS[54]+" -- "+LOGS[52]; }
117  if(LOGS.contains(62) && LOGS.contains(64)){ output << LOGS[64]+" -- "+LOGS[62]; }
118  return output;
119}
120
121bool LPWatcher::isRunning(){
122  if(LOGS.value(20) == "STARTED" || LOGS.value(20) == "RUNNING"){ return true; }
123  else if(LOGS.value(40) == "STARTED" || LOGS.value(40) == "RUNNING"){ return true; }
124  else if(LOGS.value(50) == "STARTED" || LOGS.value(50) == "RUNNING"){ return true; }
125  else if(LOGS.value(60) == "STARTED" || LOGS.value(60) == "RUNNING"){ return true; }
126  else{ return false; }
127}
128
129bool LPWatcher::hasError(){
130  return (LOGS.value(20)=="ERROR" || LOGS.contains(30) || LOGS.value(40)=="ERROR" || LOGS.value(50)=="ERROR" || LOGS.value(60)=="ERROR");
131}
132
133// -------------------------------------
134//    PRIVATE FUNCTIONS
135// -------------------------------------
136void LPWatcher::readLogFile(bool quiet){
137  QTextStream in(logfile);
138  while(!LFSTREAM->atEnd()){
139    QString log = LFSTREAM->readLine();
140    //Divide up the log into it's sections
141    QString timestamp = log.section(":",0,2).simplified();
142    QString time = timestamp.section(" ",3,3).simplified();
143    QString message = log.section(":",3,3).toLower().simplified();
144    QString dev = log.section(":",4,4).simplified(); //dataset/snapshot/nothing
145    //Now decide what to do/show because of the log message
146    //qDebug() << "New Log Message:" << log;
147    if(message.contains("creating snapshot")){
148      dev = message.section(" ",-1).simplified();
149      //Setup the status of the message
150      LOGS.insert(10,"SNAPCREATED");
151      LOGS.insert(11,dev); //dataset
152      LOGS.insert(12, QString(tr("New snapshot of %1")).arg(dev) ); //summary
153      LOGS.insert(13, QString(tr("Creating snapshot for %1")).arg(dev) );
154      LOGS.insert(14, timestamp); //full timestamp
155      LOGS.insert(15, time); // time only
156      if(!quiet){ emit MessageAvailable("message"); }
157    }else if(message.contains("starting replication")){
158      //Setup the file watcher for this new log file
159      FILE_REPLICATION = dev;
160      dev = message.section(" ",5,5,QString::SectionSkipEmpty);
161      startRepFileWatcher();
162      //Set the appropriate status variables
163      LOGS.insert(20,"STARTED");
164      LOGS.insert(21, dev); //zpool
165      LOGS.insert(22, tr("Replication Starting") ); //summary
166      LOGS.insert(23, QString(tr("Starting replication for %1")).arg(dev) ); //Full message
167      LOGS.insert(24, timestamp); //full timestamp
168      LOGS.insert(25, time); // time only
169      if(!quiet){ emit MessageAvailable("replication"); }
170    }else if(message.contains("finished replication")){
171      stopRepFileWatcher();
172      dev = message.section(" ",-1).simplified();
173      //Now set the status of the process
174      LOGS.insert(20,"FINISHED");
175      LOGS.insert(21,dev); //dataset
176      LOGS.insert(22, tr("Finished Replication") ); //summary
177      LOGS.insert(23, QString(tr("Finished replication for %1")).arg(dev) );
178      LOGS.insert(24, timestamp); //full timestamp
179      LOGS.insert(25, time); // time only     
180      if(!quiet){ emit MessageAvailable("replication"); }
181    }else if( message.contains("failed replication") ){
182      stopRepFileWatcher();
183      //Now set the status of the process
184      dev = message.section(" ",-1).simplified();
185      QString file = log.section("LOGFILE:",1,1).simplified();
186      QString tt = QString(tr("Replication Failed for %1")).arg(dev) +"\n"+ QString(tr("Logfile available at: %1")).arg(file);
187      LOGS.insert(20,"ERROR");
188      LOGS.insert(21,dev); //dataset
189      LOGS.insert(22, tr("Replication Failed") ); //summary
190      LOGS.insert(23, tt );
191      LOGS.insert(24, timestamp); //full timestamp
192      LOGS.insert(25, time); // time only     
193      if(!quiet){ emit MessageAvailable("replication"); }
194    }
195         
196  }
197}
198
199void LPWatcher::readReplicationFile(){
200  QString stat;
201  while( !RFSTREAM->atEnd() ){ 
202    QString line = RFSTREAM->readLine(); 
203    if(line.contains("total estimated size")){ repTotK = line.section(" ",-1).simplified(); } //save the total size to replicate
204    else if(line.startsWith("send from ")){}
205    else if(line.startsWith("TIME ")){}
206    else if(line.startsWith("warning: ")){} //start of an error
207    else{ stat = line; } //only save the relevant/latest status line
208  }
209  if(!stat.isEmpty()){
210    //qDebug() << "New Status Message:" << stat;           
211    //Divide up the status message into sections
212    stat.replace("\t"," ");
213    QString dataset = stat.section(" ",2,2,QString::SectionSkipEmpty).section("/",0,0).simplified();
214    QString cSize = stat.section(" ",1,1,QString::SectionSkipEmpty);
215    //Now Setup the tooltip
216    if(cSize != lastSize){ //don't update the info if the same size info
217      QString percent;
218      if(!repTotK.isEmpty()){
219        //calculate the percentage
220        double tot = displayToDoubleK(repTotK);
221        double c = displayToDoubleK(cSize);
222        if( tot!=-1 & c!=-1){
223          double p = (c*100)/tot;
224          p = int(p*10)/10.0; //round to 1 decimel places
225          percent = QString::number(p) + "%";
226        }
227      }
228      if(repTotK.isEmpty()){ repTotK = "??"; }
229      //Format the info string
230      QString status = cSize+"/"+repTotK;
231      if(!percent.isEmpty()){ status.append(" ("+percent+")"); }
232      QString txt = QString(tr("Replicating %1: %2")).arg(dataset, status);
233      lastSize = cSize; //save the current size for later
234      //Now set the current process status
235      LOGS.insert(20,"RUNNING");
236      LOGS.insert(21,dataset);
237      LOGS.insert(22,txt);
238      LOGS.insert(23,txt);
239      emit MessageAvailable("");
240    }
241  }
242}
243
244void LPWatcher::startRepFileWatcher(){
245  //qDebug() << "Start Rep File Watcher:" << FILE_REPLICATION;
246  if(FILE_REPLICATION.isEmpty()){ return; }
247
248  if(watcher->files().contains(FILE_REPLICATION)){ return; } //duplicate - file already opened
249  else if(!watcher->files().isEmpty()){ 
250    //Check that the file watcher is not already operating on a file
251    // only one can be running at a time, so always cancel the previous instance (it is stale)
252    QString tmp = FILE_REPLICATION;
253    stopRepFileWatcher();
254    FILE_REPLICATION = tmp;
255  }
256  //Check to make sure that lpreserver actually has a process running before starting this
257  //if(!QFile::exists("/var/run/<something.pid>"){ FILE_REPLICATION.clear(); return; }
258  //Check for the existance of the file to watch and create it as necessary 
259  if(!QFile::exists(FILE_REPLICATION)){ system( QString("touch "+FILE_REPLICATION).toUtf8() ); }
260  //Now open the file and start watching it for changes
261  repfile->setFileName(FILE_REPLICATION);
262  repfile->open(QIODevice::ReadOnly | QIODevice::Text);
263  RFSTREAM = new QTextStream(repfile);
264  watcher->addPath(FILE_REPLICATION);
265  //qDebug() << "Finished starting rep file watcher";
266}
267
268void LPWatcher::stopRepFileWatcher(){
269  //qDebug() << "Stop Rep File Watcher:" << FILE_REPLICATION;
270  if(FILE_REPLICATION.isEmpty()){ return; }
271  watcher->removePath(FILE_REPLICATION);
272  repfile->close();
273  delete RFSTREAM;
274  FILE_REPLICATION.clear();
275  repTotK.clear();
276  lastSize.clear();
277  //qDebug() << "Finished stopping rep file watcher";
278}
279
280double LPWatcher::displayToDoubleK(QString displayNumber){
281  QStringList labels; 
282    labels << "K" << "M" << "G" << "T" << "P" << "E";
283  QString clab = displayNumber.right(1); //last character is the size label
284        displayNumber.chop(1); //remove the label from the number
285  double num = displayNumber.toDouble();
286  //Now format the number properly
287  bool ok = false;
288  for(int i=0; i<labels.length(); i++){
289    if(labels[i] == clab){ ok = true; break; }
290    else{ num = num*1024; } //get ready for the next size
291  }
292  if(!ok){ num = -1; } //could not determine the size
293  return num;
294}
295
296// ------------------------------
297//    PRIVATE SLOTS
298// ------------------------------
299void LPWatcher::fileChanged(QString file){
300  if(file == FILE_LOG){ readLogFile(); }
301  else  if(file == FILE_REPLICATION){ readReplicationFile(); }
302}
303
304void LPWatcher::checkPoolStatus(){
305  //Now check zpool status for bad/running statuses
306  QStringList zstat = LPBackend::getCmdOutput("zpool status");
307    //parse the output
308    QString pool, state, timestamp;
309    QStringList cDev, cStat, cMsg, cSummary;
310    //qDebug() << "-----zpool status------";
311    bool newresilver = false; bool newscrub = false; bool newerror = false;
312    for(int i=0; i<zstat.length(); i++){
313      zstat[i] = zstat[i].simplified();
314      if(zstat[i].isEmpty()){ continue; }
315      //qDebug() << zstat[i];
316      if(zstat[i].startsWith("pool:")){ pool = zstat[i].section(":",1,10).simplified(); }
317      else if(zstat[i].startsWith("state:")){ state = zstat[i].section(":",1,10).simplified(); }
318      else if(zstat[i].startsWith("scan:")){
319        //check for scrubs/resilvering progress
320        // ------ SCRUB ------
321        if(zstat[i].contains("scrub")){
322          if(zstat[i].contains(" scrub repaired ")){
323            //Scrub Finished
324            zstat[i]  = zstat[i].replace("\t"," ").simplified();
325            timestamp = zstat[i].section(" ",10,14,QString::SectionSkipEmpty);
326            QString numFixed = zstat[i].section(" ",3,3,QString::SectionSkipEmpty);
327            QString numErr = zstat[i].section(" ",7,7,QString::SectionSkipEmpty);
328            QString timeRun = zstat[i].section(" ",5,5,QString::SectionSkipEmpty);
329            //Scrub finished previously
330            if(numFixed.toInt() > 0){ 
331              if(LOGS.value(60)!="ERROR"){ newscrub=true; }
332              LOGS.insert(60, "ERROR"); 
333              LOGS.insert(62, QString(tr("Scrub repaired %1 bad blocks")).arg(numFixed) );
334              LOGS.insert(63, QString(tr("Scrub repaired %1 blocks in %2 with %3 errors")).arg(numFixed, timeRun, numErr) );
335            }else{ 
336              if(LOGS.contains(60) && LOGS.value(60)!="FINISHED"){ newscrub=true; }
337              LOGS.insert(60,"FINISHED"); 
338              LOGS.insert(62, tr("Scrub completed") );
339              LOGS.insert(63, tr("Scrub completed without needing repairs") );
340            }
341            LOGS.insert(61,pool);
342            LOGS.insert(64, timestamp);
343            LOGS.insert(65, timestamp.section(" ",3,3) );
344            if(timer->interval() != sysCheckTime){ timer->start(sysCheckTime); }
345          }else{
346            //Scrub is running - parse the line
347            timestamp = zstat[i].section(" ",5,9,QString::SectionSkipEmpty);
348            i++; QString remain = zstat[i].section(" ",7,7,QString::SectionSkipEmpty);
349            i++; QString percent = zstat[i].section(" ",2,2,QString::SectionSkipEmpty);
350            if(LOGS.value(60) != "RUNNING"){newscrub=true;}
351            LOGS.insert(60,"RUNNING");
352            LOGS.insert(61,pool);
353            LOGS.insert(62, QString(tr("Scrubbing %1: %2 (%3 remaining)")).arg(pool, percent, remain) );
354            LOGS.insert(63, QString(tr("Scrubbing %1: %2 (%3 remaining)")).arg(pool, percent, remain) );
355            LOGS.insert(64, timestamp);
356            LOGS.insert(65, timestamp.section(" ",3,3) );
357            if(timer->interval() != 60000){ timer->start(60000); } //put the timer on a 1 minute refresh since it is running
358          }
359          if(LOGS.contains(50) ){
360            //Only resilvering OR scrub is shown at a time - so remove the resilver info
361            LOGS.remove(50);
362            LOGS.remove(51);
363            LOGS.remove(52);
364            LOGS.remove(53);
365            LOGS.remove(54);
366            LOGS.remove(55);
367          }
368        // --------- RESILVERING -------
369        }else if(zstat[i].contains("resilver in progress")){
370          //Resilvering is currently running
371          timestamp = zstat[i].section(" ",5,9,QString::SectionSkipEmpty);
372          //need info from the next two lines
373          i++; QString timeleft = zstat[i].section(" ",7,7,QString::SectionSkipEmpty);
374          i++; QString percent = zstat[i].section(" ", 2,2,QString::SectionSkipEmpty);
375          //Setup the running re-silvering progress
376          if(LOGS.value(50)!="RUNNING"){newresilver=true; }
377          LOGS.insert(50, "RUNNING");
378          // 51 - need to put the actual device in here (not available on this line)
379          LOGS.insert(52, QString(tr("Resilvering: %1 (%2 remaining)")).arg(percent, timeleft) );
380          if(newresilver){ LOGS.insert(53, QString(tr("Resilvering Started: %1 remaining ")).arg( timeleft) ); }
381          else{ LOGS.insert(53,QString(tr("Resilvering: %1 (%2 remaining)")).arg(percent, timeleft) ); }
382          LOGS.insert(54, timestamp);
383          LOGS.insert(55, timestamp.section(" ",3,3) );
384          if(LOGS.contains(60) ){
385            //Only resilvering OR scrub is shown at a time - so remove the scrub info
386            LOGS.remove(60);
387            LOGS.remove(61);
388            LOGS.remove(62);
389            LOGS.remove(63);
390            LOGS.remove(64);
391            LOGS.remove(65);
392          }
393          if(timer->interval() != 60000){ timer->start(60000); }//put the timer on a 1 minute refresh since it is running
394        }else if(zstat[i].contains("resilvered")){
395          //Resilvering is finished
396          timestamp = zstat[i].section(" ",9,13,QString::SectionSkipEmpty);
397          QString timecomplete = zstat[i].section(" ",4,4,QString::SectionSkipEmpty);
398          QString errors = zstat[i].section(" ", 6,6,QString::SectionSkipEmpty);
399          //Setup the running re-silvering progress
400          if(LOGS.value(50) != "FINISHED" && LOGS.value(50) != "ERROR" ){newresilver=true; } //don't display message for first run
401          if(errors.toInt() > 0){ 
402            LOGS.insert(50, "ERROR");
403            LOGS.insert(52, QString(tr("Resilver completed in &1 with %2 errors")).arg(timecomplete, errors) );
404            LOGS.insert(53, QString(tr("Resilver completed in &1 with %2 errors")).arg(timecomplete, errors) );
405          }else{
406            LOGS.insert(50, "FINISHED");
407            LOGS.insert(52, QString(tr("Resilver completed successfully in &1")).arg(timecomplete) );
408            LOGS.insert(53, QString(tr("Resilver completed successfully in &1")).arg(timecomplete) ); 
409          }
410          // 51 - need to put the actual device in here (not available on this line)
411          LOGS.insert(54, timestamp);
412          LOGS.insert(55, timestamp.section(" ",3,3) );
413          if(LOGS.contains(60) ){
414            //Only resilvering OR scrub is shown at a time - so remove the scrub info
415            LOGS.remove(60);
416            LOGS.remove(61);
417            LOGS.remove(62);
418            LOGS.remove(63);
419            LOGS.remove(64);
420            LOGS.remove(65);
421          }
422          if(timer->interval() != sysCheckTime){ timer->start(sysCheckTime); }
423        }
424      }else if(zstat[i].startsWith("errors:")){
425        if(zstat[i] != "errors: No known data errors"){
426          qDebug() << "New zpool status error line that needs parsing:" << zstat[i];
427        }
428      }else if( state != "ONLINE" || !LOGS.value(50).isEmpty() ){
429        //Check for state/resilvering of all real devices
430        QString msg, summary, status;
431        QString device = zstat[i].section(" ",0,0,QString::SectionSkipEmpty);
432        if(zstat[i].contains("NAME STATE READ")){continue;} //nothing on this header line
433        else if(zstat[i].contains("(resilvering)")){ LOGS.insert(51, device ); continue;}
434        else if(zstat[i].contains("ONLINE")){continue;} //do nothing for this device - it is good
435        else if(zstat[i].contains("OFFLINE")){ continue; } //do nothing - this status must be set manually - it is not a "random" status
436        else if(zstat[i].contains("DEGRADED")){ 
437          cStat << "DEGRADED";
438          cMsg << tr("The device is in a degraded state, and should be replaced soon.");
439          cSummary << QString(tr("%1 is degraded.")).arg(device);
440          cDev << device;
441        }else if(zstat[i].contains("FAULTED")){ 
442          cStat << "FAULTED";
443          cMsg << tr("The device is faulty, and should be replaced.");
444          cSummary << QString(tr("%1 is faulty.")).arg(device);
445          cDev << device;
446        }else if(zstat[i].contains("REMOVED")){ 
447          cStat << "REMOVED";
448          cMsg << tr("The device was removed, and should be either be re-attached or replaced.");
449          cSummary << QString(tr("%1 was removed.")).arg(device);
450          cDev << device;
451        }else if(zstat[i].contains("UNAVAIL")){ 
452          cStat << "UNAVAILABLE";
453          cMsg << tr("The device is unavailable and should be re-added to the pool.");
454          cSummary << QString(tr("%1 is unavailable.")).arg(device);
455          cDev << device;
456        }
457      }
458    } //end of loop over zpool status lines
459   
460  //Add the critical messages to the hash
461  if(cStat.isEmpty()){
462    if(LOGS.contains(30)){
463      LOGS.remove(30);
464      LOGS.remove(31);
465      LOGS.remove(32);
466      LOGS.remove(33);
467      LOGS.remove(34);
468      LOGS.remove(35);
469    }
470  }else{
471    if(LOGS.value(30) != cStat.join(":::") ){ newerror = true; }
472    LOGS.insert(30, cStat.join(":::") );
473    LOGS.insert(31, cDev.join(":::") );
474    LOGS.insert(32, cSummary.join(":::") );
475    LOGS.insert(33, cMsg.join(":::") );
476    LOGS.insert(34, timestamp);
477    LOGS.insert(35, timestamp.section(" ",3,3) );
478  }
479   
480  //Now emit the appropriate signal
481  if(newerror){ emit MessageAvailable("critical"); }
482  else if(newresilver){ emit MessageAvailable("resilver"); }
483  else if(newscrub){ emit MessageAvailable("scrub"); }
484  else{ emit StatusUpdated(); }
485}
Note: See TracBrowser for help on using the repository browser.