xref: /glogg/src/data/logdata.cpp (revision 9f850936f193b6e0057829e50d76cbf76e3a62c7)
1 /*
2  * Copyright (C) 2009, 2010, 2013, 2014, 2015 Nicolas Bonnefon and other contributors
3  *
4  * This file is part of glogg.
5  *
6  * glogg is free software: you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation, either version 3 of the License, or
9  * (at your option) any later version.
10  *
11  * glogg is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with glogg.  If not, see <http://www.gnu.org/licenses/>.
18  */
19 
20 // This file implements LogData, the content of a log file.
21 
22 #include <iostream>
23 
24 #include <cassert>
25 
26 #include <QFileInfo>
27 
28 #include "log.h"
29 
30 #include "logdata.h"
31 #include "logfiltereddata.h"
32 #if defined(GLOGG_SUPPORTS_INOTIFY) || defined(GLOGG_SUPPORTS_KQUEUE) || defined(WIN32)
33 #include "platformfilewatcher.h"
34 #else
35 #include "qtfilewatcher.h"
36 #endif
37 
38 // Implementation of the 'start' functions for each operation
39 
40 void LogData::AttachOperation::doStart(
41         LogDataWorkerThread& workerThread ) const
42 {
43     LOG(logDEBUG) << "Attaching " << filename_.toStdString();
44     workerThread.attachFile( filename_ );
45     workerThread.indexAll();
46 }
47 
48 void LogData::FullIndexOperation::doStart(
49         LogDataWorkerThread& workerThread ) const
50 {
51     LOG(logDEBUG) << "Reindexing (full)";
52     workerThread.indexAll();
53 }
54 
55 void LogData::PartialIndexOperation::doStart(
56         LogDataWorkerThread& workerThread ) const
57 {
58     LOG(logDEBUG) << "Reindexing (partial)";
59     workerThread.indexAdditionalLines();
60 }
61 
62 
63 // Constructs an empty log file.
64 // It must be displayed without error.
65 LogData::LogData() : AbstractLogData(), indexing_data_(),
66     fileMutex_(), workerThread_( &indexing_data_ )
67 {
68     // Start with an "empty" log
69     attached_file_ = nullptr;
70     currentOperation_ = nullptr;
71     nextOperation_    = nullptr;
72 
73     codec_ = QTextCodec::codecForName( "ISO-8859-1" );
74 
75 #if defined(GLOGG_SUPPORTS_INOTIFY) || defined(GLOGG_SUPPORTS_KQUEUE) || defined(WIN32)
76     fileWatcher_ = std::make_shared<PlatformFileWatcher>();
77 #else
78     fileWatcher_ = std::make_shared<QtFileWatcher>();
79 #endif
80 
81     // Initialise the file watcher
82     connect( fileWatcher_.get(), SIGNAL( fileChanged( const QString& ) ),
83             this, SLOT( fileChangedOnDisk() ) );
84     // Forward the update signal
85     connect( &workerThread_, SIGNAL( indexingProgressed( int ) ),
86             this, SIGNAL( loadingProgressed( int ) ) );
87     connect( &workerThread_, SIGNAL( indexingFinished( LoadingStatus ) ),
88             this, SLOT( indexingFinished( LoadingStatus ) ) );
89 
90     // Starts the worker thread
91     workerThread_.start();
92 }
93 
94 LogData::~LogData()
95 {
96     // Remove the current file from the watch list
97     if ( attached_file_ )
98         fileWatcher_->removeFile( attached_file_->fileName() );
99 
100     // FIXME
101     // workerThread_.stop();
102 }
103 
104 //
105 // Public functions
106 //
107 
108 void LogData::attachFile( const QString& fileName )
109 {
110     LOG(logDEBUG) << "LogData::attachFile " << fileName.toStdString();
111 
112     if ( attached_file_ ) {
113         // We cannot reattach
114         throw CantReattachErr();
115     }
116 
117     attached_file_.reset( new QFile( fileName ) );
118     attached_file_->open( QIODevice::ReadOnly );
119 
120     std::shared_ptr<const LogDataOperation> operation( new AttachOperation( fileName ) );
121     enqueueOperation( std::move( operation ) );
122 }
123 
124 void LogData::interruptLoading()
125 {
126     workerThread_.interrupt();
127 }
128 
129 qint64 LogData::getFileSize() const
130 {
131     return indexing_data_.getSize();
132 }
133 
134 QDateTime LogData::getLastModifiedDate() const
135 {
136     return lastModifiedDate_;
137 }
138 
139 // Return an initialised LogFilteredData. The search is not started.
140 LogFilteredData* LogData::getNewFilteredData() const
141 {
142     LogFilteredData* newFilteredData = new LogFilteredData( this );
143 
144     return newFilteredData;
145 }
146 
147 void LogData::reload()
148 {
149     workerThread_.interrupt();
150 
151     enqueueOperation( std::make_shared<FullIndexOperation>() );
152 }
153 
154 void LogData::setPollingInterval( uint32_t interval_ms )
155 {
156     fileWatcher_->setPollingInterval( interval_ms );
157 }
158 
159 //
160 // Private functions
161 //
162 
163 // Add an operation to the queue and perform it immediately if
164 // there is none ongoing.
165 void LogData::enqueueOperation( std::shared_ptr<const LogDataOperation> new_operation )
166 {
167     if ( currentOperation_ == nullptr )
168     {
169         // We do it immediately
170         currentOperation_ =  new_operation;
171         startOperation();
172     }
173     else
174     {
175         // An operation is in progress...
176         // ... we schedule the attach op for later
177         nextOperation_ = new_operation;
178     }
179 }
180 
181 // Performs the current operation asynchronously, a indexingFinished
182 // signal will be received when it's finished.
183 void LogData::startOperation()
184 {
185     if ( currentOperation_ )
186     {
187         LOG(logDEBUG) << "startOperation found something to do.";
188 
189         // Let the operation do its stuff
190         currentOperation_->start( workerThread_ );
191     }
192 }
193 
194 //
195 // Slots
196 //
197 
198 void LogData::fileChangedOnDisk()
199 {
200     const QString name = attached_file_->fileName();
201 
202     LOG(logDEBUG) << "signalFileChanged: " << name.toStdString();
203 
204     QFileInfo info( name );
205     qint64 file_size = indexing_data_.getSize();
206     LOG(logDEBUG) << "current indexed fileSize=" << file_size;
207     LOG(logDEBUG) << "info file_->size()=" << info.size();
208     LOG(logDEBUG) << "attached_file_->size()=" << attached_file_->size();
209     // In absence of any clearer information, we use the following size comparison
210     // to determine whether we are following the same file or not (i.e. the file
211     // has been moved and the inode we are following is now under a new name, if for
212     // instance log has been rotated). We want to follow the name so we have to reopen
213     // the file to ensure we are reading the right one.
214     // This is a crude heuristic but necessary for notification services that do not
215     // give details (e.g. kqueues)
216     if ( ( info.size() != attached_file_->size() )
217             || ( attached_file_->openMode() == QIODevice::NotOpen ) ) {
218         LOG(logINFO) << "Inconsistent size, the file might have changed, re-opening";
219         auto reopened = std::make_unique<QFile>( name );
220         reopened->open( QIODevice::ReadOnly );
221         QMutexLocker locker( &fileMutex_ );
222         attached_file_ = std::move( reopened );      // This will close the old one and open the new
223 
224         // We don't force a (slow) full reindex as this routinely happens if
225         // the file is appended quickly.
226         // This means we can occasionally have false negatives (should be dealt with at
227         // a lower level): e.g. if a new file is created with the same name as the old one
228         // and with a size greater than the old one (should be rare in practice).
229     }
230 
231     std::shared_ptr<LogDataOperation> newOperation;
232 
233     qint64 real_file_size = attached_file_->size();
234     if ( real_file_size < file_size ) {
235         fileChangedOnDisk_ = Truncated;
236         LOG(logINFO) << "File truncated";
237         newOperation = std::make_shared<FullIndexOperation>();
238     }
239     else if ( real_file_size == file_size ) {
240         LOG(logINFO) << "No change in file";
241     }
242     else if ( fileChangedOnDisk_ != DataAdded ) {
243         fileChangedOnDisk_ = DataAdded;
244         LOG(logINFO) << "New data on disk";
245         newOperation = std::make_shared<PartialIndexOperation>();
246     }
247 
248     if ( newOperation ) {
249         enqueueOperation( newOperation );
250         lastModifiedDate_ = info.lastModified();
251 
252         emit fileChanged( fileChangedOnDisk_ );
253     }
254 }
255 
256 void LogData::indexingFinished( LoadingStatus status )
257 {
258     LOG(logDEBUG) << "indexingFinished: " <<
259         ( status == LoadingStatus::Successful ) <<
260         ", found " << indexing_data_.getNbLines() << " lines.";
261 
262     if ( status == LoadingStatus::Successful ) {
263         // Start watching we watch the file for updates
264         fileChangedOnDisk_ = Unchanged;
265         fileWatcher_->addFile( attached_file_->fileName() );
266 
267         // Update the modified date/time if the file exists
268         lastModifiedDate_ = QDateTime();
269         QFileInfo fileInfo( *attached_file_ );
270         if ( fileInfo.exists() )
271             lastModifiedDate_ = fileInfo.lastModified();
272     }
273 
274     // FIXME be cleverer here as a notification might have arrived whilst we
275     // were indexing.
276     fileChangedOnDisk_ = Unchanged;
277 
278     LOG(logDEBUG) << "Sending indexingFinished.";
279     emit loadingFinished( status );
280 
281     // So now the operation is done, let's see if there is something
282     // else to do, in which case, do it!
283     assert( currentOperation_ );
284 
285     currentOperation_ = std::move( nextOperation_ );
286     nextOperation_.reset();
287 
288     if ( currentOperation_ ) {
289         LOG(logDEBUG) << "indexingFinished is performing the next operation";
290         startOperation();
291     }
292 }
293 
294 //
295 // Implementation of virtual functions
296 //
297 qint64 LogData::doGetNbLine() const
298 {
299     return indexing_data_.getNbLines();
300 }
301 
302 int LogData::doGetMaxLength() const
303 {
304     return indexing_data_.getMaxLength();
305 }
306 
307 int LogData::doGetLineLength( qint64 line ) const
308 {
309     if ( line >= indexing_data_.getNbLines() ) { return 0; /* exception? */ }
310 
311     int length = doGetExpandedLineString( line ).length();
312 
313     return length;
314 }
315 
316 void LogData::doSetDisplayEncoding( Encoding encoding )
317 {
318     LOG(logDEBUG) << "AbstractLogData::setDisplayEncoding: " << static_cast<int>( encoding );
319 
320     static const char* latin1_encoding = "iso-8859-1";
321     static const char* utf8_encoding   = "utf-8";
322     static const char* utf16le_encoding   = "utf-16le";
323     static const char* utf16be_encoding   = "utf-16be";
324     static const char* cp1251_encoding   = "CP1251";
325     static const char* cp1252_encoding   = "CP1252";
326 
327     const char* qt_encoding = latin1_encoding;
328 
329     // Default to 0, for 8bit encodings
330     int before_cr = 0;
331     int after_cr  = 0;
332 
333     switch ( encoding ) {
334         case Encoding::ENCODING_UTF8:
335             qt_encoding = utf8_encoding;
336             break;
337         case Encoding::ENCODING_UTF16LE:
338             qt_encoding = utf16le_encoding;
339             before_cr = 0;
340             after_cr  = 1;
341             break;
342         case Encoding::ENCODING_UTF16BE:
343             qt_encoding = utf16be_encoding;
344             before_cr = 1;
345             after_cr  = 0;
346             break;
347         case Encoding::ENCODING_CP1251:
348             qt_encoding = cp1251_encoding;
349             break;
350         case Encoding::ENCODING_CP1252:
351             qt_encoding = cp1252_encoding;
352             break;
353         case Encoding::ENCODING_ISO_8859_1:
354             qt_encoding = latin1_encoding;
355             break;
356         default:
357             LOG( logERROR ) << "Unknown encoding set!";
358             assert( false );
359             break;
360     }
361 
362     doSetMultibyteEncodingOffsets( before_cr, after_cr );
363     codec_ = QTextCodec::codecForName( qt_encoding );
364 }
365 
366 void LogData::doSetMultibyteEncodingOffsets( int before_cr, int after_cr )
367 {
368     before_cr_offset_ = before_cr;
369     after_cr_offset_ = after_cr;
370 }
371 
372 QString LogData::doGetLineString( qint64 line ) const
373 {
374     if ( line >= indexing_data_.getNbLines() ) { return 0; /* exception? */ }
375 
376     fileMutex_.lock();
377 
378     // end_byte is non-inclusive.(is not read)
379     const qint64 first_byte = (line == 0) ?
380         0 : ( indexing_data_.getPosForLine( line-1 ) + after_cr_offset_ );
381     const qint64 end_byte  = endOfLinePosition( line );
382 
383     attached_file_->seek( first_byte );
384 
385     QString string = codec_->toUnicode( attached_file_->read( end_byte - first_byte ) );
386 
387     fileMutex_.unlock();
388 
389     return string;
390 }
391 
392 QString LogData::doGetExpandedLineString( qint64 line ) const
393 {
394     if ( line >= indexing_data_.getNbLines() ) { return 0; /* exception? */ }
395 
396     fileMutex_.lock();
397 
398     // end_byte is non-inclusive.(is not read) We also exclude the final \r.
399     const qint64 first_byte = (line == 0) ?
400         0 : ( indexing_data_.getPosForLine( line-1 ) + after_cr_offset_ );
401     const qint64 end_byte  = endOfLinePosition( line );
402 
403     attached_file_->seek( first_byte );
404 
405     // LOG(logDEBUG) << "LogData::doGetExpandedLineString first_byte:" << first_byte << " end_byte:" << end_byte;
406     QByteArray rawString = attached_file_->read( end_byte - first_byte );
407 
408     fileMutex_.unlock();
409 
410     QString string = untabify( codec_->toUnicode( rawString ) );
411 
412     // LOG(logDEBUG) << "doGetExpandedLineString Line is: " << string.toStdString();
413 
414     return string;
415 }
416 
417 // Note this function is also called from the LogFilteredDataWorker thread, so
418 // data must be protected because they are changed in the main thread (by
419 // indexingFinished).
420 QStringList LogData::doGetLines( qint64 first_line, int number ) const
421 {
422     QStringList list;
423     const qint64 last_line = first_line + number - 1;
424 
425     // LOG(logDEBUG) << "LogData::doGetLines first_line:" << first_line << " nb:" << number;
426 
427     if ( number == 0 ) {
428         return QStringList();
429     }
430 
431     if ( last_line >= indexing_data_.getNbLines() ) {
432         LOG(logWARNING) << "LogData::doGetLines Lines out of bound asked for";
433         return QStringList(); /* exception? */
434     }
435 
436     fileMutex_.lock();
437 
438     const qint64 first_byte = (first_line == 0) ?
439         0 : ( indexing_data_.getPosForLine( first_line-1 ) + after_cr_offset_ );
440     const qint64 end_byte  = endOfLinePosition( last_line );
441     // LOG(logDEBUG) << "LogData::doGetLines first_byte:" << first_byte << " end_byte:" << end_byte;
442     attached_file_->seek( first_byte );
443     QByteArray blob = attached_file_->read( end_byte - first_byte );
444 
445     fileMutex_.unlock();
446 
447     qint64 beginning = 0;
448     qint64 end = 0;
449     for ( qint64 line = first_line; (line <= last_line); line++ ) {
450         end = endOfLinePosition( line ) - first_byte;
451         // LOG(logDEBUG) << "Getting line " << line << " beginning " << beginning << " end " << end;
452         QByteArray this_line = blob.mid( beginning, end - beginning );
453         // LOG(logDEBUG) << "Line is: " << QString( this_line ).toStdString();
454         list.append( codec_->toUnicode( this_line ) );
455         beginning = beginningOfNextLine( end );
456     }
457 
458     return list;
459 }
460 
461 QStringList LogData::doGetExpandedLines( qint64 first_line, int number ) const
462 {
463     QStringList list;
464     const qint64 last_line = first_line + number - 1;
465 
466     if ( number == 0 ) {
467         return QStringList();
468     }
469 
470     if ( last_line >= indexing_data_.getNbLines() ) {
471         LOG(logWARNING) << "LogData::doGetExpandedLines Lines out of bound asked for";
472         return QStringList(); /* exception? */
473     }
474 
475     fileMutex_.lock();
476 
477     // end_byte is non-inclusive.(is not read)
478     const qint64 first_byte = (first_line == 0) ?
479         0 : ( indexing_data_.getPosForLine( first_line-1 ) + after_cr_offset_ );
480     const qint64 end_byte  = endOfLinePosition( last_line );
481     LOG(logDEBUG) << "LogData::doGetExpandedLines first_byte:" << first_byte << " end_byte:" << end_byte;
482 
483     attached_file_->seek( first_byte );
484     QByteArray blob = attached_file_->read( end_byte - first_byte );
485 
486     fileMutex_.unlock();
487 
488     qint64 beginning = 0;
489     qint64 end = 0;
490     for ( qint64 line = first_line; (line <= last_line); line++ ) {
491         // end is non-inclusive
492         // LOG(logDEBUG) << "EoL " << line << ": " << indexing_data_.getPosForLine( line );
493         end = endOfLinePosition( line ) - first_byte;
494         // LOG(logDEBUG) << "Getting line " << line << " beginning " << beginning << " end " << end;
495         QByteArray this_line = blob.mid( beginning, end - beginning );
496         QString conv_line = codec_->toUnicode( this_line );
497         // LOG(logDEBUG) << "Line is: " << conv_line.toStdString();
498         list.append( untabify( conv_line ) );
499         beginning = beginningOfNextLine( end );
500     }
501 
502     return list;
503 }
504 
505 EncodingSpeculator::Encoding LogData::getDetectedEncoding() const
506 {
507     return indexing_data_.getEncodingGuess();
508 }
509 
510 // Given a line number, returns the position (offset in file) of
511 // the byte immediately past its end.
512 // e.g. in utf-16: T e s t \n2 n d l i n e \n
513 //                 --------------------------
514 //                           ^
515 //                   endOfLinePosition( 0 )
516 qint64 LogData::endOfLinePosition( qint64 line ) const
517 {
518     return indexing_data_.getPosForLine( line ) - 1 - before_cr_offset_;
519 }
520 
521 // Given the position (offset in file) of the end of a line, returns
522 // the position of the beginning of the following, taking into account
523 // encoding and newline signalling.
524 qint64 LogData::beginningOfNextLine( qint64 end_pos ) const
525 {
526     return end_pos + 1 + before_cr_offset_ + after_cr_offset_;
527 }
528