xref: /glogg/src/data/logdataworkerthread.cpp (revision de1abac61b191ce1317b0ceaa43904b8f2fb2bd3)
1 /*
2  * Copyright (C) 2009, 2010, 2014, 2015 Nicolas Bonnefon and other contributors
3  *
4  * This file is part of glogg.
5  *
6  * glogg is free software: you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation, either version 3 of the License, or
9  * (at your option) any later version.
10  *
11  * glogg is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with glogg.  If not, see <http://www.gnu.org/licenses/>.
18  */
19 
20 #include <QFile>
21 
22 #include "log.h"
23 
24 #include "logdata.h"
25 #include "logdataworkerthread.h"
26 
27 // Size of the chunk to read (5 MiB)
28 const int IndexOperation::sizeChunk = 5*1024*1024;
29 
getSize() const30 qint64 IndexingData::getSize() const
31 {
32     QMutexLocker locker( &dataMutex_ );
33 
34     return indexedSize_;
35 }
36 
getMaxLength() const37 int IndexingData::getMaxLength() const
38 {
39     QMutexLocker locker( &dataMutex_ );
40 
41     return maxLength_;
42 }
43 
getNbLines() const44 LineNumber IndexingData::getNbLines() const
45 {
46     QMutexLocker locker( &dataMutex_ );
47 
48     return linePosition_.size();
49 }
50 
getPosForLine(LineNumber line) const51 qint64 IndexingData::getPosForLine( LineNumber line ) const
52 {
53     QMutexLocker locker( &dataMutex_ );
54 
55     return linePosition_.at( line );
56 }
57 
getEncodingGuess() const58 EncodingSpeculator::Encoding IndexingData::getEncodingGuess() const
59 {
60     QMutexLocker locker( &dataMutex_ );
61 
62     return encoding_;
63 }
64 
addAll(qint64 size,int length,const FastLinePositionArray & linePosition,EncodingSpeculator::Encoding encoding)65 void IndexingData::addAll( qint64 size, int length,
66         const FastLinePositionArray& linePosition,
67         EncodingSpeculator::Encoding encoding )
68 
69 {
70     QMutexLocker locker( &dataMutex_ );
71 
72     indexedSize_  += size;
73     maxLength_     = qMax( maxLength_, length );
74     linePosition_.append_list( linePosition );
75 
76     encoding_      = encoding;
77 }
78 
clear()79 void IndexingData::clear()
80 {
81     maxLength_   = 0;
82     indexedSize_ = 0;
83     linePosition_ = LinePositionArray();
84     encoding_    = EncodingSpeculator::Encoding::ASCII7;
85 }
86 
LogDataWorkerThread(IndexingData * indexing_data)87 LogDataWorkerThread::LogDataWorkerThread( IndexingData* indexing_data )
88     : QThread(), mutex_(), operationRequestedCond_(),
89     nothingToDoCond_(), fileName_(), indexing_data_( indexing_data )
90 {
91     terminate_          = false;
92     interruptRequested_ = false;
93     operationRequested_ = NULL;
94 }
95 
~LogDataWorkerThread()96 LogDataWorkerThread::~LogDataWorkerThread()
97 {
98     {
99         QMutexLocker locker( &mutex_ );
100         terminate_ = true;
101         operationRequestedCond_.wakeAll();
102     }
103     wait();
104 }
105 
attachFile(const QString & fileName)106 void LogDataWorkerThread::attachFile( const QString& fileName )
107 {
108     QMutexLocker locker( &mutex_ );  // to protect fileName_
109 
110     fileName_ = fileName;
111 }
112 
indexAll()113 void LogDataWorkerThread::indexAll()
114 {
115     QMutexLocker locker( &mutex_ );  // to protect operationRequested_
116 
117     LOG(logDEBUG) << "FullIndex requested";
118 
119     // If an operation is ongoing, we will block
120     while ( (operationRequested_ != NULL) )
121         nothingToDoCond_.wait( &mutex_ );
122 
123     interruptRequested_ = false;
124     operationRequested_ = new FullIndexOperation( fileName_,
125             indexing_data_, &interruptRequested_, &encodingSpeculator_ );
126     operationRequestedCond_.wakeAll();
127 }
128 
indexAdditionalLines()129 void LogDataWorkerThread::indexAdditionalLines()
130 {
131     QMutexLocker locker( &mutex_ );  // to protect operationRequested_
132 
133     LOG(logDEBUG) << "AddLines requested";
134 
135     // If an operation is ongoing, we will block
136     while ( (operationRequested_ != NULL) )
137         nothingToDoCond_.wait( &mutex_ );
138 
139     interruptRequested_ = false;
140     operationRequested_ = new PartialIndexOperation( fileName_,
141             indexing_data_, &interruptRequested_, &encodingSpeculator_ );
142     operationRequestedCond_.wakeAll();
143 }
144 
interrupt()145 void LogDataWorkerThread::interrupt()
146 {
147     LOG(logDEBUG) << "Load interrupt requested";
148 
149     // No mutex here, setting a bool is probably atomic!
150     interruptRequested_ = true;
151 }
152 
153 // This is the thread's main loop
run()154 void LogDataWorkerThread::run()
155 {
156     QMutexLocker locker( &mutex_ );
157 
158     forever {
159         while ( (terminate_ == false) && (operationRequested_ == NULL) )
160             operationRequestedCond_.wait( &mutex_ );
161         LOG(logDEBUG) << "Worker thread signaled";
162 
163         // Look at what needs to be done
164         if ( terminate_ )
165             return;      // We must die
166 
167         if ( operationRequested_ ) {
168             connect( operationRequested_, SIGNAL( indexingProgressed( int ) ),
169                     this, SIGNAL( indexingProgressed( int ) ) );
170 
171             // Run the operation
172             try {
173                 if ( operationRequested_->start() ) {
174                     LOG(logDEBUG) << "... finished copy in workerThread.";
175                     emit indexingFinished( LoadingStatus::Successful );
176                 }
177                 else {
178                     emit indexingFinished( LoadingStatus::Interrupted );
179                 }
180             }
181             catch ( std::bad_alloc& ba ) {
182                 LOG(logERROR) << "Out of memory whilst indexing!";
183                 emit indexingFinished( LoadingStatus::NoMemory );
184             }
185 
186             delete operationRequested_;
187             operationRequested_ = NULL;
188             nothingToDoCond_.wakeAll();
189         }
190     }
191 }
192 
193 //
194 // Operations implementation
195 //
196 
IndexOperation(const QString & fileName,IndexingData * indexingData,bool * interruptRequest,EncodingSpeculator * encodingSpeculator)197 IndexOperation::IndexOperation( const QString& fileName,
198         IndexingData* indexingData, bool* interruptRequest,
199         EncodingSpeculator* encodingSpeculator )
200     : fileName_( fileName )
201 {
202     interruptRequest_ = interruptRequest;
203     indexing_data_ = indexingData;
204     encoding_speculator_ = encodingSpeculator;
205 }
206 
doIndex(IndexingData * indexing_data,EncodingSpeculator * encoding_speculator,qint64 initialPosition)207 void IndexOperation::doIndex( IndexingData* indexing_data,
208         EncodingSpeculator* encoding_speculator, qint64 initialPosition )
209 {
210     qint64 pos = initialPosition; // Absolute position of the start of current line
211     qint64 end = 0;               // Absolute position of the end of current line
212     int additional_spaces = 0;    // Additional spaces due to tabs
213 
214     QFile file( fileName_ );
215     if ( file.open( QIODevice::ReadOnly ) ) {
216         // Count the number of lines and max length
217         // (read big chunks to speed up reading from disk)
218         file.seek( pos );
219         while ( !file.atEnd() ) {
220             FastLinePositionArray line_positions;
221             int max_length = 0;
222 
223             if ( *interruptRequest_ )   // a bool is always read/written atomically isn't it?
224                 break;
225 
226             // Read a chunk of 5MB
227             const qint64 block_beginning = file.pos();
228             const QByteArray block = file.read( sizeChunk );
229 
230             // Count the number of lines in each chunk
231             qint64 pos_within_block = 0;
232             while ( pos_within_block != -1 ) {
233                 pos_within_block = qMax( pos - block_beginning, 0LL);
234                 // Looking for the next \n, expanding tabs in the process
235                 do {
236                     if ( pos_within_block < block.length() ) {
237                         const char c = block.at(pos_within_block);
238                         encoding_speculator->inject_byte( c );
239                         if ( c == '\n' )
240                             break;
241                         else if ( c == '\t' )
242                             additional_spaces += AbstractLogData::tabStop -
243                                 ( ( ( block_beginning - pos ) + pos_within_block
244                                     + additional_spaces ) % AbstractLogData::tabStop ) - 1;
245 
246                         pos_within_block++;
247                     }
248                     else {
249                         pos_within_block = -1;
250                     }
251                 } while ( pos_within_block != -1 );
252 
253                 // When a end of line has been found...
254                 if ( pos_within_block != -1 ) {
255                     end = pos_within_block + block_beginning;
256                     const int length = end-pos + additional_spaces;
257                     if ( length > max_length )
258                         max_length = length;
259                     pos = end + 1;
260                     additional_spaces = 0;
261                     line_positions.append( pos );
262                 }
263             }
264 
265             // Update the shared data
266             indexing_data->addAll( block.length(), max_length, line_positions,
267                    encoding_speculator->guess() );
268 
269             // Update the caller for progress indication
270             int progress = ( file.size() > 0 ) ? pos*100 / file.size() : 100;
271             emit indexingProgressed( progress );
272         }
273 
274         // Check if there is a non LF terminated line at the end of the file
275         qint64 file_size = file.size();
276         if ( !*interruptRequest_ && file_size > pos ) {
277             LOG( logWARNING ) <<
278                 "Non LF terminated file, adding a fake end of line";
279 
280             FastLinePositionArray line_position;
281             line_position.append( file_size + 1 );
282             line_position.setFakeFinalLF();
283 
284             indexing_data->addAll( 0, 0, line_position, encoding_speculator->guess() );
285         }
286     }
287     else {
288         // TODO: Check that the file is seekable?
289         // If the file cannot be open, we do as if it was empty
290         LOG(logWARNING) << "Cannot open file " << fileName_.toStdString();
291 
292         emit indexingProgressed( 100 );
293     }
294 }
295 
296 // Called in the worker thread's context
start()297 bool FullIndexOperation::start()
298 {
299     LOG(logDEBUG) << "FullIndexOperation::start(), file "
300         << fileName_.toStdString();
301 
302     LOG(logDEBUG) << "FullIndexOperation: Starting the count...";
303 
304     emit indexingProgressed( 0 );
305 
306     // First empty the index
307     indexing_data_->clear();
308 
309     doIndex( indexing_data_, encoding_speculator_, 0 );
310 
311     LOG(logDEBUG) << "FullIndexOperation: ... finished counting."
312         "interrupt = " << *interruptRequest_;
313 
314     return ( *interruptRequest_ ? false : true );
315 }
316 
start()317 bool PartialIndexOperation::start()
318 {
319     LOG(logDEBUG) << "PartialIndexOperation::start(), file "
320         << fileName_.toStdString();
321 
322     qint64 initial_position = indexing_data_->getSize();
323 
324     LOG(logDEBUG) << "PartialIndexOperation: Starting the count at "
325         << initial_position << " ...";
326 
327     emit indexingProgressed( 0 );
328 
329     doIndex( indexing_data_, encoding_speculator_, initial_position );
330 
331     LOG(logDEBUG) << "PartialIndexOperation: ... finished counting.";
332 
333     return ( *interruptRequest_ ? false : true );
334 }
335