1 /* 2 * Copyright (C) 2015 Nicolas Bonnefon and other contributors 3 * 4 * This file is part of glogg. 5 * 6 * glogg is free software: you can redistribute it and/or modify 7 * it under the terms of the GNU General Public License as published by 8 * the Free Software Foundation, either version 3 of the License, or 9 * (at your option) any later version. 10 * 11 * glogg is distributed in the hope that it will be useful, 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 * GNU General Public License for more details. 15 * 16 * You should have received a copy of the GNU General Public License 17 * along with glogg. If not, see <http://www.gnu.org/licenses/>. 18 */ 19 20 #include <cassert> 21 #include <cstdlib> 22 #include <arpa/inet.h> 23 24 #include <iostream> 25 26 #include "data/compressedlinestorage.h" 27 28 namespace { 29 // Functions to manipulate blocks 30 31 // Create a new 32 bits block of the passed size, 32 // initialised at the passed position 33 char* block32_new( int block_size, uint32_t initial_position, 34 char** block_ptr ) 35 { 36 // malloc a block of the maximum possible size 37 // (where every line is >16384) 38 char* ptr = static_cast<char*>( malloc( 4 + block_size * 6 ) ); 39 40 if ( ptr ) { 41 // Write the initial_position 42 *(reinterpret_cast<uint32_t*>(ptr)) = initial_position; 43 *block_ptr = ptr + 4; 44 } 45 46 return ptr; 47 } 48 49 // Add a one byte relative delta (0-127) and inc pointer 50 // First bit is always 0 51 void block_add_one_byte_relative( char** ptr, uint8_t value ) 52 { 53 **ptr = value; 54 *ptr += sizeof( value ); 55 } 56 57 // Add a two bytes relative delta (0-16383) and inc pointer 58 // First 2 bits are always 10 59 void block_add_two_bytes_relative( char** ptr, uint16_t value ) 60 { 61 // Stored in big endian format in order to recognise the initial pattern: 62 // 10xx xxxx xxxx xxxx 63 // HO byte | LO byte 64 *(reinterpret_cast<uint16_t*>(*ptr)) = htons( value | (1 << 15) ); 65 *ptr += sizeof( value ); 66 } 67 68 // Add a signal and a 4 bytes absolute position and inc pointer 69 void block32_add_absolute( char** ptr, uint32_t value ) 70 { 71 // 2 bytes marker (actually only the first two bits are tested) 72 *(reinterpret_cast<uint16_t*>(*ptr)) = 0xFF; 73 *ptr += sizeof( uint16_t ); 74 // Absolute value (machine endian) 75 *(reinterpret_cast<uint32_t*>(*ptr)) = value; 76 *ptr += sizeof( uint32_t ); 77 } 78 79 // Initialise the passed block for reading, returning 80 // the initial position and a pointer to the second entry. 81 uint64_t block32_initial_pos( char* block, char** ptr ) 82 { 83 *ptr = block + sizeof( uint32_t ); 84 return *(reinterpret_cast<uint32_t*>(block)); 85 } 86 87 // Give the next position in the block based on the previous 88 // position, then increase the pointer. 89 uint64_t block32_next_pos( char** ptr, uint64_t previous_pos ) 90 { 91 uint64_t pos = previous_pos; 92 93 uint8_t byte = **ptr; 94 ++(*ptr); 95 if ( ! ( byte & 0x80 ) ) { 96 // High order bit is 0 97 pos += byte; 98 } 99 else if ( ( byte & 0xC0 ) == 0x80 ) { 100 // We need to read the low order byte 101 uint8_t lo_byte = **ptr; 102 ++(*ptr); 103 // Remove the starting 10b 104 byte &= ~0xC0; 105 // And form the displacement (stored as big endian) 106 pos += ( (uint16_t) byte << 8 ) | (uint16_t) lo_byte; 107 } 108 else { 109 // Skip the next byte (not used) 110 ++(*ptr); 111 // And read the new absolute pos (machine endian) 112 pos = *(reinterpret_cast<uint32_t*>(*ptr)); 113 *ptr += sizeof( uint32_t ); 114 } 115 116 return pos; 117 } 118 } 119 120 // FIXME: painfully slow temporary copy constructor 121 CompressedLinePositionStorage::CompressedLinePositionStorage( 122 const CompressedLinePositionStorage& orig ) 123 { 124 this->append_list( orig ); 125 } 126 127 // FIXME too 128 CompressedLinePositionStorage& CompressedLinePositionStorage::operator=( 129 const CompressedLinePositionStorage& orig ) 130 { 131 nb_lines_ = 0; 132 first_long_line_ = UINT32_MAX; 133 current_pos_ = 0; 134 block_pointer_ = nullptr; 135 previous_block_pointer_ = nullptr; 136 137 for ( char* block : block32_index_ ) { 138 void* p = static_cast<void*>( block ); 139 // std::cerr << "block = " << p << std::endl; 140 free( p ); 141 } 142 143 block32_index_.clear(); 144 145 this->append_list( orig ); 146 147 return *this; 148 } 149 150 void CompressedLinePositionStorage::move_from( 151 CompressedLinePositionStorage&& orig ) 152 { 153 nb_lines_ = orig.nb_lines_; 154 first_long_line_ = orig.first_long_line_; 155 current_pos_ = orig.current_pos_; 156 block_pointer_ = orig.block_pointer_; 157 previous_block_pointer_ = orig.previous_block_pointer_; 158 159 orig.nb_lines_ = 0; 160 } 161 162 // Move constructor 163 CompressedLinePositionStorage::CompressedLinePositionStorage( 164 CompressedLinePositionStorage&& orig ) 165 : block32_index_( std::move( orig.block32_index_ ) ) 166 { 167 move_from( std::move( orig ) ); 168 } 169 170 // Move assignement 171 CompressedLinePositionStorage& CompressedLinePositionStorage::operator=( 172 CompressedLinePositionStorage&& orig ) 173 { 174 block32_index_ = std::move( orig.block32_index_ ); 175 move_from( std::move( orig ) ); 176 177 return *this; 178 } 179 180 CompressedLinePositionStorage::~CompressedLinePositionStorage() 181 { 182 for ( char* block : block32_index_ ) { 183 void* p = static_cast<void*>( block ); 184 // std::cerr << "block = " << p << std::endl; 185 free( p ); 186 } 187 } 188 189 // template<int BLOCK_SIZE> 190 void CompressedLinePositionStorage::append( uint64_t pos ) 191 { 192 // Save the pointer in case we need to "pop_back" 193 previous_block_pointer_ = block_pointer_; 194 195 if ( ! block_pointer_ ) { 196 // We need to start a new block 197 block32_index_.push_back( 198 block32_new( BLOCK_SIZE, pos, &block_pointer_ ) ); 199 } 200 else { 201 uint64_t delta = pos - current_pos_; 202 if ( delta < 128 ) { 203 // Code relative on one byte 204 block_add_one_byte_relative( &block_pointer_, delta ); 205 } 206 else if ( delta < 16384 ) { 207 // Code relative on two bytes 208 block_add_two_bytes_relative( &block_pointer_, delta ); 209 } 210 else { 211 // Code absolute 212 block32_add_absolute( &block_pointer_, pos ); 213 } 214 } 215 216 current_pos_ = pos; 217 ++nb_lines_; 218 219 if ( nb_lines_ % BLOCK_SIZE == 0 ) { 220 // We have finished the block 221 222 // Let's reduce its size to what is actually used 223 int block_index = nb_lines_ / BLOCK_SIZE - 1; 224 char* block = block32_index_[block_index]; 225 // We allocate extra space for the last element in case it 226 // is replaced by an absolute value in the future (following a pop_back) 227 size_t new_size = ( previous_block_pointer_ 228 + sizeof( uint16_t ) + sizeof( uint32_t ) ) - block; 229 void* new_location = realloc( block, new_size ); 230 if ( new_location ) 231 block32_index_[block_index] = static_cast<char*>( new_location ); 232 233 block_pointer_ = nullptr; 234 } 235 } 236 237 // template<int BLOCK_SIZE> 238 uint64_t CompressedLinePositionStorage::at( int index ) const 239 { 240 char* block = block32_index_[ index / BLOCK_SIZE ]; 241 char* ptr; 242 uint64_t position = block32_initial_pos( block, &ptr ); 243 244 for ( int i = 0; i < index % BLOCK_SIZE; i++ ) { 245 // Go through all the lines in the block till the one we want 246 position = block32_next_pos( &ptr, position ); 247 } 248 249 return position; 250 } 251 252 void CompressedLinePositionStorage::append_list( 253 const CompressedLinePositionStorage& other ) 254 { 255 // This is not very clever, but caching should make it 256 // reasonably fast. 257 for ( uint32_t i = 0; i < other.size(); i++ ) 258 append( other.at( i ) ); 259 } 260 261 // template<int BLOCK_SIZE> 262 void CompressedLinePositionStorage::pop_back() 263 { 264 // Removing the last entered data, there are two cases 265 if ( previous_block_pointer_ ) { 266 // The last append was a normal entry in an existing block, 267 // so we can just revert the pointer 268 block_pointer_ = previous_block_pointer_; 269 previous_block_pointer_ = nullptr; 270 } 271 else { 272 // A new block has been created for the last entry, we need 273 // to de-alloc it. 274 275 // If we try to pop_back() twice, we're dead! 276 assert( ( nb_lines_ - 1 ) % BLOCK_SIZE == 0 ); 277 278 char* block = block32_index_.back(); 279 block32_index_.pop_back(); 280 free( block ); 281 282 283 block_pointer_ = nullptr; 284 } 285 286 --nb_lines_; 287 current_pos_ = at( nb_lines_ - 1 ); 288 } 289