1 /* 2 * Copyright (C) 2015 Nicolas Bonnefon and other contributors 3 * 4 * This file is part of glogg. 5 * 6 * glogg is free software: you can redistribute it and/or modify 7 * it under the terms of the GNU General Public License as published by 8 * the Free Software Foundation, either version 3 of the License, or 9 * (at your option) any later version. 10 * 11 * glogg is distributed in the hope that it will be useful, 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 * GNU General Public License for more details. 15 * 16 * You should have received a copy of the GNU General Public License 17 * along with glogg. If not, see <http://www.gnu.org/licenses/>. 18 */ 19 20 #include <cassert> 21 #include <cstdlib> 22 #include <arpa/inet.h> 23 24 #include "data/compressedlinestorage.h" 25 26 namespace { 27 // Functions to manipulate blocks 28 29 // Create a new 32 bits block of the passed size, 30 // initialised at the passed position 31 char* block32_new( int block_size, uint32_t initial_position, 32 char** block_ptr ) 33 { 34 // malloc a block of the maximum possible size 35 // (where every line is >16384) 36 char* ptr = static_cast<char*>( malloc( 4 + block_size * 6 ) ); 37 38 if ( ptr ) { 39 // Write the initial_position 40 *(reinterpret_cast<uint32_t*>(ptr)) = initial_position; 41 *block_ptr = ptr + 4; 42 } 43 44 return ptr; 45 } 46 47 // Add a one byte relative delta (0-127) and inc pointer 48 // First bit is always 0 49 void block_add_one_byte_relative( char** ptr, uint8_t value ) 50 { 51 **ptr = value; 52 *ptr += sizeof( value ); 53 } 54 55 // Add a two bytes relative delta (0-16383) and inc pointer 56 // First 2 bits are always 10 57 void block_add_two_bytes_relative( char** ptr, uint16_t value ) 58 { 59 // Stored in big endian format in order to recognise the initial pattern: 60 // 10xx xxxx xxxx xxxx 61 // HO byte | LO byte 62 *(reinterpret_cast<uint16_t*>(*ptr)) = htons( value | (1 << 15) ); 63 *ptr += sizeof( value ); 64 } 65 66 // Add a signal and a 4 bytes absolute position and inc pointer 67 void block32_add_absolute( char** ptr, uint32_t value ) 68 { 69 // 2 bytes marker (actually only the first two bits are tested) 70 *(reinterpret_cast<uint16_t*>(*ptr)) = 0xFF; 71 *ptr += sizeof( uint16_t ); 72 // Absolute value (machine endian) 73 *(reinterpret_cast<uint32_t*>(*ptr)) = value; 74 *ptr += sizeof( uint32_t ); 75 } 76 77 // Initialise the passed block for reading, returning 78 // the initial position and a pointer to the second entry. 79 uint64_t block32_initial_pos( char* block, char** ptr ) 80 { 81 *ptr = block + sizeof( uint32_t ); 82 return *(reinterpret_cast<uint32_t*>(block)); 83 } 84 85 // Give the next position in the block based on the previous 86 // position, then increase the pointer. 87 uint64_t block32_next_pos( char** ptr, uint64_t previous_pos ) 88 { 89 uint64_t pos = previous_pos; 90 91 uint8_t byte = **ptr; 92 ++(*ptr); 93 if ( ! ( byte & 0x80 ) ) { 94 // High order bit is 0 95 pos += byte; 96 } 97 else if ( ( byte & 0xC0 ) == 0x80 ) { 98 // We need to read the low order byte 99 uint8_t lo_byte = **ptr; 100 ++(*ptr); 101 // Remove the starting 10b 102 byte &= ~0xC0; 103 // And form the displacement (stored as big endian) 104 pos += ( (uint16_t) byte << 8 ) | (uint16_t) lo_byte; 105 } 106 else { 107 // Skip the next byte (not used) 108 ++(*ptr); 109 // And read the new absolute pos (machine endian) 110 pos = *(reinterpret_cast<uint32_t*>(*ptr)); 111 *ptr += sizeof( uint32_t ); 112 } 113 114 return pos; 115 } 116 } 117 118 void CompressedLinePositionStorage::move_from( 119 CompressedLinePositionStorage&& orig ) 120 { 121 nb_lines_ = orig.nb_lines_; 122 first_long_line_ = orig.first_long_line_; 123 current_pos_ = orig.current_pos_; 124 block_pointer_ = orig.block_pointer_; 125 previous_block_pointer_ = orig.previous_block_pointer_; 126 127 orig.nb_lines_ = 0; 128 } 129 130 // Move constructor 131 CompressedLinePositionStorage::CompressedLinePositionStorage( 132 CompressedLinePositionStorage&& orig ) 133 : block32_index_( std::move( orig.block32_index_ ) ) 134 { 135 move_from( std::move( orig ) ); 136 } 137 138 // Move assignement 139 CompressedLinePositionStorage& CompressedLinePositionStorage::operator=( 140 CompressedLinePositionStorage&& orig ) 141 { 142 block32_index_ = std::move( orig.block32_index_ ); 143 move_from( std::move( orig ) ); 144 145 return *this; 146 } 147 148 CompressedLinePositionStorage::~CompressedLinePositionStorage() 149 { 150 for ( char* block : block32_index_ ) { 151 void* p = static_cast<void*>( block ); 152 free( p ); 153 } 154 } 155 156 // template<int BLOCK_SIZE> 157 void CompressedLinePositionStorage::append( uint64_t pos ) 158 { 159 // Save the pointer in case we need to "pop_back" 160 previous_block_pointer_ = block_pointer_; 161 162 if ( ! block_pointer_ ) { 163 // We need to start a new block 164 block32_index_.push_back( 165 block32_new( BLOCK_SIZE, pos, &block_pointer_ ) ); 166 } 167 else { 168 uint64_t delta = pos - current_pos_; 169 if ( delta < 128 ) { 170 // Code relative on one byte 171 block_add_one_byte_relative( &block_pointer_, delta ); 172 } 173 else if ( delta < 16384 ) { 174 // Code relative on two bytes 175 block_add_two_bytes_relative( &block_pointer_, delta ); 176 } 177 else { 178 // Code absolute 179 block32_add_absolute( &block_pointer_, pos ); 180 } 181 } 182 183 current_pos_ = pos; 184 ++nb_lines_; 185 186 if ( nb_lines_ % BLOCK_SIZE == 0 ) { 187 // We have finished the block 188 block_pointer_ = nullptr; 189 } 190 } 191 192 // template<int BLOCK_SIZE> 193 uint64_t CompressedLinePositionStorage::at( int index ) const 194 { 195 char* block = block32_index_[ index / BLOCK_SIZE ]; 196 char* ptr; 197 uint64_t position = block32_initial_pos( block, &ptr ); 198 199 for ( int i = 0; i < index % BLOCK_SIZE; i++ ) { 200 // Go through all the lines in the block till the one we want 201 position = block32_next_pos( &ptr, position ); 202 } 203 204 return position; 205 } 206 207 void CompressedLinePositionStorage::append_list( 208 const CompressedLinePositionStorage& other ) 209 { 210 // This is not very clever, but caching should make it 211 // reasonably fast. 212 for ( uint32_t i = 0; i < other.size(); i++ ) 213 append( other.at( i ) ); 214 } 215 216 // template<int BLOCK_SIZE> 217 void CompressedLinePositionStorage::pop_back() 218 { 219 // Removing the last entered data, there are two cases 220 if ( previous_block_pointer_ ) { 221 // The last append was a normal entry in an existing block, 222 // so we can just revert the pointer 223 block_pointer_ = previous_block_pointer_; 224 previous_block_pointer_ = nullptr; 225 } 226 else { 227 // A new block has been created for the last entry, we need 228 // to de-alloc it. 229 230 // If we try to pop_back() twice, we're dead! 231 assert( ( nb_lines_ - 1 ) % BLOCK_SIZE == 0 ); 232 233 char* block = block32_index_.back(); 234 block32_index_.pop_back(); 235 free( block ); 236 237 238 block_pointer_ = nullptr; 239 } 240 241 --nb_lines_; 242 current_pos_ = at( nb_lines_ - 1 ); 243 } 244