1 /* 2 * Copyright (C) 2015 Nicolas Bonnefon and other contributors 3 * 4 * This file is part of glogg. 5 * 6 * glogg is free software: you can redistribute it and/or modify 7 * it under the terms of the GNU General Public License as published by 8 * the Free Software Foundation, either version 3 of the License, or 9 * (at your option) any later version. 10 * 11 * glogg is distributed in the hope that it will be useful, 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 * GNU General Public License for more details. 15 * 16 * You should have received a copy of the GNU General Public License 17 * along with glogg. If not, see <http://www.gnu.org/licenses/>. 18 */ 19 20 #include <cstdlib> 21 #include <arpa/inet.h> 22 23 #include <iostream> 24 25 #include "data/compressedlinestorage.h" 26 27 namespace { 28 // Functions to manipulate blocks 29 30 // Create a new 32 bits block of the passed size, 31 // initialised at the passed position 32 char* block32_new( int block_size, uint32_t initial_position, 33 char** block_ptr ) 34 { 35 // malloc a block of the maximum possible size 36 // (where every line is >16384) 37 char* ptr = static_cast<char*>( malloc( 4 + block_size * 6 ) ); 38 39 if ( ptr ) { 40 // Write the initial_position 41 *(reinterpret_cast<uint32_t*>(ptr)) = initial_position; 42 *block_ptr = ptr + 4; 43 } 44 45 return ptr; 46 } 47 48 // Add a one byte relative delta (0-127) and inc pointer 49 // First bit is always 0 50 void block_add_one_byte_relative( char** ptr, uint8_t value ) 51 { 52 **ptr = value; 53 *ptr += sizeof( value ); 54 } 55 56 // Add a two bytes relative delta (0-16383) and inc pointer 57 // First 2 bits are always 10 58 void block_add_two_bytes_relative( char** ptr, uint16_t value ) 59 { 60 // Stored in big endian format in order to recognise the initial pattern: 61 // 10xx xxxx xxxx xxxx 62 // HO byte | LO byte 63 *(reinterpret_cast<uint16_t*>(*ptr)) = htons( value | (1 << 15) ); 64 *ptr += sizeof( value ); 65 } 66 67 // Add a signal and a 4 bytes absolute position and inc pointer 68 void block32_add_absolute( char** ptr, uint32_t value ) 69 { 70 // 2 bytes marker (actually only the first two bits are tested) 71 *(reinterpret_cast<uint16_t*>(*ptr)) = 0xFF; 72 *ptr += sizeof( uint16_t ); 73 // Absolute value (machine endian) 74 *(reinterpret_cast<uint32_t*>(*ptr)) = value; 75 *ptr += sizeof( uint32_t ); 76 } 77 78 // Initialise the passed block for reading, returning 79 // the initial position and a pointer to the second entry. 80 uint64_t block32_initial_pos( char* block, char** ptr ) 81 { 82 *ptr = block + sizeof( uint32_t ); 83 return *(reinterpret_cast<uint32_t*>(block)); 84 } 85 86 // Give the next position in the block based on the previous 87 // position, then increase the pointer. 88 uint64_t block32_next_pos( char** ptr, uint64_t previous_pos ) 89 { 90 uint64_t pos = previous_pos; 91 92 uint8_t byte = **ptr; 93 ++(*ptr); 94 if ( ! ( byte & 0x80 ) ) { 95 // High order bit is 0 96 pos += byte; 97 } 98 else if ( ( byte & 0xC0 ) == 0x80 ) { 99 // We need to read the low order byte 100 uint8_t lo_byte = **ptr; 101 ++(*ptr); 102 // Remove the starting 10b 103 byte &= ~0xC0; 104 // And form the displacement (stored as big endian) 105 pos += ( (uint16_t) byte << 8 ) | (uint16_t) lo_byte; 106 } 107 else { 108 // Skip the next byte (not used) 109 ++(*ptr); 110 // And read the new absolute pos (machine endian) 111 pos = *(reinterpret_cast<uint32_t*>(*ptr)); 112 *ptr += sizeof( uint32_t ); 113 } 114 115 return pos; 116 } 117 } 118 119 // template<int BLOCK_SIZE> 120 void CompressedLinePositionStorage::append( uint64_t pos ) 121 { 122 if ( ! block_pointer_ ) { 123 // We need to start a new block 124 block32_index_.push_back( 125 block32_new( BLOCK_SIZE, pos, &block_pointer_ ) ); 126 } 127 else { 128 uint64_t delta = pos - current_pos_; 129 if ( delta < 128 ) { 130 // Code relative on one byte 131 block_add_one_byte_relative( &block_pointer_, delta ); 132 } 133 else if ( delta < 16384 ) { 134 // Code relative on two bytes 135 block_add_two_bytes_relative( &block_pointer_, delta ); 136 } 137 else { 138 // Code absolute 139 block32_add_absolute( &block_pointer_, pos ); 140 } 141 } 142 143 current_pos_ = pos; 144 ++nb_lines_; 145 146 if ( nb_lines_ % BLOCK_SIZE == 0 ) { 147 // We have finished the block 148 block_pointer_ = nullptr; 149 } 150 } 151 152 // template<int BLOCK_SIZE> 153 uint64_t CompressedLinePositionStorage::at( int index ) const 154 { 155 char* block = block32_index_[ index / BLOCK_SIZE ]; 156 char* ptr; 157 uint64_t position = block32_initial_pos( block, &ptr ); 158 159 for ( int i = 0; i < index % BLOCK_SIZE; i++ ) { 160 // Go through all the lines in the block till the one we want 161 position = block32_next_pos( &ptr, position ); 162 } 163 164 return position; 165 } 166 167 void CompressedLinePositionStorage::append_list( 168 const CompressedLinePositionStorage& other ) 169 { 170 // This is not very clever, but caching should make it 171 // reasonably fast. 172 for ( uint32_t i = 0; i < other.size(); i++ ) 173 append( other.at( i ) ); 174 } 175 176 // template<int BLOCK_SIZE> 177 void CompressedLinePositionStorage::pop_back() 178 { 179 } 180