xref: /glogg/src/data/compressedlinestorage.cpp (revision 7a2f3a57e884000357471fe3462e6bb0262e96af) !
1 /*
2  * Copyright (C) 2015 Nicolas Bonnefon and other contributors
3  *
4  * This file is part of glogg.
5  *
6  * glogg is free software: you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation, either version 3 of the License, or
9  * (at your option) any later version.
10  *
11  * glogg is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with glogg.  If not, see <http://www.gnu.org/licenses/>.
18  */
19 
20 #include <cstdlib>
21 #include <arpa/inet.h>
22 
23 #include <iostream>
24 
25 #include "data/compressedlinestorage.h"
26 
27 namespace {
28     // Functions to manipulate blocks
29 
30     // Create a new 32 bits block of the passed size,
31     // initialised at the passed position
32     char* block32_new( int block_size, uint32_t initial_position,
33            char** block_ptr )
34     {
35         // malloc a block of the maximum possible size
36         // (where every line is >16384)
37         char* ptr = static_cast<char*>( malloc( 4 + block_size * 6 ) );
38 
39         if ( ptr ) {
40             // Write the initial_position
41             *(reinterpret_cast<uint32_t*>(ptr)) = initial_position;
42             *block_ptr = ptr + 4;
43         }
44 
45         return ptr;
46     }
47 
48     // Add a one byte relative delta (0-127) and inc pointer
49     // First bit is always 0
50     void block_add_one_byte_relative( char** ptr, uint8_t value )
51     {
52         **ptr = value;
53         *ptr += sizeof( value );
54     }
55 
56     // Add a two bytes relative delta (0-16383) and inc pointer
57     // First 2 bits are always 10
58     void block_add_two_bytes_relative( char** ptr, uint16_t value )
59     {
60         // Stored in big endian format in order to recognise the initial pattern:
61         // 10xx xxxx xxxx xxxx
62         //  HO byte | LO byte
63         *(reinterpret_cast<uint16_t*>(*ptr)) = htons( value | (1 << 15) );
64         *ptr += sizeof( value );
65     }
66 
67     // Add a signal and a 4 bytes absolute position and inc pointer
68     void block32_add_absolute( char** ptr, uint32_t value )
69     {
70         // 2 bytes marker (actually only the first two bits are tested)
71         *(reinterpret_cast<uint16_t*>(*ptr)) = 0xFF;
72         *ptr += sizeof( uint16_t );
73         // Absolute value (machine endian)
74         *(reinterpret_cast<uint32_t*>(*ptr)) = value;
75         *ptr += sizeof( uint32_t );
76     }
77 
78     // Initialise the passed block for reading, returning
79     // the initial position and a pointer to the second entry.
80     uint64_t block32_initial_pos( char* block, char** ptr )
81     {
82         *ptr = block + sizeof( uint32_t );
83         return *(reinterpret_cast<uint32_t*>(block));
84     }
85 
86     // Give the next position in the block based on the previous
87     // position, then increase the pointer.
88     uint64_t block32_next_pos( char** ptr, uint64_t previous_pos )
89     {
90         uint64_t pos = previous_pos;
91 
92         uint8_t byte = **ptr;
93         ++(*ptr);
94         if ( ! ( byte & 0x80 ) ) {
95             // High order bit is 0
96             pos += byte;
97         }
98         else if ( ( byte & 0xC0 ) == 0x80 ) {
99             // We need to read the low order byte
100             uint8_t lo_byte = **ptr;
101             ++(*ptr);
102             // Remove the starting 10b
103             byte &= ~0xC0;
104             // And form the displacement (stored as big endian)
105             pos += ( (uint16_t) byte << 8 ) | (uint16_t) lo_byte;
106         }
107         else {
108             // Skip the next byte (not used)
109             ++(*ptr);
110             // And read the new absolute pos (machine endian)
111             pos = *(reinterpret_cast<uint32_t*>(*ptr));
112             *ptr += sizeof( uint32_t );
113         }
114 
115         return pos;
116     }
117 }
118 
119 // template<int BLOCK_SIZE>
120 void CompressedLinePositionStorage::append( uint64_t pos )
121 {
122     if ( ! block_pointer_ ) {
123         // We need to start a new block
124         block32_index_.push_back(
125             block32_new( BLOCK_SIZE, pos, &block_pointer_ ) );
126     }
127     else {
128         uint64_t delta = pos - current_pos_;
129         if ( delta < 128 ) {
130             // Code relative on one byte
131             block_add_one_byte_relative( &block_pointer_, delta );
132         }
133         else if ( delta < 16384 ) {
134             // Code relative on two bytes
135             block_add_two_bytes_relative( &block_pointer_, delta );
136         }
137         else {
138             // Code absolute
139             block32_add_absolute( &block_pointer_, pos );
140         }
141     }
142 
143     current_pos_ = pos;
144     ++nb_lines_;
145 
146     if ( nb_lines_ % BLOCK_SIZE == 0 ) {
147         // We have finished the block
148         block_pointer_ = nullptr;
149     }
150 }
151 
152 // template<int BLOCK_SIZE>
153 uint64_t CompressedLinePositionStorage::at( int index ) const
154 {
155     char* block = block32_index_[ index / BLOCK_SIZE ];
156     char* ptr;
157     uint64_t position = block32_initial_pos( block, &ptr );
158 
159     for ( int i = 0; i < index % BLOCK_SIZE; i++ ) {
160         // Go through all the lines in the block till the one we want
161         position = block32_next_pos( &ptr, position );
162     }
163 
164     return position;
165 }
166 
167 void CompressedLinePositionStorage::append_list(
168         const CompressedLinePositionStorage& other )
169 {
170     // This is not very clever, but caching should make it
171     // reasonably fast.
172     for ( uint32_t i = 0; i < other.size(); i++ )
173         append( other.at( i ) );
174 }
175 
176 // template<int BLOCK_SIZE>
177 void CompressedLinePositionStorage::pop_back()
178 {
179 }
180