/*
* Copyright (C) 2015 Nicolas Bonnefon and other contributors
*
* This file is part of glogg.
*
* glogg is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* glogg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with glogg. If not, see .
*/
#include
#include
#include "utils.h"
#include "data/compressedlinestorage.h"
#ifdef HAVE_HTONS
# include
#else
# define htons(a) glogg_htons(a)
#endif
namespace {
// Functions to manipulate blocks
// Create a new 32 bits block of the passed size,
// initialised at the passed position
char* block32_new( int block_size, uint32_t initial_position,
char** block_ptr )
{
// malloc a block of the maximum possible size
// (where every line is >16384)
char* ptr = static_cast( malloc( 4 + block_size * 6 ) );
if ( ptr ) {
// Write the initial_position
*(reinterpret_cast(ptr)) = initial_position;
*block_ptr = ptr + 4;
}
return ptr;
}
// Create a new 64 bits block of the passed size,
// initialised at the passed position
char* block64_new( int block_size, uint64_t initial_position,
char** block_ptr )
{
// malloc a block of the maximum possible size
// (where every line is >16384)
char* ptr = static_cast( malloc( 8 + block_size * 10 ) );
if ( ptr ) {
// Write the initial_position
*(reinterpret_cast(ptr)) = initial_position;
*block_ptr = ptr + 8;
}
return ptr;
}
// Add a one byte relative delta (0-127) and inc pointer
// First bit is always 0
void block_add_one_byte_relative( char** ptr, uint8_t value )
{
**ptr = value;
*ptr += sizeof( value );
}
// Add a two bytes relative delta (0-16383) and inc pointer
// First 2 bits are always 10
void block_add_two_bytes_relative( char** ptr, uint16_t value )
{
// Stored in big endian format in order to recognise the initial pattern:
// 10xx xxxx xxxx xxxx
// HO byte | LO byte
*(reinterpret_cast(*ptr)) = htons( value | (1 << 15) );
*ptr += sizeof( value );
}
// Add a signal and a 4 bytes absolute position and inc pointer
void block32_add_absolute( char** ptr, uint32_t value )
{
// 2 bytes marker (actually only the first two bits are tested)
*(reinterpret_cast(*ptr)) = 0xFF;
*ptr += sizeof( uint16_t );
// Absolute value (machine endian)
*(reinterpret_cast(*ptr)) = value;
*ptr += sizeof( uint32_t );
}
// Initialise the passed block for reading, returning
// the initial position and a pointer to the second entry.
uint64_t block32_initial_pos( char* block, char** ptr )
{
*ptr = block + sizeof( uint32_t );
return *(reinterpret_cast(block));
}
// Give the next position in the block based on the previous
// position, then increase the pointer.
uint64_t block32_next_pos( char** ptr, uint64_t previous_pos )
{
uint64_t pos = previous_pos;
uint8_t byte = **ptr;
++(*ptr);
if ( ! ( byte & 0x80 ) ) {
// High order bit is 0
pos += byte;
}
else if ( ( byte & 0xC0 ) == 0x80 ) {
// We need to read the low order byte
uint8_t lo_byte = **ptr;
++(*ptr);
// Remove the starting 10b
byte &= ~0xC0;
// And form the displacement (stored as big endian)
pos += ( (uint16_t) byte << 8 ) | (uint16_t) lo_byte;
}
else {
// Skip the next byte (not used)
++(*ptr);
// And read the new absolute pos (machine endian)
pos = *(reinterpret_cast(*ptr));
*ptr += sizeof( uint32_t );
}
return pos;
}
// Add a signal and a 8 bytes absolute position and inc pointer
void block64_add_absolute( char** ptr, uint64_t value )
{
// This is unaligned, can cause problem on some CPUs
// 2 bytes marker (actually only the first two bits are tested)
*(reinterpret_cast(*ptr)) = 0xFF;
*ptr += sizeof( uint16_t );
// Absolute value (machine endian)
*(reinterpret_cast(*ptr)) = value;
*ptr += sizeof( uint64_t );
}
// Initialise the passed block for reading, returning
// the initial position and a pointer to the second entry.
uint64_t block64_initial_pos( char* block, char** ptr )
{
*ptr = block + sizeof( uint64_t );
return *(reinterpret_cast(block));
}
// Give the next position in the block based on the previous
// position, then increase the pointer.
uint64_t block64_next_pos( char** ptr, uint64_t previous_pos )
{
uint64_t pos = previous_pos;
uint8_t byte = **ptr;
++(*ptr);
if ( ! ( byte & 0x80 ) ) {
// High order bit is 0
pos += byte;
}
else if ( ( byte & 0xC0 ) == 0x80 ) {
// We need to read the low order byte
uint8_t lo_byte = **ptr;
++(*ptr);
// Remove the starting 10b
byte &= ~0xC0;
// And form the displacement (stored as big endian)
pos += ( (uint16_t) byte << 8 ) | (uint16_t) lo_byte;
}
else {
// Skip the next byte (not used)
++(*ptr);
// And read the new absolute pos (machine endian)
pos = *(reinterpret_cast(*ptr));
*ptr += sizeof( uint64_t );
}
return pos;
}
}
void CompressedLinePositionStorage::move_from(
CompressedLinePositionStorage&& orig )
{
nb_lines_ = orig.nb_lines_;
first_long_line_ = orig.first_long_line_;
current_pos_ = orig.current_pos_;
block_pointer_ = orig.block_pointer_;
previous_block_pointer_ = orig.previous_block_pointer_;
orig.nb_lines_ = 0;
}
// Move constructor
CompressedLinePositionStorage::CompressedLinePositionStorage(
CompressedLinePositionStorage&& orig )
: block32_index_( std::move( orig.block32_index_ ) ),
block64_index_( std::move( orig.block64_index_ ) )
{
move_from( std::move( orig ) );
}
void CompressedLinePositionStorage::free_blocks()
{
for ( char* block : block32_index_ ) {
void* p = static_cast( block );
free( p );
}
for ( char* block : block64_index_ ) {
void* p = static_cast( block );
free( p );
}
}
// Move assignement
CompressedLinePositionStorage& CompressedLinePositionStorage::operator=(
CompressedLinePositionStorage&& orig )
{
free_blocks();
block32_index_ = std::move( orig.block32_index_ );
block64_index_ = std::move( orig.block64_index_ );
move_from( std::move( orig ) );
return *this;
}
CompressedLinePositionStorage::~CompressedLinePositionStorage()
{
free_blocks();
}
// template
void CompressedLinePositionStorage::append( uint64_t pos )
{
// Lines must be stored in order
assert( ( pos > current_pos_ ) || ( pos == 0 ) );
// Save the pointer in case we need to "pop_back"
previous_block_pointer_ = block_pointer_;
bool store_in_big = false;
if ( pos > UINT32_MAX ) {
store_in_big = true;
if ( first_long_line_ == UINT32_MAX ) {
// First "big" end of line, we will start a new (64) block
first_long_line_ = nb_lines_;
block_pointer_ = nullptr;
}
}
if ( ! block_pointer_ ) {
// We need to start a new block
if ( ! store_in_big )
block32_index_.push_back(
block32_new( BLOCK_SIZE, pos, &block_pointer_ ) );
else
block64_index_.push_back(
block64_new( BLOCK_SIZE, pos, &block_pointer_ ) );
}
else {
uint64_t delta = pos - current_pos_;
if ( delta < 128 ) {
// Code relative on one byte
block_add_one_byte_relative( &block_pointer_, delta );
}
else if ( delta < 16384 ) {
// Code relative on two bytes
block_add_two_bytes_relative( &block_pointer_, delta );
}
else {
// Code absolute
if ( ! store_in_big )
block32_add_absolute( &block_pointer_, pos );
else
block64_add_absolute( &block_pointer_, pos );
}
}
current_pos_ = pos;
++nb_lines_;
if ( ! store_in_big ) {
if ( nb_lines_ % BLOCK_SIZE == 0 ) {
// We have finished the block
// Let's reduce its size to what is actually used
int block_index = nb_lines_ / BLOCK_SIZE - 1;
char* block = block32_index_[block_index];
// We allocate extra space for the last element in case it
// is replaced by an absolute value in the future (following a pop_back)
size_t new_size = ( previous_block_pointer_
+ sizeof( uint16_t ) + sizeof( uint32_t ) ) - block;
void* new_location = realloc( block, new_size );
if ( new_location )
block32_index_[block_index] = static_cast( new_location );
block_pointer_ = nullptr;
previous_block_pointer_ = static_cast( new_location ) + ( previous_block_pointer_ - block );
}
}
else {
if ( ( nb_lines_ - first_long_line_ ) % BLOCK_SIZE == 0 ) {
// We have finished the block
// Let's reduce its size to what is actually used
int block_index = ( nb_lines_ - first_long_line_ ) / BLOCK_SIZE - 1;
char* block = block64_index_[block_index];
// We allocate extra space for the last element in case it
// is replaced by an absolute value in the future (following a pop_back)
size_t new_size = ( previous_block_pointer_
+ sizeof( uint16_t ) + sizeof( uint64_t ) ) - block;
void* new_location = realloc( block, new_size );
if ( new_location )
block64_index_[block_index] = static_cast( new_location );
block_pointer_ = nullptr;
previous_block_pointer_ = static_cast( new_location ) + ( previous_block_pointer_ - block );
}
}
}
// template
uint64_t CompressedLinePositionStorage::at( uint32_t index ) const
{
char* ptr;
uint64_t position;
Cache* last_read = last_read_.getPtr();
if ( index < first_long_line_ ) {
if ( ( index == last_read->index + 1 ) && ( index % BLOCK_SIZE != 0 ) ) {
position = last_read->position;
ptr = last_read->ptr;
position = block32_next_pos( &ptr, position );
}
else {
char* block = block32_index_[ index / BLOCK_SIZE ];
position = block32_initial_pos( block, &ptr );
for ( uint32_t i = 0; i < index % BLOCK_SIZE; i++ ) {
// Go through all the lines in the block till the one we want
position = block32_next_pos( &ptr, position );
}
}
}
else {
const uint32_t index_in_64 = index - first_long_line_;
if ( ( index == last_read->index + 1 ) && ( index_in_64 % BLOCK_SIZE != 0 ) ) {
position = last_read->position;
ptr = last_read->ptr;
position = block64_next_pos( &ptr, position );
}
else {
char* block = block64_index_[ index_in_64 / BLOCK_SIZE ];
position = block64_initial_pos( block, &ptr );
for ( uint32_t i = 0; i < index_in_64 % BLOCK_SIZE; i++ ) {
// Go through all the lines in the block till the one we want
position = block64_next_pos( &ptr, position );
}
}
}
// Populate our cache ready for next consecutive read
last_read->index = index;
last_read->position = position;
last_read->ptr = ptr;
return position;
}
void CompressedLinePositionStorage::append_list(
const std::vector& positions )
{
// This is not very clever, but caching should make it
// reasonably fast.
for ( uint32_t i = 0; i < positions.size(); i++ )
append( positions.at( i ) );
}
// template
void CompressedLinePositionStorage::pop_back()
{
// Removing the last entered data, there are two cases
if ( previous_block_pointer_ ) {
// The last append was a normal entry in an existing block,
// so we can just revert the pointer
block_pointer_ = previous_block_pointer_;
previous_block_pointer_ = nullptr;
}
else {
// A new block has been created for the last entry, we need
// to de-alloc it.
if ( first_long_line_ == UINT32_MAX ) {
// If we try to pop_back() twice, we're dead!
assert( ( nb_lines_ - 1 ) % BLOCK_SIZE == 0 );
char* block = block32_index_.back();
block32_index_.pop_back();
free( block );
}
else {
// If we try to pop_back() twice, we're dead!
assert( ( nb_lines_ - first_long_line_ - 1 ) % BLOCK_SIZE == 0 );
char* block = block64_index_.back();
block64_index_.pop_back();
free( block );
}
block_pointer_ = nullptr;
}
--nb_lines_;
current_pos_ = at( nb_lines_ - 1 );
}