/*
 * Copyright (C) 2016 Nicolas Bonnefon and other contributors
 *
 * This file is part of glogg.
 *
 * glogg is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * glogg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with glogg.  If not, see <http://www.gnu.org/licenses/>.
 */

#ifndef ENCODINGSPECULATOR_H
#define ENCODINGSPECULATOR_H

#include <cstdint>

// The encoding speculator tries to determine the likely encoding
// of the stream of bytes which is passed to it.
//
// Usage: feed the stream one byte at a time through inject_byte(), then
// call guess() at any point to obtain the current best guess.

class EncodingSpeculator {
  public:
    // Encodings the speculator can report.
    enum class Encoding {
        ASCII7,
        ASCII8,
        UTF8
    };

    // Builds a speculator in its initial state (State::ASCIIOnly).
    // Every data member gets a defined value through its default member
    // initializer, so a fresh object can be copied or inspected safely.
    EncodingSpeculator() = default;

    // Inject one byte into the speculator
    void inject_byte( uint8_t byte );

    // Returns the current guess based on the previously injected bytes
    Encoding guess() const;

  private:
    // Internal state of the detector. The transitions are implemented
    // out of line, together with inject_byte().
    enum class State {
        ASCIIOnly,
        Unknown8Bit,
        UTF8LeadingByteSeen,
        ValidUTF8,
    };

    State state_ = State::ASCIIOnly;

    // Scratch values used by the out-of-line implementation.
    // NOTE(review): judging by their names these hold the code point being
    // assembled, the number of continuation bytes still expected, and the
    // smallest code point accepted for the current sequence length --
    // confirm against the .cpp file.
    uint32_t code_point_ = 0;
    int continuation_left_ = 0;
    uint32_t min_value_ = 0;
};

#endif