/*
 * Copyright (C) 2016 Nicolas Bonnefon and other contributors
 *
 * This file is part of glogg.
 *
 * glogg is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * glogg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with glogg.  If not, see <http://www.gnu.org/licenses/>.
 */

#ifndef ENCODINGSPECULATOR_H
#define ENCODINGSPECULATOR_H

#include <cstdint>

// The encoding speculator tries to determine the likely encoding
// of the stream of bytes which is passed to it.
//
// Usage: feed the bytes one at a time through inject_byte(), then
// ask guess() for the current best estimate.

class EncodingSpeculator {
  public:
    // The encodings the speculator can report.
    enum class Encoding {
        ASCII7,
        ASCII8,
        UTF8,
        UTF16LE,
        UTF16BE
    };

    // Every member has an in-class initializer, so a freshly built
    // speculator is fully initialized: it starts in State::Start with
    // all decoding scratch values zeroed.
    EncodingSpeculator() = default;

    // Inject one byte into the speculator
    void inject_byte( uint8_t byte );

    // Returns the current guess based on the previously injected bytes
    Encoding guess() const;

  private:
    // Internal states of the detector; the transitions between them
    // are implemented by inject_byte(), which is defined out of line.
    enum class State {
        Start,
        ASCIIOnly,
        Unknown8Bit,
        UTF8LeadingByteSeen,
        ValidUTF8,
        UTF16BELeadingBOMByteSeen,
        UTF16LELeadingBOMByteSeen,
        ValidUTF16LE,
        ValidUTF16BE,
    };

    // Current state of the detector.
    State state_ = State::Start;

    // Scratch values used while decoding; their exact role is defined
    // by the out-of-line implementation.
    // NOTE(review): presumably the code point being assembled from a
    // multi-byte UTF-8 sequence, the number of continuation bytes still
    // expected, and the smallest code point acceptable for the current
    // sequence length -- confirm against inject_byte().
    // They are zero-initialized so they never hold indeterminate values.
    uint32_t code_point_ = 0;
    int continuation_left_ = 0;
    uint32_t min_value_ = 0;
};

#endif