/*
 * Copyright (C) 2016 Nicolas Bonnefon and other contributors
 *
 * This file is part of glogg.
 *
 * glogg is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * glogg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with glogg.  If not, see <http://www.gnu.org/licenses/>.
 */

#ifndef ENCODINGSPECULATOR_H
#define ENCODINGSPECULATOR_H

#include <cstdint>

// The encoding speculator tries to determine the likely encoding
// of the stream of bytes which is passed to it.
//
// Usage: feed the bytes one at a time through inject_byte(), then
// call guess() to obtain the current best guess.

class EncodingSpeculator {
  public:
    // Encodings which can be reported by guess().
    enum class Encoding {
        ASCII7,
        ASCII8,
        UTF8,
        UTF16LE,
        UTF16BE,
        BIG5,
        GB18030,
        SHIFT_JIS,
        KOI8R
    };

    // Builds a speculator in the Start state.
    // Every member has a default initializer below, so no field is
    // ever left indeterminate, whatever inject_byte() reads first.
    EncodingSpeculator() = default;

    // Inject one byte into the speculator
    void inject_byte( uint8_t byte );

    // Returns the current guess based on the previously injected bytes
    Encoding guess() const;

  private:
    // Internal states of the speculator; the transitions between
    // them are implemented by inject_byte() (not visible in this header).
    enum class State {
        Start,
        ASCIIOnly,
        OtherOrUnknown8Bit,
        UTF8LeadingByteSeen,
        ValidUTF8,
        UTF16BELeadingBOMByteSeen,
        UTF16LELeadingBOMByteSeen,
        ValidUTF16LE,
        ValidUTF16BE,
    };

    // Current state of the speculator.
    State state_ = State::Start;

    // NOTE(review): the three members below look like the bookkeeping
    // for decoding a multi-byte UTF-8 sequence (code point being
    // assembled, continuation bytes still expected, smallest value
    // acceptable for the sequence) -- confirm against inject_byte().
    uint32_t code_point_ = 0;
    int continuation_left_ = 0;
    uint32_t min_value_ = 0;
};

#endif