#include const QByteArray BOM_UTF16_LE = "\xff\xfe"; const QByteArray BOM_UTF16_BE = "\xfe\xff"; const QByteArray BOM_UTF8 = "\xef\xbb\xbf"; int main(int argc, char *argv[]) { QCoreApplication a(argc, argv); QString sFileName = "test_utf16_le.txt"; QStringConverter::Encoding eEncoding; QStringDecoder m_decoder; QFile file(sFileName); if (file.open(QIODevice::ReadOnly)) { QByteArray baLine = file.readLine(); if (baLine.startsWith(BOM_UTF8)) eEncoding = QStringConverter::Utf8; else if (baLine.startsWith(BOM_UTF16_LE)) eEncoding = QStringConverter::Utf16LE; else if (baLine.startsWith(BOM_UTF16_BE)) eEncoding = QStringConverter::Utf16BE; else eEncoding = QStringConverter::System; m_decoder = QStringDecoder(eEncoding); file.reset(); } // BUG INFORMATION: //-------------------------------------------------------------------------- // if the encoding of the text file is Utf-16 LE (Little Endian), line breaks // are handled incorrectly. // // In Utf16-LE, linefeeds are encoded as "0x0a 0x00", but "readLine" seems to cut off the second byte. // So the line ends with "0x0a" and the next starts with "0x00". Besides, none of them are valid UTF-16 characters. // See also the HINTs below. bool bEof = file.atEnd(); int nCounter = 0; while (!bEof) { nCounter++; // read the next line // ==> the decoder automatically skips BOMs QByteArray baLine = file.readLine(); // HINT: // - each row ends with a half line feed character '0x0a', the number of read bytes is odd. // ==> BUG: this is invalid for UTF-16 // - starting with line 2, each line starts with a leading 0-byte. --> INCORRECT! QString sLine = m_decoder.decode(baLine); // HINT: // - trailing line feeds are cut off. --> INCORRECT! // - starting with line 2, each line starts with a leading line feed. --> INCORRECT! bEof = file.atEnd(); } return a.exec(); }