Uploaded image for project: 'Qt'
  1. Qt
  2. QTBUG-10919

Unable to insert unicode chars with codepoint > 255 in nvarchar2 column on oracle

    XMLWordPrintable

Details

    • Bug
    • Resolution: Done
    • P2: Important
    • 4.7.0
    • 4.6.2
    • SQL Support
    • f8f255553f3640355d3e196e100778256741701c

    Description

      I am unable to insert or retrieve unicode chars with codepoint > 255 in nvarchar2 columents on oracle 10.2, using QSql.

      I believe this may be because of some combination of qt and the servers NLS settings.

      The settings on the server/database is like this:
      "Parameter" "Value"
      "NLS_CALENDAR" "GREGORIAN"
      "NLS_CHARACTERSET" "WE8ISO8859P1"
      "NLS_COMP" "BINARY"
      "NLS_CURRENCY" "kr"
      "NLS_DATE_FORMAT" "RR-MM-DD"
      "NLS_DATE_LANGUAGE" "DANISH"
      "NLS_DUAL_CURRENCY" "¿"
      "NLS_ISO_CURRENCY" "DENMARK"
      "NLS_LANGUAGE" "DANISH"
      "NLS_LENGTH_SEMANTICS" "BYTE"
      "NLS_NCHAR_CHARACTERSET" "AL16UTF16"
      "NLS_NCHAR_CONV_EXCP" "FALSE"
      "NLS_NUMERIC_CHARACTERS" ",."
      "NLS_SORT" "DANISH"
      "NLS_TERRITORY" "DENMARK"
      "NLS_TIME_FORMAT" "HH24:MI:SSXFF"
      "NLS_TIMESTAMP_FORMAT" "RR-MM-DD HH24:MI:SSXFF"
      "NLS_TIMESTAMP_TZ_FORMAT" "RR-MM-DD HH24:MI:SSXFF TZR"
      "NLS_TIME_TZ_FORMAT" "HH24:MI:SSXFF TZR"

      Note, that the NLS_CHARACTERSET is more or less Latin1, while the NLS_NCHAR_CHARACTERSET is more or less utf16.

      I run an example program like this:

      int die( const QString & desc, const QSqlQuery & q ) {
      cerr << "Error: " << desc << endl;
      cerr << "Error: " << q.lastError().text() << endl;
      cerr << "Error: " << dest_sql_db.lastError().text() << endl;
      exit( 1 );
      }

      int main( int argc, char ** argv ) {

      ////////////////////////////////////////////////////////////
      // Need to set up Qt first - among other things to get the encoding right.
      QCoreApplication app( argc, argv );

      QSqlDatabase db = QSqlDatabase::addDatabase( QString( "QOCI" ), "db" );
      db.setDatabaseName( server );
      db.setUserName( username );
      db.setPassword( password );
      QSqlQuery query( db );

      db.open() || die( "An error occured while connecting to the database", query );

      query.prepare( "create table utf8_tst(col1 nvarchar2(1))" );
      query.exec() || die( "Unable to create table utf8_tst", query );

      QString A = "A";
      QString ligaturae = QString::fromUtf8( QByteArray( "\x0c3\x0a6" ) );
      QString bullseye = QString::fromUtf8( QByteArray( "\x0E2\x097\x08E" ) );

      cout << "If you have an utf8 terminal, you should see ascii A, ligatur ae, and a bullseye symbol" << endl;
      cout << "A: '" << A << "', ligatur ae: '" << ligaturae << "', bullseye: '" << bullseye << "'" << endl;

      query.prepare( "Insert into utf8_tst values ( :mychar )" );
      query.bindValue( ":mychar", A );
      query.exec() || die ( "Failed to insert value A", query );

      query.prepare( "Insert into utf8_tst values ( :mychar )" );
      query.bindValue( ":mychar", ligaturae );
      query.exec() || die ( "Failed to insert value ligaturae", query );

      query.prepare( "Insert into utf8_tst values ( :mychar )" );
      query.bindValue( ":mychar", bullseye );
      query.exec() || die ( "Failed to insert value bullseye", query );

      cout << "About to select back" << endl;
      query.prepare ( "select col1, vsize(col1), dump(col1, 1010) Decimal_bytes, dump(col1, 1016) Hex_Bytes from utf8_tst" );
      // query.prepare ( "select col1 from utf8_tst" );
      query.exec() || die( "Unable to select", query );
      query.first() || die( "No records found???", query );
      do

      { cout << "col1: '" << query.value(0).toString().toUtf8().constData() << "'" << ", size: " << query.value(1).toInt() << ", dump dec: '" << query.value(2).toString().toUtf8().constData() << "'" << ", dump hex: '" << query.value(3).toString().toUtf8().constData() << "'" << endl; }

      while (query.next());

      db.close();

      return 42;
      }

      and, the output is like this:

      -------------------
      If you have an utf8 terminal, you should see ascii A, ligatur ae, and a bullseye symbol
      A: 'A', ligatur ae: 'æ', bullseye: '◎'
      About to select back
      col1: 'A', size: 2, dump dec: 'Typ=1 Len=2 CharacterSet=AL16UTF16: 0,65', dump hex: 'Typ=1 Len=2 CharacterSet=AL16UTF16: 0,41'
      col1: 'æ', size: 2, dump dec: 'Typ=1 Len=2 CharacterSet=AL16UTF16: 0,230', dump hex: 'Typ=1 Len=2 CharacterSet=AL16UTF16: 0,e6'
      col1: '¿', size: 2, dump dec: 'Typ=1 Len=2 CharacterSet=AL16UTF16: 0,191', dump hex: 'Typ=1 Len=2 CharacterSet=AL16UTF16: 0,bf'
      -------------------

      The value of the third row should have been 25,ce hexidecimal, I believe.

      If I try to insert the values directly from e.g. sqldeveloper:

      create table utf8_tst(col1 nvarchar2(1));

      – Euro is U+20AC
      insert into utf8_tst values (unistr('\20AC'));
      – Small Greek Gamma U+03B3
      insert into utf8_tst values (unistr('\03B3'));
      – Ascii A
      insert into utf8_tst values (unistr('A'));
      – Ligatur ae
      insert into utf8_tst values (unistr('\00E6'));
      – Bulls eye
      insert into utf8_tst values (unistr('\25CE'));

      I can select them:

      select col1, vsize(col1), dump(col1, 1010) Decimal_bytes, dump(col1, 1016) Hex_Bytes from utf8_tst;

      And, will get this (in sqldeveloper):

      ---------------------
      "COL1" "VSIZE(COL1)" "DECIMAL_BYTES" "HEX_BYTES"
      "æ" "2" "Typ=1 Len=2 CharacterSet=AL16UTF16: 0,230""Typ=1 Len=2 CharacterSet=AL16UTF16: 0,e6"
      "◎" "2" "Typ=1 Len=2 CharacterSet=AL16UTF16: 37,206""Typ=1 Len=2 CharacterSet=AL16UTF16: 25,ce"
      "€" "2" "Typ=1 Len=2 CharacterSet=AL16UTF16: 32,172""Typ=1 Len=2 CharacterSet=AL16UTF16: 20,ac"
      "γ" "2" "Typ=1 Len=2 CharacterSet=AL16UTF16: 3,179""Typ=1 Len=2 CharacterSet=AL16UTF16: 3,b3"
      "A" "2" "Typ=1 Len=2 CharacterSet=AL16UTF16: 0,65""Typ=1 Len=2 CharacterSet=AL16UTF16: 0,41"
      ---------------------

      Keeping these values in the database, and running the test program above, modified to just dump the data, I get this:

      ---------------------
      About to select back
      col1: 'æ', size: 2, dump dec: 'Typ=1 Len=2 CharacterSet=AL16UTF16: 0,230', dump hex: 'Typ=1 Len=2 CharacterSet=AL16UTF16: 0,e6'
      col1: '¿', size: 2, dump dec: 'Typ=1 Len=2 CharacterSet=AL16UTF16: 37,206', dump hex: 'Typ=1 Len=2 CharacterSet=AL16UTF16: 25,ce'
      col1: '¿', size: 2, dump dec: 'Typ=1 Len=2 CharacterSet=AL16UTF16: 32,172', dump hex: 'Typ=1 Len=2 CharacterSet=AL16UTF16: 20,ac'
      col1: '¿', size: 2, dump dec: 'Typ=1 Len=2 CharacterSet=AL16UTF16: 3,179', dump hex: 'Typ=1 Len=2 CharacterSet=AL16UTF16: 3,b3'
      col1: 'A', size: 2, dump dec: 'Typ=1 Len=2 CharacterSet=AL16UTF16: 0,65', dump hex: 'Typ=1 Len=2 CharacterSet=AL16UTF16: 0,41'
      ---------------------

      I am speculating that Qt and oracle (wrongly) decides to encode my strings for nvarchar2 columns using the charset in NLS_CHARACTERSET, instead of the one in NLS_NCHAR_CHARACTERSET?

      Very much looking forward to your response.

      Attachments

        No reviews matched the request. Check your Options in the drop-down menu of this sections header.

        Activity

          People

            charles Charles Yin (closed Nokia identity) (Inactive)
            mbd@dbc.dk Mads Bondo Dydensborg
            Votes:
            0 Vote for this issue
            Watchers:
            1 Start watching this issue

            Dates

              Created:
              Updated:
              Resolved:

              Gerrit Reviews

                There are no open Gerrit changes