Details
-
Bug
-
Resolution: Done
-
P2: Important
-
4.6.2
-
Linux Debian 5.0.4
Linux 2.6.28.6-dbc-k8 #1 SMP Thu Feb 19 00:03:29 CET 2009 x86_64 GNU/Linux
Oracle:
"Oracle Database 10g Enterprise Edition Release 10.2.0.4.0 - 64bi"
"PL/SQL Release 10.2.0.4.0 - Production"
"CORE 10.2.0.4.0 Production"
"TNS for Linux: Version 10.2.0.4.0 - Production"
"NLSRTL Version 10.2.0.4.0 - Production"
Linux Debian 5.0.4 Linux 2.6.28.6-dbc-k8 #1 SMP Thu Feb 19 00:03:29 CET 2009 x86_64 GNU/Linux Oracle: "Oracle Database 10g Enterprise Edition Release 10.2.0.4.0 - 64bi" "PL/SQL Release 10.2.0.4.0 - Production" "CORE 10.2.0.4.0 Production" "TNS for Linux: Version 10.2.0.4.0 - Production" "NLSRTL Version 10.2.0.4.0 - Production"
-
f8f255553f3640355d3e196e100778256741701c
Description
I am unable to insert or retrieve Unicode characters with code point > 255 in nvarchar2 columns on Oracle 10.2, using QSql.
I believe this may be caused by some combination of Qt's encoding handling and the server's NLS settings.
The settings on the server/database is like this:
"Parameter" "Value"
"NLS_CALENDAR" "GREGORIAN"
"NLS_CHARACTERSET" "WE8ISO8859P1"
"NLS_COMP" "BINARY"
"NLS_CURRENCY" "kr"
"NLS_DATE_FORMAT" "RR-MM-DD"
"NLS_DATE_LANGUAGE" "DANISH"
"NLS_DUAL_CURRENCY" "¿"
"NLS_ISO_CURRENCY" "DENMARK"
"NLS_LANGUAGE" "DANISH"
"NLS_LENGTH_SEMANTICS" "BYTE"
"NLS_NCHAR_CHARACTERSET" "AL16UTF16"
"NLS_NCHAR_CONV_EXCP" "FALSE"
"NLS_NUMERIC_CHARACTERS" ",."
"NLS_SORT" "DANISH"
"NLS_TERRITORY" "DENMARK"
"NLS_TIME_FORMAT" "HH24:MI:SSXFF"
"NLS_TIMESTAMP_FORMAT" "RR-MM-DD HH24:MI:SSXFF"
"NLS_TIMESTAMP_TZ_FORMAT" "RR-MM-DD HH24:MI:SSXFF TZR"
"NLS_TIME_TZ_FORMAT" "HH24:MI:SSXFF TZR"
Note, that the NLS_CHARACTERSET is more or less Latin1, while the NLS_NCHAR_CHARACTERSET is more or less utf16.
I run an example program like this:
int die( const QString & desc, const QSqlQuery & q ) {
cerr << "Error: " << desc << endl;
cerr << "Error: " << q.lastError().text() << endl;
cerr << "Error: " << dest_sql_db.lastError().text() << endl;
exit( 1 );
}
// Reproduction program quoted from the bug report: creates an NVARCHAR2(1)
// table, inserts three test strings through a bound placeholder, then selects
// them back together with DUMP() output so the bytes actually stored on the
// server are visible.
// NOTE(review): `server`, `username` and `password` are globals defined
// outside this excerpt; `die` (above) likewise references a `dest_sql_db`
// global not shown here — the program is a fragment, not compilable as-is.
int main( int argc, char ** argv ) {
////////////////////////////////////////////////////////////
// Need to set up Qt first - among other things to get the encoding right.
QCoreApplication app( argc, argv );
// Open a named QOCI (Oracle Call Interface) connection.
QSqlDatabase db = QSqlDatabase::addDatabase( QString( "QOCI" ), "db" );
db.setDatabaseName( server );
db.setUserName( username );
db.setPassword( password );
QSqlQuery query( db );
// `||` short-circuit: die() only runs when open() returns false.
db.open() || die( "An error occured while connecting to the database", query );
query.prepare( "create table utf8_tst(col1 nvarchar2(1))" );
query.exec() || die( "Unable to create table utf8_tst", query );
// Three probes: plain ASCII, a Latin-1-representable char, and a char with
// code point > 255 (the failing case).
QString A = "A";
// "\x0c3\x0a6" is the UTF-8 byte pair C3 A6 = U+00E6 (ae ligature).
QString ligaturae = QString::fromUtf8( QByteArray( "\x0c3\x0a6" ) );
// "\x0E2\x097\x08E" is the UTF-8 triple E2 97 8E = U+25CE (bullseye).
QString bullseye = QString::fromUtf8( QByteArray( "\x0E2\x097\x08E" ) );
cout << "If you have an utf8 terminal, you should see ascii A, ligatur ae, and a bullseye symbol" << endl;
cout << "A: '" << A << "', ligatur ae: '" << ligaturae << "', bullseye: '" << bullseye << "'" << endl;
// Insert each probe through a named bind variable.
query.prepare( "Insert into utf8_tst values ( :mychar )" );
query.bindValue( ":mychar", A );
query.exec() || die ( "Failed to insert value A", query );
query.prepare( "Insert into utf8_tst values ( :mychar )" );
query.bindValue( ":mychar", ligaturae );
query.exec() || die ( "Failed to insert value ligaturae", query );
query.prepare( "Insert into utf8_tst values ( :mychar )" );
query.bindValue( ":mychar", bullseye );
query.exec() || die ( "Failed to insert value bullseye", query );
cout << "About to select back" << endl;
// DUMP(col, 1010/1016) shows the stored bytes in decimal/hex plus charset.
query.prepare ( "select col1, vsize(col1), dump(col1, 1010) Decimal_bytes, dump(col1, 1016) Hex_Bytes from utf8_tst" );
// query.prepare ( "select col1 from utf8_tst" );
query.exec() || die( "Unable to select", query );
query.first() || die( "No records found???", query );
// NOTE(review): the do-loop body (presumably the code printing each row,
// producing the "col1: ..." output below) appears to have been lost when
// the report was pasted — as written this is not valid C++.
do
while (query.next());
db.close();
return 42;
}
and, the output is like this:
-------------------
If you have an utf8 terminal, you should see ascii A, ligatur ae, and a bullseye symbol
A: 'A', ligatur ae: 'æ', bullseye: '◎'
About to select back
col1: 'A', size: 2, dump dec: 'Typ=1 Len=2 CharacterSet=AL16UTF16: 0,65', dump hex: 'Typ=1 Len=2 CharacterSet=AL16UTF16: 0,41'
col1: 'æ', size: 2, dump dec: 'Typ=1 Len=2 CharacterSet=AL16UTF16: 0,230', dump hex: 'Typ=1 Len=2 CharacterSet=AL16UTF16: 0,e6'
col1: '¿', size: 2, dump dec: 'Typ=1 Len=2 CharacterSet=AL16UTF16: 0,191', dump hex: 'Typ=1 Len=2 CharacterSet=AL16UTF16: 0,bf'
-------------------
The value of the third row should have been 25,ce hexadecimal (the UTF-16 encoding of U+25CE), I believe.
If I try to insert the values directly from e.g. sqldeveloper:
create table utf8_tst(col1 nvarchar2(1));
-- Euro is U+20AC
insert into utf8_tst values (unistr('\20AC'));
-- Small Greek Gamma U+03B3
insert into utf8_tst values (unistr('\03B3'));
-- Ascii A
insert into utf8_tst values (unistr('A'));
-- Ligatur ae
insert into utf8_tst values (unistr('\00E6'));
-- Bulls eye
insert into utf8_tst values (unistr('\25CE'));
I can select them:
select col1, vsize(col1), dump(col1, 1010) Decimal_bytes, dump(col1, 1016) Hex_Bytes from utf8_tst;
And, will get this (in sqldeveloper):
---------------------
"COL1" "VSIZE(COL1)" "DECIMAL_BYTES" "HEX_BYTES"
"æ" "2" "Typ=1 Len=2 CharacterSet=AL16UTF16: 0,230""Typ=1 Len=2 CharacterSet=AL16UTF16: 0,e6"
"◎" "2" "Typ=1 Len=2 CharacterSet=AL16UTF16: 37,206""Typ=1 Len=2 CharacterSet=AL16UTF16: 25,ce"
"€" "2" "Typ=1 Len=2 CharacterSet=AL16UTF16: 32,172""Typ=1 Len=2 CharacterSet=AL16UTF16: 20,ac"
"γ" "2" "Typ=1 Len=2 CharacterSet=AL16UTF16: 3,179""Typ=1 Len=2 CharacterSet=AL16UTF16: 3,b3"
"A" "2" "Typ=1 Len=2 CharacterSet=AL16UTF16: 0,65""Typ=1 Len=2 CharacterSet=AL16UTF16: 0,41"
---------------------
Keeping these values in the database, and running the test program above, modified to just dump the data, I get this:
---------------------
About to select back
col1: 'æ', size: 2, dump dec: 'Typ=1 Len=2 CharacterSet=AL16UTF16: 0,230', dump hex: 'Typ=1 Len=2 CharacterSet=AL16UTF16: 0,e6'
col1: '¿', size: 2, dump dec: 'Typ=1 Len=2 CharacterSet=AL16UTF16: 37,206', dump hex: 'Typ=1 Len=2 CharacterSet=AL16UTF16: 25,ce'
col1: '¿', size: 2, dump dec: 'Typ=1 Len=2 CharacterSet=AL16UTF16: 32,172', dump hex: 'Typ=1 Len=2 CharacterSet=AL16UTF16: 20,ac'
col1: '¿', size: 2, dump dec: 'Typ=1 Len=2 CharacterSet=AL16UTF16: 3,179', dump hex: 'Typ=1 Len=2 CharacterSet=AL16UTF16: 3,b3'
col1: 'A', size: 2, dump dec: 'Typ=1 Len=2 CharacterSet=AL16UTF16: 0,65', dump hex: 'Typ=1 Len=2 CharacterSet=AL16UTF16: 0,41'
---------------------
I am speculating that Qt and Oracle (wrongly) decide to encode my strings for nvarchar2 columns using the character set in NLS_CHARACTERSET, instead of the one in NLS_NCHAR_CHARACTERSET?
Very much looking forward to your response.