sqlite: UTF-8 fix for older Windows
This commit is contained in:
parent
67325a144e
commit
e44dd98dbf
|
@ -123,6 +123,163 @@ int sqlite3_os_type = 0;
|
||||||
}
|
}
|
||||||
#endif /* OS_WINCE */
|
#endif /* OS_WINCE */
|
||||||
|
|
||||||
|
/*** UTF16<-->UTF8 functions minicking MultiByteToWideChar/WideCharToMultiByte ***/
|
||||||
|
int utf8GetMaskIndex(unsigned char n) {
|
||||||
|
if((unsigned char)(n + 2) < 0xc2) return 1; // 00~10111111, fe, ff
|
||||||
|
if(n < 0xe0) return 2; // 110xxxxx
|
||||||
|
if(n < 0xf0) return 3; // 1110xxxx
|
||||||
|
if(n < 0xf8) return 4; // 11110xxx
|
||||||
|
if(n < 0xfc) return 5; // 111110xx
|
||||||
|
return 6; // 1111110x
|
||||||
|
}
|
||||||
|
|
||||||
|
int wc2Utf8Len(wchar_t ** n, int *len) {
|
||||||
|
wchar_t *ch = *n, ch2;
|
||||||
|
int qch;
|
||||||
|
if((0xD800 <= *ch && *ch <= 0xDBFF) && *len) {
|
||||||
|
ch2 = *(ch + 1);
|
||||||
|
if(0xDC00 <= ch2 && ch2 <= 0xDFFF) {
|
||||||
|
qch = 0x10000 + (((*ch - 0xD800) & 0x3ff) << 10) + ((ch2 - 0xDC00) & 0x3ff);
|
||||||
|
(*n)++;
|
||||||
|
(*len)--;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
qch = (int) *ch;
|
||||||
|
|
||||||
|
if (qch <= 0x7f) return 1;
|
||||||
|
else if (qch <= 0x7ff) return 2;
|
||||||
|
else if (qch <= 0xffff) return 3;
|
||||||
|
else if (qch <= 0x1fffff) return 4;
|
||||||
|
else if (qch <= 0x3ffffff) return 5;
|
||||||
|
else return 6;
|
||||||
|
}
|
||||||
|
|
||||||
|
int Utf8ToWideChar(unsigned int unused1, unsigned long unused2, char *sb, int ss, wchar_t * wb, int ws) {
|
||||||
|
static const unsigned char utf8mask[] = { 0, 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01 };
|
||||||
|
char *p = (char *)(sb);
|
||||||
|
char *e = (char *)(sb + ss);
|
||||||
|
wchar_t *w = wb;
|
||||||
|
int cnt = 0, t, qch;
|
||||||
|
|
||||||
|
if (ss < 1) {
|
||||||
|
ss = lstrlenA(sb);
|
||||||
|
e = (char *)(sb + ss);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (wb && ws) {
|
||||||
|
for (; p < e; ++w) {
|
||||||
|
t = utf8GetMaskIndex(*p);
|
||||||
|
qch = (*p++ & utf8mask[t]);
|
||||||
|
while(p < e && --t)
|
||||||
|
qch <<= 6, qch |= (*p++) & 0x3f;
|
||||||
|
if(qch < 0x10000) {
|
||||||
|
if(cnt <= ws)
|
||||||
|
*w = (wchar_t) qch;
|
||||||
|
cnt++;
|
||||||
|
} else {
|
||||||
|
if (cnt + 2 <= ws) {
|
||||||
|
*w++ = (wchar_t) (0xD800 + (((qch - 0x10000) >> 10) & 0x3ff)),
|
||||||
|
*w = (wchar_t) (0xDC00 + (((qch - 0x10000)) & 0x3ff));
|
||||||
|
}
|
||||||
|
cnt += 2;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if(cnt < ws) {
|
||||||
|
*(wb+cnt) = 0;
|
||||||
|
return cnt;
|
||||||
|
} else {
|
||||||
|
*(wb+ws) = 0;
|
||||||
|
return ws;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
for (t; p < e;) {
|
||||||
|
t = utf8GetMaskIndex(*p);
|
||||||
|
qch = (*p++ & utf8mask[t]);
|
||||||
|
while (p < e && --t)
|
||||||
|
qch <<= 6, qch |= (*p++) & 0x3f;
|
||||||
|
if (qch < 0x10000)
|
||||||
|
cnt++;
|
||||||
|
else
|
||||||
|
cnt += 2;
|
||||||
|
}
|
||||||
|
return cnt+1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
int WideCharToUtf8(unsigned int unused1, unsigned long unused2, wchar_t * wb, int ws, char *sb, int ss) {
|
||||||
|
wchar_t *p = (wchar_t *)(wb);
|
||||||
|
wchar_t *e = (wchar_t *)(wb + ws);
|
||||||
|
wchar_t *oldp;
|
||||||
|
char *s = sb;
|
||||||
|
int cnt = 0, qch, t;
|
||||||
|
|
||||||
|
if (ws < 1) {
|
||||||
|
ws = lstrlenW(wb);
|
||||||
|
e = (wchar_t *)(wb + ws);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (sb && ss) {
|
||||||
|
for (t; p < e; ++p) {
|
||||||
|
oldp = p;
|
||||||
|
t = wc2Utf8Len(&p, &ws);
|
||||||
|
|
||||||
|
if (p != oldp) { /* unicode surrogates encountered */
|
||||||
|
qch = 0x10000 + (((*oldp - 0xD800) & 0x3ff) << 10) + ((*p - 0xDC00) & 0x3ff);
|
||||||
|
} else
|
||||||
|
qch = *p;
|
||||||
|
|
||||||
|
if (qch <= 0x7f)
|
||||||
|
*s++ = (char) (qch),
|
||||||
|
cnt++;
|
||||||
|
else if (qch <= 0x7ff)
|
||||||
|
*s++ = 0xc0 | (char) (qch >> 6),
|
||||||
|
*s++ = 0x80 | (char) (qch & 0x3f),
|
||||||
|
cnt += 2;
|
||||||
|
else if (qch <= 0xffff)
|
||||||
|
*s++ = 0xe0 | (char) (qch >> 12),
|
||||||
|
*s++ = 0x80 | (char) ((qch >> 6) & 0x3f),
|
||||||
|
*s++ = 0x80 | (char) (qch & 0x3f),
|
||||||
|
cnt += 3;
|
||||||
|
else if (qch <= 0x1fffff)
|
||||||
|
*s++ = 0xf0 | (char) (qch >> 18),
|
||||||
|
*s++ = 0x80 | (char) ((qch >> 12) & 0x3f),
|
||||||
|
*s++ = 0x80 | (char) ((qch >> 6) & 0x3f),
|
||||||
|
*s++ = 0x80 | (char) (qch & 0x3f),
|
||||||
|
cnt += 4;
|
||||||
|
else if (qch <= 0x3ffffff)
|
||||||
|
*s++ = 0xf8 | (char) (qch >> 24),
|
||||||
|
*s++ = 0x80 | (char) ((qch >> 18) & 0x3f),
|
||||||
|
*s++ = 0x80 | (char) ((qch >> 12) & 0x3f),
|
||||||
|
*s++ = 0x80 | (char) ((qch >> 6) & 0x3f),
|
||||||
|
*s++ = 0x80 | (char) (qch & 0x3f),
|
||||||
|
cnt += 5;
|
||||||
|
else
|
||||||
|
*s++ = 0xfc | (char) (qch >> 30),
|
||||||
|
*s++ = 0x80 | (char) ((qch >> 24) & 0x3f),
|
||||||
|
*s++ = 0x80 | (char) ((qch >> 18) & 0x3f),
|
||||||
|
*s++ = 0x80 | (char) ((qch >> 12) & 0x3f),
|
||||||
|
*s++ = 0x80 | (char) ((qch >> 6) & 0x3f),
|
||||||
|
*s++ = 0x80 | (char) (qch & 0x3f),
|
||||||
|
cnt += 6;
|
||||||
|
}
|
||||||
|
if(cnt < ss) {
|
||||||
|
*(sb+cnt) = 0;
|
||||||
|
return cnt;
|
||||||
|
} else {
|
||||||
|
*(sb+ss) = 0;
|
||||||
|
return ss;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
for (t; p < e; ++p) {
|
||||||
|
t = wc2Utf8Len(&p, &ws);
|
||||||
|
cnt += t;
|
||||||
|
}
|
||||||
|
return cnt+1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
/*** Ends ***/
|
||||||
|
|
||||||
/*
|
/*
|
||||||
** Convert a UTF-8 string to UTF-32. Space to hold the returned string
|
** Convert a UTF-8 string to UTF-32. Space to hold the returned string
|
||||||
** is obtained from sqliteMalloc.
|
** is obtained from sqliteMalloc.
|
||||||
|
@ -132,20 +289,12 @@ static WCHAR *utf8ToUnicode(const char *zFilename){
|
||||||
int nCharcpy;
|
int nCharcpy;
|
||||||
WCHAR *zWideFilename;
|
WCHAR *zWideFilename;
|
||||||
|
|
||||||
nCharcpy = MultiByteToWideChar(CP_UTF8, 0, zFilename, -1, NULL, 0);
|
nCharcpy = Utf8ToWideChar(CP_UTF8, 0, zFilename, -1, NULL, 0);
|
||||||
if( zFilename!=0 && nCharcpy == 0) {
|
|
||||||
/* UTF8 conversion failed. This happens on NT 3.51 and 95. Make do with ACP conversion instead. */
|
|
||||||
nCharcpy = MultiByteToWideChar(CP_ACP, 0, zFilename, -1, NULL, 0);
|
|
||||||
}
|
|
||||||
zWideFilename = sqliteMalloc( nCharcpy*sizeof(zWideFilename[0]) );
|
zWideFilename = sqliteMalloc( nCharcpy*sizeof(zWideFilename[0]) );
|
||||||
if( zWideFilename==0 ){
|
if( zWideFilename==0 ){
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
nChar = MultiByteToWideChar(CP_UTF8, 0, zFilename, -1, zWideFilename, nCharcpy);
|
nChar = Utf8ToWideChar(CP_UTF8, 0, zFilename, -1, zWideFilename, nCharcpy);
|
||||||
if( zFilename!=0 && nChar == 0) {
|
|
||||||
/* UTF8 conversion failed. This happens on NT 3.51 and 95. Make do with ACP conversion instead. */
|
|
||||||
nChar = MultiByteToWideChar(CP_ACP, 0, zFilename, -1, zWideFilename, nCharcpy);
|
|
||||||
}
|
|
||||||
|
|
||||||
if( nChar==0 ){
|
if( nChar==0 ){
|
||||||
sqliteFree(zWideFilename);
|
sqliteFree(zWideFilename);
|
||||||
|
@ -163,22 +312,13 @@ static char *unicodeToUtf8(const WCHAR *zWideFilename){
|
||||||
int nBytecpy;
|
int nBytecpy;
|
||||||
char *zFilename;
|
char *zFilename;
|
||||||
|
|
||||||
nBytecpy = WideCharToMultiByte(CP_UTF8, 0, zWideFilename, -1, 0, 0, 0, 0);
|
nBytecpy = WideCharToUtf8(CP_UTF8, 0, zWideFilename, -1, 0, 0, 0, 0);
|
||||||
if( zWideFilename!=0 && nBytecpy == 0) {
|
|
||||||
/* UTF8 conversion failed. This happens on NT 3.51 and 95. Make do with ACP conversion instead. */
|
|
||||||
nBytecpy = WideCharToMultiByte(CP_ACP, 0, zWideFilename, -1, 0, 0, 0, 0);
|
|
||||||
}
|
|
||||||
zFilename = sqliteMalloc( nBytecpy );
|
zFilename = sqliteMalloc( nBytecpy );
|
||||||
if( zFilename==0 ){
|
if( zFilename==0 ){
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
nByte = WideCharToMultiByte(CP_UTF8, 0, zWideFilename, -1, zFilename, nBytecpy,
|
nByte = WideCharToUtf8(CP_UTF8, 0, zWideFilename, -1, zFilename, nBytecpy,
|
||||||
0, 0);
|
0, 0);
|
||||||
if( zWideFilename!=0 && nByte == 0) {
|
|
||||||
/* UTF8 conversion failed. This happens on NT 3.51 and 95. Make do with ACP conversion instead. */
|
|
||||||
nByte = WideCharToMultiByte(CP_ACP, 0, zWideFilename, -1, zFilename, nBytecpy,
|
|
||||||
0, 0);
|
|
||||||
}
|
|
||||||
if( nByte == 0 ){
|
if( nByte == 0 ){
|
||||||
sqliteFree(zFilename);
|
sqliteFree(zFilename);
|
||||||
zFilename = 0;
|
zFilename = 0;
|
||||||
|
|
Loading…
Reference in New Issue