// blob: ae66d14f4416dcf4db82db18de1a2d68a9232689 [file] [log] [blame]
// Common/StringConvert.cpp
#include "StdAfx.h"
#include "StringConvert.h"
#ifndef _WIN32
#include <stdlib.h>
#endif
// Substitute character passed as `defaultChar` by the convenience overloads
// in this file that do not take one from the caller.
static const char k_DefultChar = '_';
#ifdef _WIN32
/*
MultiByteToWideChar(CodePage, DWORD dwFlags,
LPCSTR lpMultiByteStr, int cbMultiByte,
LPWSTR lpWideCharStr, int cchWideChar)
if (cbMultiByte == 0)
return: 0. ERR: ERROR_INVALID_PARAMETER
if (cchWideChar == 0)
return: the required buffer size in characters.
if (supplied buffer size was not large enough)
return: 0. ERR: ERROR_INSUFFICIENT_BUFFER
The number of filled characters in lpWideCharStr can be smaller than cchWideChar (if last character is complex)
If there are illegal characters:
if MB_ERR_INVALID_CHARS is set in dwFlags:
- the function stops conversion on illegal character.
- Return: 0. ERR: ERROR_NO_UNICODE_TRANSLATION.
if MB_ERR_INVALID_CHARS is NOT set in dwFlags:
before Vista: illegal character is dropped (skipped). WinXP-64: GetLastError() returns 0.
in Vista+: illegal character is not dropped (MSDN). Undocumented: illegal
character is converted to U+FFFD, which is REPLACEMENT CHARACTER.
*/
// Converts the multi-byte string `src`, interpreted in `codePage`, into wide
// characters stored in `dest`.
//   dest     - receives the result; it is emptied before anything else happens.
//   src      - input bytes; an empty input leaves `dest` empty.
//   codePage - code page identifier forwarded to MultiByteToWideChar.
// Throws the int value 282228 when MultiByteToWideChar reports a failure.
// (An earlier variant that copied the leading ASCII bytes by hand before
// calling the API was disabled in favour of the two-call scheme below.)
void MultiByteToUnicodeString2(UString &dest, const AString &src, UINT codePage)
{
  dest.Empty();
  if (src.IsEmpty())
    return;

  // Pass 1: no output buffer, so the API only reports the required size
  // in wide characters.
  const unsigned numWideChars = MultiByteToWideChar(codePage, 0, src, src.Len(), NULL, 0);
  if (numWideChars == 0)
  {
    // A zero size is accepted silently only when no error code is set.
    if (GetLastError() != 0)
      throw 282228;
    return;
  }

  // Pass 2: convert into a buffer obtained for exactly that size.
  const unsigned numFilled = MultiByteToWideChar(codePage, 0, src, src.Len(),
      dest.GetBuf(numWideChars), numWideChars);
  if (numFilled == 0)
    throw 282228;
  dest.ReleaseBuf_SetEnd(numFilled);
}
/*
int WideCharToMultiByte(
UINT CodePage, DWORD dwFlags,
LPCWSTR lpWideCharStr, int cchWideChar,
LPSTR lpMultiByteStr, int cbMultiByte,
LPCSTR lpDefaultChar, LPBOOL lpUsedDefaultChar);
if (lpDefaultChar == NULL),
- it uses system default value.
if (CodePage == CP_UTF7 || CodePage == CP_UTF8)
if (lpDefaultChar != NULL || lpUsedDefaultChar != NULL)
return: 0. ERR: ERROR_INVALID_PARAMETER.
The function operates most efficiently, if (lpDefaultChar == NULL && lpUsedDefaultChar == NULL)
*/
// Converts the wide string `src` into multi-byte text in `codePage`, stored in `dest`.
//   dest               - receives the result; emptied on entry.
//   src                - input; an empty input leaves `dest` empty.
//   codePage           - code page identifier forwarded to WideCharToMultiByte.
//   defaultChar        - substitute handed to the API for unrepresentable
//                        characters (not used for CP_UTF7 / CP_UTF8).
//   defaultCharWasUsed - reset to false on entry; for non-UTF code pages it is
//                        set from the API's "used default char" output.
// Throws the int value 282228 when WideCharToMultiByte reports a failure.
// (Two earlier variants with a hand-written ASCII fast path were disabled in
// favour of the two-call scheme below.)
static void UnicodeStringToMultiByte2(AString &dest, const UString &src, UINT codePage, char defaultChar, bool &defaultCharWasUsed)
{
  dest.Empty();
  defaultCharWasUsed = false;
  if (src.IsEmpty())
    return;

  // Pass 1: no output buffer, so the API only reports the required size in bytes.
  const unsigned numBytes = WideCharToMultiByte(codePage, 0, src, src.Len(), NULL, 0, NULL, NULL);
  if (numBytes == 0)
  {
    // A zero size is accepted silently only when no error code is set.
    if (GetLastError() != 0)
      throw 282228;
    return;
  }

  // WideCharToMultiByte rejects non-NULL default-char arguments for
  // CP_UTF7 / CP_UTF8 (ERROR_INVALID_PARAMETER), so they stay NULL there.
  const bool isUtf = (codePage == CP_UTF8 || codePage == CP_UTF7);
  BOOL defUsed = FALSE;
  const char *defCharArg = NULL;
  BOOL *defUsedArg = NULL;
  if (!isUtf)
  {
    defCharArg = &defaultChar;
    defUsedArg = &defUsed;
  }

  // Pass 2: convert into a buffer obtained for exactly the reported size.
  const unsigned numWritten = WideCharToMultiByte(codePage, 0, src, src.Len(),
      dest.GetBuf(numBytes), numBytes,
      defCharArg, defUsedArg);

  // The flag is reported before the failure check, so it is updated even
  // when the call below ends in a throw.
  if (!isUtf)
    defaultCharWasUsed = (defUsed != FALSE);
  if (numWritten == 0)
    throw 282228;
  dest.ReleaseBuf_SetEnd(numWritten);
}
/*
#ifndef UNDER_CE
AString SystemStringToOemString(const CSysString &src)
{
AString dest;
const unsigned len = src.Len() * 2;
CharToOem(src, dest.GetBuf(len));
dest.ReleaseBuf_CalcLen(len);
return dest;
}
#endif
*/
#else
// Non-Windows build: converts the multi-byte string `src` into wide characters in `dest`.
//   dest - receives the result; emptied on entry.
//   src  - input bytes; an empty input leaves `dest` empty.
// The code page argument is ignored; conversion goes through mbstowcs(),
// which follows the current C locale. If mbstowcs() fails, every input byte
// is widened individually as an unsigned value instead.
void MultiByteToUnicodeString2(UString &dest, const AString &src, UINT /* codePage */)
{
  dest.Empty();
  if (src.IsEmpty())
    return;

  const size_t capacity = ((size_t)src.Len() + 1) * 2;
  wchar_t *out = dest.GetBuf((unsigned)capacity);

  const size_t converted = mbstowcs(out, src, capacity);
  if (converted == (size_t)-1)
  {
    // Fallback: map each byte (treated as unsigned) straight to a wchar_t,
    // stopping at the terminating zero.
    const char *in = (const char *)src;
    unsigned pos = 0;
    for (; in[pos] != 0; pos++)
      out[pos] = (wchar_t)(Byte)in[pos];
    out[pos] = 0;
    dest.ReleaseBuf_SetLen(pos);
    return;
  }

  dest.ReleaseBuf_SetEnd((unsigned)converted);
}
// Non-Windows build: converts the wide string `src` into multi-byte text in `dest`.
//   dest               - receives the result; emptied on entry.
//   src                - input; an empty input leaves `dest` empty.
//   defaultChar        - substitute used by the fallback path for characters >= 0x100.
//   defaultCharWasUsed - reset to false on entry; set to true when the fallback
//                        path substitutes at least one character.
// The code page argument is ignored; conversion goes through wcstombs(),
// which follows the current C locale. If wcstombs() fails, each character is
// narrowed individually instead.
static void UnicodeStringToMultiByte2(AString &dest, const UString &src, UINT /* codePage */, char defaultChar, bool &defaultCharWasUsed)
{
  dest.Empty();
  defaultCharWasUsed = false;
  if (src.IsEmpty())
    return;

  // Six output bytes are reserved per input character (plus terminator).
  const size_t capacity = ((size_t)src.Len() + 1) * 6;
  char *out = dest.GetBuf((unsigned)capacity);

  const size_t converted = wcstombs(out, src, capacity);
  if (converted == (size_t)-1)
  {
    // Fallback: one output byte per input character; values that do not fit
    // below 0x100 are replaced by defaultChar.
    const wchar_t *in = (const wchar_t *)src;
    unsigned pos = 0;
    for (; in[pos] != 0; pos++)
    {
      wchar_t c = in[pos];
      if (c >= 0x100)
      {
        c = defaultChar;
        defaultCharWasUsed = true;
      }
      out[pos] = (char)c;
    }
    out[pos] = 0;
    dest.ReleaseBuf_SetLen(pos);
    return;
  }

  dest.ReleaseBuf_SetEnd((unsigned)converted);
}
#endif
// Value-returning convenience form of MultiByteToUnicodeString2():
// converts `src` using `codePage` and returns the wide string.
UString MultiByteToUnicodeString(const AString &src, UINT codePage)
{
  UString result;
  MultiByteToUnicodeString2(result, src, codePage);
  return result;
}
// Converts `src` into `dest` using `codePage`, with k_DefultChar as the
// substitute character. Whether a substitution happened is not reported
// to the caller.
void UnicodeStringToMultiByte2(AString &dest, const UString &src, UINT codePage)
{
  bool substitutionIgnored;  // assigned by the callee, intentionally discarded
  UnicodeStringToMultiByte2(dest, src, codePage, k_DefultChar, substitutionIgnored);
}
// Value-returning form of the five-argument UnicodeStringToMultiByte2():
// converts `src` using `codePage` and `defaultChar`, reports substitution
// through `defaultCharWasUsed`, and returns the multi-byte string.
AString UnicodeStringToMultiByte(const UString &src, UINT codePage, char defaultChar, bool &defaultCharWasUsed)
{
  AString result;
  UnicodeStringToMultiByte2(result, src, codePage, defaultChar, defaultCharWasUsed);
  return result;
}
// Converts `src` using `codePage` and returns the multi-byte string.
// k_DefultChar is passed as the substitute character; whether a
// substitution happened is not reported to the caller.
AString UnicodeStringToMultiByte(const UString &src, UINT codePage)
{
  bool substitutionIgnored;  // assigned by the callee, intentionally discarded
  AString result;
  UnicodeStringToMultiByte2(result, src, codePage, k_DefultChar, substitutionIgnored);
  return result;
}