blob: e9b3691174e435adec08731b09d307c788c20237 [file] [log] [blame]
/****************************************************************************
**
** Copyright (C) 2011 Nokia Corporation and/or its subsidiary(-ies).
** All rights reserved.
** Contact: Nokia Corporation (qt-info@nokia.com)
**
** This file is part of the plugins of the Qt Toolkit.
**
** $QT_BEGIN_LICENSE:LGPL$
** GNU Lesser General Public License Usage
** This file may be used under the terms of the GNU Lesser General Public
** License version 2.1 as published by the Free Software Foundation and
** appearing in the file LICENSE.LGPL included in the packaging of this
** file. Please review the following information to ensure the GNU Lesser
** General Public License version 2.1 requirements will be met:
** http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
**
** In addition, as a special exception, Nokia gives you certain additional
** rights. These rights are described in the Nokia Qt LGPL Exception
** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
**
** GNU General Public License Usage
** Alternatively, this file may be used under the terms of the GNU General
** Public License version 3.0 as published by the Free Software Foundation
** and appearing in the file LICENSE.GPL included in the packaging of this
** file. Please review the following information to ensure the GNU General
** Public License version 3.0 requirements will be met:
** http://www.gnu.org/copyleft/gpl.html.
**
** Other Usage
** Alternatively, this file may be used in accordance with the terms and
** conditions contained in a signed written agreement between you and Nokia.
**
**
**
**
**
** $QT_END_LICENSE$
**
****************************************************************************/
// Most of the code here was originally written by Serika Kurusugawa,
// a.k.a. Junji Takagi, and is included in Qt with the author's permission
// and the grateful thanks of the Qt team.
/*! \class QJisCodec
\reentrant
\internal
*/
#include "qjiscodec.h"
#include "qlist.h"
QT_BEGIN_NAMESPACE
#ifndef QT_NO_TEXTCODEC
// Byte values that get special treatment in this file.
// ReverseSolidus/YenSign and Tilde/Overline intentionally share a value:
// convertFromUnicode() uses them to decide whether a 7-bit byte may be written
// while ASCII is selected or needs JIS X 0201 Latin (and vice versa).
enum {
Esc = 0x1b, // starts an escape sequence
So = 0x0e, // Shift Out
Si = 0x0f, // Shift In
ReverseSolidus = 0x5c,
YenSign = 0x5c, // same byte value as ReverseSolidus
Tilde = 0x7e,
Overline = 0x7e // same byte value as Tilde
};
// NOTE: all three macros evaluate their argument more than once; pass only
// expressions without side effects.
// IsKana and IsJisChar are not referenced in the part of the file shown here.

// True when c lies in the inclusive range 0xa1..0xdf.
#define IsKana(c) (((c) >= 0xa1) && ((c) <= 0xdf))
// True when c lies in the inclusive range 0x21..0x7e.
#define IsJisChar(c) (((c) >= 0x21) && ((c) <= 0x7e))
// Wraps u in a QChar (truncated to ushort); a zero u yields
// QChar::ReplacementCharacter instead.
#define QValidChar(u) ((u) ? QChar((ushort)(u)) : QChar(QChar::ReplacementCharacter))
// Character set currently selected while encoding or decoding.
// The enumerators MinState..MaxState are used as indices into Esc_SEQ
// (as state - MinState), so their order must match that table.
// UnknownState lies outside that range; the codec uses it for unencodable
// characters and for unrecognized escape sequences.
// convertToUnicode() stores these values in 8 bits of ConverterState::state_data[1].
enum Iso2022State{ Ascii, MinState = Ascii,
JISX0201_Latin, JISX0201_Kana,
JISX0208_1978, JISX0208_1983,
JISX0212, MaxState = JISX0212,
UnknownState };
// Bytes that convertToUnicode() accepts (in addition to '$') directly after
// ESC, and after "ESC $", as part of a longer escape sequence.
static const char Esc_CHARS[] = "()*+-./";
// NUL-terminated escape sequences, one per encodable Iso2022State.
static const char Esc_Ascii[] = {Esc, '(', 'B', 0 }; // ESC ( B
static const char Esc_JISX0201_Latin[] = {Esc, '(', 'J', 0 }; // ESC ( J
static const char Esc_JISX0201_Kana[] = {Esc, '(', 'I', 0 }; // ESC ( I
static const char Esc_JISX0208_1978[] = {Esc, '$', '@', 0 }; // ESC $ @
static const char Esc_JISX0208_1983[] = {Esc, '$', 'B', 0 }; // ESC $ B
static const char Esc_JISX0212[] = {Esc, '$', '(', 'D', 0 }; // ESC $ ( D
// Lookup table indexed by (state - MinState); the entries must stay in the
// same order as the Iso2022State enumerators. There is no entry for
// UnknownState.
static const char * const Esc_SEQ[] = { Esc_Ascii,
Esc_JISX0201_Latin,
Esc_JISX0201_Kana,
Esc_JISX0208_1978,
Esc_JISX0208_1983,
Esc_JISX0212 };
/*!
    \internal

    Constructs the codec. The Unicode/JIS converter stored in \c conv is
    obtained from QJpUnicodeConv::newConverter() with QJpUnicodeConv::Default.
*/
QJisCodec::QJisCodec()
    : conv(QJpUnicodeConv::newConverter(QJpUnicodeConv::Default))
{
}
/*!
    \internal

    Destroys the codec together with the converter held in \c conv.
*/
QJisCodec::~QJisCodec()
{
    // The member is cast to a pointer-to-non-const before it is deleted,
    // then reset so no stale pointer is left behind.
    QJpUnicodeConv *converter = const_cast<QJpUnicodeConv *>(conv);
    delete converter;
    conv = 0;
}
/*!
    \internal

    Encodes the \a len characters starting at \a uc as ISO-2022-JP and returns
    the resulting bytes.

    Every call starts in the ASCII state and, if another character set is
    selected at the end, appends the "ESC ( B" sequence so the output ends in
    ASCII again. No encoder state is carried between calls; \a cs (which may
    be null) is only consulted for the ConvertInvalidToNull flag and updated
    with the number of characters that could not be encoded.

    Characters are tried, in order, as ASCII, JIS X 0201 (Latin or Kana),
    JIS X 0208 and JIS X 0212. A character that none of these can represent
    is written in ASCII as '?' (or as a NUL byte when ConvertInvalidToNull is
    set) and counted in \c cs->invalidChars.
*/
QByteArray QJisCodec::convertFromUnicode(const QChar *uc, int len, ConverterState *cs) const
{
    char replacement = '?';
    if (cs) {
        if (cs->flags & ConvertInvalidToNull)
            replacement = 0;
    }
    int invalid = 0;
    QByteArray result;
    Iso2022State state = Ascii;   // character set required by the current character
    Iso2022State prev = Ascii;    // character set last announced in the output
    for (int i = 0; i < len; i++) {
        QChar ch = uc[i];
        uint j;
        if (ch.row() == 0x00 && ch.cell() < 0x80) {
            // Ascii
            // Stay in JIS X 0201 Latin when it is already selected, except for
            // the two bytes that mean something different there.
            if (state != JISX0201_Latin ||
                ch.cell() == ReverseSolidus || ch.cell() == Tilde) {
                state = Ascii;
            }
            j = ch.cell();
        } else if ((j = conv->unicodeToJisx0201(ch.row(), ch.cell())) != 0) {
            if (j < 0x80) {
                // JIS X 0201 Latin
                // Stay in ASCII when it is already selected, except for the
                // two bytes that mean something different there. The test
                // must look at the converted byte j: YenSign and Overline are
                // JIS byte values, and this branch is only reached by
                // characters outside 7-bit ASCII, whose Unicode cell is not
                // the byte that gets written. Comparing ch.cell() here let
                // such characters be emitted as 0x5c/0x7e while ASCII was
                // selected.
                if (state != Ascii ||
                    j == YenSign || j == Overline) {
                    state = JISX0201_Latin;
                }
            } else {
                // JIS X 0201 Kana
                state = JISX0201_Kana;
                j &= 0x7f;   // written as a 7-bit byte after the Kana escape
            }
        } else if ((j = conv->unicodeToJisx0208(ch.row(), ch.cell())) != 0) {
            // JIS X 0208
            state = JISX0208_1983;
        } else if ((j = conv->unicodeToJisx0212(ch.row(), ch.cell())) != 0) {
            // JIS X 0212
            state = JISX0212;
        } else {
            // Invalid
            state = UnknownState;
            j = replacement;
            ++invalid;
        }
        if (state != prev) {
            if (state == UnknownState) {
                // Esc_SEQ has no entry for UnknownState; the replacement byte
                // is written in ASCII.
                result += Esc_Ascii;
            } else {
                result += Esc_SEQ[state - MinState];
            }
            prev = state;
        }
        if (j < 0x0100) {
            // single-byte code
            result += j & 0xff;
        } else {
            // double-byte code, high byte first
            result += (j >> 8) & 0xff;
            result += j & 0xff;
        }
    }
    if (prev != Ascii) {
        // Leave the stream in ASCII.
        result += Esc_Ascii;
    }
    if (cs) {
        cs->invalidChars += invalid;
    }
    return result;
}
/*!
    \internal

    Decodes the \a len bytes starting at \a chars as ISO-2022-JP and returns
    the resulting text.

    The decoder tracks the selected character set, whether it is in the
    middle of an escape sequence, and the bytes buffered so far (part of an
    escape sequence or the first byte of a double-byte character). When \a cs
    is non-null this state is restored from it on entry and written back on
    exit, so input may be fed in arbitrary pieces:
    \list
    \o \c remainingChars - number of buffered bytes
    \o \c state_data[0]  - the four buffer bytes, first byte in the top 8 bits
    \o \c state_data[1]  - current state in bits 0-7, state saved by Shift Out in bits 8-15
    \o \c state_data[2]  - non-zero while inside an escape sequence
    \endlist

    Recognized escape sequences are ESC ( B, ESC ( I, ESC ( J, ESC $ @,
    ESC $ B and ESC $ ( D; any other sequence leaves the decoder in
    UnknownState. Bytes that cannot be decoded in the current state are
    replaced by QChar::ReplacementCharacter (QChar::Null when
    ConvertInvalidToNull is set in \c cs->flags) and counted in
    \c cs->invalidChars. Code points the converter has no mapping for become
    QChar::ReplacementCharacter via QValidChar and are not counted.
*/
QString QJisCodec::convertToUnicode(const char* chars, int len, ConverterState *cs) const
{
    uchar buf[4] = {0, 0, 0, 0};
    int nbuf = 0;
    Iso2022State state = Ascii, prev = Ascii;
    bool esc = false;
    QChar replacement = QChar::ReplacementCharacter;
    if (cs) {
        if (cs->flags & ConvertInvalidToNull)
            replacement = QChar::Null;
        // Resume where the previous call stopped.
        nbuf = cs->remainingChars;
        buf[0] = (cs->state_data[0] >> 24) & 0xff;
        buf[1] = (cs->state_data[0] >> 16) & 0xff;
        buf[2] = (cs->state_data[0] >> 8) & 0xff;
        buf[3] = (cs->state_data[0] >> 0) & 0xff;
        state = (Iso2022State)((cs->state_data[1] >> 0) & 0xff);
        prev = (Iso2022State)((cs->state_data[1] >> 8) & 0xff);
        esc = cs->state_data[2];
    }
    int invalid = 0;
    QString result;
    for (int i=0; i<len; i++) {
        uchar ch = chars[i];
        if (esc) {
            // Escape sequence
            // Unknown until the sequence turns out to be one we recognize.
            state = UnknownState;
            switch (nbuf) {
            case 0:
                // strchr() also matches the terminating NUL of Esc_CHARS, so
                // a NUL byte has to be excluded explicitly.
                if (ch == '$' || (ch != 0 && strchr(Esc_CHARS, ch))) {
                    buf[nbuf++] = ch;
                } else {
                    nbuf = 0;
                    esc = false;
                }
                break;
            case 1:
                if (buf[0] == '$') {
                    if (ch != 0 && strchr(Esc_CHARS, ch)) {
                        // three-byte designation, e.g. Esc $ ( D
                        buf[nbuf++] = ch;
                    } else {
                        switch (ch) {
                        case '@':
                            state = JISX0208_1978;  // Esc $ @
                            break;
                        case 'B':
                            state = JISX0208_1983;  // Esc $ B
                            break;
                        }
                        nbuf = 0;
                        esc = false;
                    }
                } else {
                    if (buf[0] == '(') {
                        switch (ch) {
                        case 'B':
                            state = Ascii;  // Esc (B
                            break;
                        case 'I':
                            state = JISX0201_Kana;  // Esc (I
                            break;
                        case 'J':
                            state = JISX0201_Latin; // Esc (J
                            break;
                        }
                    }
                    nbuf = 0;
                    esc = false;
                }
                break;
            case 2:
                if (buf[1] == '(') {
                    switch (ch) {
                    case 'D':
                        state = JISX0212;   // Esc $ (D
                        break;
                    }
                }
                nbuf = 0;
                esc = false;
                break;
            }
        } else {
            if (ch == Esc) {
                // Escape sequence
                nbuf = 0;
                esc = true;
            } else if (ch == So) {
                // Shift out
                prev = state;
                state = JISX0201_Kana;
                nbuf = 0;
            } else if (ch == Si) {
                // Shift in
                // Only a single-byte Latin set is restored; anything else
                // falls back to ASCII.
                if (prev == Ascii || prev == JISX0201_Latin) {
                    state = prev;
                } else {
                    state = Ascii;
                }
                nbuf = 0;
            } else {
                uint u;
                switch (nbuf) {
                case 0:
                    switch (state) {
                    case Ascii:
                        if (ch < 0x80) {
                            result += QLatin1Char(ch);
                            break;
                        }
                        // 8-bit bytes are handed to the JIS X 0201 converter
                        /* fall through */
                    case JISX0201_Latin:
                        u = conv->jisx0201ToUnicode(ch);
                        result += QValidChar(u);
                        break;
                    case JISX0201_Kana:
                        u = conv->jisx0201ToUnicode(ch | 0x80);
                        result += QValidChar(u);
                        break;
                    case JISX0208_1978:
                    case JISX0208_1983:
                    case JISX0212:
                        // first byte of a double-byte character
                        buf[nbuf++] = ch;
                        break;
                    default:
                        // Undecodable state: honour ConvertInvalidToNull and
                        // count the byte, exactly like the second-byte case
                        // below does.
                        result += replacement;
                        ++invalid;
                        break;
                    }
                    break;
                case 1:
                    switch (state) {
                    case JISX0208_1978:
                    case JISX0208_1983:
                        u = conv->jisx0208ToUnicode(buf[0] & 0x7f, ch & 0x7f);
                        result += QValidChar(u);
                        break;
                    case JISX0212:
                        u = conv->jisx0212ToUnicode(buf[0] & 0x7f, ch & 0x7f);
                        result += QValidChar(u);
                        break;
                    default:
                        result += replacement;
                        ++invalid;
                        break;
                    }
                    nbuf = 0;
                    break;
                }
            }
        }
    }
    if (cs) {
        // Save everything needed to continue with the next chunk.
        cs->remainingChars = nbuf;
        cs->invalidChars += invalid;
        // Shift as unsigned: a buffered byte >= 0x80 must not be shifted into
        // the sign bit of a promoted int.
        cs->state_data[0] = (uint(buf[0]) << 24) + (uint(buf[1]) << 16)
                          + (uint(buf[2]) << 8) + uint(buf[3]);
        cs->state_data[1] = (prev << 8) + state;
        cs->state_data[2] = esc;
    }
    return result;
}
/*!
    \internal

    Returns the MIB enum value reported for this codec.
*/
int QJisCodec::_mibEnum()
{
    // 39 is the MIBenum listed for ISO-2022-JP in the IANA character-set registry.
    const int iso2022JpMib = 39;
    return iso2022JpMib;
}
/*!
    \internal

    Returns the name of this codec, "ISO-2022-JP".
*/
QByteArray QJisCodec::_name()
{
    return QByteArray("ISO-2022-JP");
}
/*!
    Returns the alternative names of this codec.

    The only alias is "JIS7", which is kept for compatibility with Qt 3.
*/
QList<QByteArray> QJisCodec::_aliases()
{
    QList<QByteArray> aliases;
    aliases.append(QByteArray("JIS7")); // Qt 3 compat
    return aliases;
}
#endif // QT_NO_TEXTCODEC
QT_END_NAMESPACE