blob: a852a39b497b604088eb7a45980e97de8761f60b [file] [log] [blame]
/*
**********************************************************************
* Copyright (c) 2006-2007, Google and others. All Rights Reserved.
**********************************************************************
* Author: Mark Davis
**********************************************************************
*/
package org.unicode.cldr.util;
import java.io.IOException;
import com.ibm.icu.lang.UCharacter;
import com.ibm.icu.text.UTF16;
import com.ibm.icu.util.ICUUncheckedIOException;
/**
 * Converts UTF-16 {@code char}s to UTF-8 bytes and UTF-8 bytes back to {@code char}s.
 *
 * <p>Encoding is stateful: a lead surrogate (0xD800..0xDBFF) is not written immediately but
 * remembered in {@link #lead}, so that a following trail surrogate can be combined with it into
 * one four-byte sequence. A remembered lead surrogate that is not followed by a trail surrogate
 * is written on its own as a three-byte sequence.
 *
 * @author markdavis
 */
// TODO optimize this
public class Utf8StringByteConverter extends StringByteConverter {
    /** Lead surrogate waiting for its trail surrogate; 0 when nothing is pending. */
    char lead = 0;

    /**
     * Appends the UTF-8 form of {@code ch} to {@code output}.
     *
     * <p>If {@code ch} is a lead surrogate nothing is written for it yet; it is stored and
     * resolved by the next call (or by {@link #toBytes(byte[], int)}). If a lead surrogate is
     * pending and {@code ch} is not a trail surrogate, the pending lead is written first as three
     * bytes and {@code ch} is then encoded normally.
     *
     * @param ch the UTF-16 code unit to encode
     * @param output destination array
     * @param bytePosition index in {@code output} at which to start writing
     * @return the index just past the last byte written (equal to {@code bytePosition} when
     *     nothing was written)
     */
    @Override
    public int toBytes(char ch, byte[] output, int bytePosition) {
        int pos = bytePosition;

        // Resolve any state left over from a previous lead surrogate.
        if (lead != 0) {
            if (UTF16.isTrailSurrogate(ch)) {
                pos = writeFourBytes(UCharacter.getCodePoint(lead, ch), output, pos);
                lead = 0;
                return pos;
            }
            // Unpaired lead: emit it by itself, then carry on with ch below.
            pos = writeThreeBytes(lead, output, pos);
            lead = 0;
        }

        // Lead surrogate: defer until we see what follows.
        if ((ch & 0xFC00) == 0xD800) {
            lead = ch;
            return pos;
        }
        if (ch >= 0x800) {
            return writeThreeBytes(ch, output, pos);
        }
        if (ch >= 0x80) {
            output[pos++] = (byte) (0xC0 | (ch >>> 6));
            output[pos++] = continuation(ch);
            return pos;
        }
        output[pos++] = (byte) ch;
        return pos;
    }

    /**
     * Flushes a pending lead surrogate, if any, writing it as a three-byte sequence.
     *
     * @param output destination array
     * @param bytePosition index in {@code output} at which to start writing
     * @return the index just past the last byte written, or {@code bytePosition} unchanged when
     *     no lead surrogate was pending
     */
    @Override
    public int toBytes(byte[] output, int bytePosition) {
        if (lead == 0) {
            return bytePosition;
        }
        final int end = writeThreeBytes(lead, output, bytePosition);
        lead = 0;
        return end;
    }

    /**
     * NOTE(review): a single {@link #toBytes(char, byte[], int)} call can write up to six bytes
     * (a flushed unpaired lead surrogate followed by a three-byte character) - confirm that
     * callers size their buffers per string rather than per call.
     *
     * @return 4
     */
    @Override
    public int getMaxBytesPerChar() {
        return 4;
    }

    /**
     * Decodes UTF-8 bytes from {@code input} and appends the resulting {@code char}s to
     * {@code result}. Four-byte sequences are appended as a lead/trail surrogate pair.
     *
     * <p>Note that {@code byteLength} is used as an exclusive end index: decoding continues
     * while the current index is less than {@code byteLength}. The bytes are not validated;
     * the number of continuation bytes read is chosen from the first byte alone.
     *
     * @param input source bytes
     * @param byteStart index of the first byte to decode
     * @param byteLength index at which decoding stops (exclusive)
     * @param result destination for the decoded {@code char}s
     * @return {@code result}
     * @throws ICUUncheckedIOException if {@code result} throws an {@link IOException}
     */
    @Override
    public Appendable fromBytes(byte[] input, int byteStart, int byteLength,
        Appendable result) {
        try {
            int index = byteStart;
            while (index < byteLength) {
                final int first = input[index++] & 0xFF;

                if (first < 0x80) {
                    // single byte
                    result.append((char) first);
                    continue;
                }

                if (first < 0xE0) {
                    // two bytes: 5 payload bits + 6
                    final int low = input[index++] & 0x3F;
                    result.append((char) (((first & 0x1F) << 6) | low));
                    continue;
                }

                if (first < 0xF0) {
                    // three bytes: 4 payload bits + 6 + 6
                    final int mid = input[index++] & 0x3F;
                    final int low = input[index++] & 0x3F;
                    result.append((char) (((first & 0xF) << 12) | (mid << 6) | low));
                    continue;
                }

                // four bytes: 3 payload bits + 6 + 6 + 6, emitted as a surrogate pair
                final int high = input[index++] & 0x3F;
                final int mid = input[index++] & 0x3F;
                final int low = input[index++] & 0x3F;
                final int cp = ((first & 0x7) << 18) | (high << 12) | (mid << 6) | low;
                result.append(UTF16.getLeadSurrogate(cp));
                result.append(UTF16.getTrailSurrogate(cp));
            }
            return result;
        } catch (IOException e) {
            throw new ICUUncheckedIOException("Internal error", e);
        }
    }

    /** Builds a continuation byte (10xxxxxx) from the low six bits of {@code bits}. */
    private static byte continuation(int bits) {
        return (byte) (0x80 | (bits & 0x3F));
    }

    /** Writes {@code c} as a three-byte sequence and returns the index after the last byte. */
    private static int writeThreeBytes(char c, byte[] output, int pos) {
        output[pos++] = (byte) (0xE0 | (c >>> 12));
        output[pos++] = continuation(c >>> 6);
        output[pos++] = continuation(c);
        return pos;
    }

    /** Writes {@code cp} as a four-byte sequence and returns the index after the last byte. */
    private static int writeFourBytes(int cp, byte[] output, int pos) {
        output[pos++] = (byte) (0xF0 | (cp >>> 18));
        output[pos++] = continuation(cp >>> 12);
        output[pos++] = continuation(cp >>> 6);
        output[pos++] = continuation(cp);
        return pos;
    }
}