tools/java/org/unicode/cldr/util/Utf8StringByteConverter.java - platform/external/cldr - Git at Google

 /*
  **********************************************************************
  * Copyright (c) 2006-2007, Google and others.  All Rights Reserved.
  **********************************************************************
  * Author: Mark Davis
  **********************************************************************
  */
 package org.unicode.cldr.util;

 import java.io.IOException;

 import com.ibm.icu.lang.UCharacter;
 import com.ibm.icu.text.UTF16;
 import com.ibm.icu.util.ICUUncheckedIOException;

 /**
  * Converts UTF-16 code units to UTF-8 bytes, and UTF-8 bytes back to UTF-16 code units.
  *
  * <p>The converter is stateful: a lead surrogate passed to
  * {@link #toBytes(char, byte[], int)} is held in {@link #lead} until the next call shows
  * whether a trail surrogate follows. A surrogate pair is written as one four-byte sequence;
  * a lead surrogate that turns out to be unpaired is written as a three-byte sequence of its
  * own.
  *
  * @author markdavis
  */
 // TODO optimize this
 public class Utf8StringByteConverter extends StringByteConverter {
     /** Lead surrogate waiting for its trail surrogate, or 0 when nothing is pending. */
     char lead = 0;

     /**
      * Encodes one UTF-16 code unit, taking any pending lead surrogate into account.
      *
      * <p>A lead surrogate is only remembered, not written. If a lead surrogate is pending and
      * {@code ch} is not a trail surrogate, the pending unit is flushed first, so a single call
      * can write up to six bytes.
      *
      * @param ch the code unit to encode
      * @param output the array that receives the bytes
      * @param bytePosition the index in {@code output} of the first byte to write
      * @return the index just past the last byte written; {@code bytePosition} itself when
      *     nothing was written
      */
     @Override
     public int toBytes(char ch, byte[] output, int bytePosition) {
         int pos = bytePosition;
         if (lead != 0) {
             if (UTF16.isTrailSurrogate(ch)) {
                 // The pair is complete: emit the supplementary code point as four bytes.
                 final int cp = UCharacter.getCodePoint(lead, ch);
                 output[pos++] = (byte) (0xF0 | (cp >>> 18));
                 output[pos++] = (byte) (0x80 | ((cp >>> 12) & 0x3F));
                 output[pos++] = (byte) (0x80 | ((cp >>> 6) & 0x3F));
                 output[pos++] = (byte) (0x80 | (cp & 0x3F));
                 lead = 0;
                 return pos;
             }
             // The pending lead surrogate is unpaired: flush it on its own, then handle ch.
             pos = writeThreeByteForm(lead, output, pos);
             lead = 0;
         }
         if (ch >= 0xD800 && ch < 0xDC00) {
             // Lead surrogate: wait for the next call before writing anything.
             lead = ch;
         } else if (ch < 0x80) {
             output[pos++] = (byte) ch;
         } else if (ch < 0x800) {
             output[pos++] = (byte) (0xC0 | (ch >>> 6));
             output[pos++] = (byte) (0x80 | (ch & 0x3F));
         } else {
             pos = writeThreeByteForm(ch, output, pos);
         }
         return pos;
     }

     /**
      * Flushes a pending lead surrogate, if there is one, as a three-byte sequence and clears
      * the pending state.
      *
      * @param output the array that receives the bytes
      * @param bytePosition the index in {@code output} of the first byte to write
      * @return the index just past the last byte written; {@code bytePosition} itself when no
      *     lead surrogate was pending
      */
     @Override
     public int toBytes(byte[] output, int bytePosition) {
         if (lead == 0) {
             return bytePosition;
         }
         final int end = writeThreeByteForm(lead, output, bytePosition);
         lead = 0;
         return end;
     }

     /**
      * @return 4, the length of the longest sequence this converter writes for one code point
      */
     @Override
     public int getMaxBytesPerChar() {
         return 4;
     }

     /**
      * Decodes UTF-8 bytes and appends the resulting UTF-16 code units to {@code result}.
      *
      * <p>The sequence length is chosen from the first byte alone; following bytes are masked
      * to their low six bits without being validated or checked against {@code byteLength}.
      *
      * @param input the bytes to decode
      * @param byteStart the index of the first byte to read
      * @param byteLength the index at which reading stops; note that it is compared directly
      *     with the read position, i.e. it acts as an exclusive end index
      * @param result the destination for the decoded code units
      * @return {@code result}
      * @throws ICUUncheckedIOException if {@code result} throws an {@link IOException}
      */
     @Override
     public Appendable fromBytes(byte[] input, int byteStart, int byteLength,
         Appendable result) {
         int pos = byteStart;
         try {
             while (pos < byteLength) {
                 final int first = input[pos++] & 0xFF;
                 final char unit;
                 if (first < 0x80) {
                     // Single byte: the value is the code unit.
                     unit = (char) first;
                 } else if (first < 0xE0) {
                     // Two bytes: 5 payload bits, then 6.
                     unit = (char) (((first & 0x1F) << 6) | (input[pos++] & 0x3F));
                 } else if (first < 0xF0) {
                     // Three bytes: 4 payload bits, then 6, then 6.
                     int bits = ((first & 0xF) << 6) | (input[pos++] & 0x3F);
                     bits = (bits << 6) | (input[pos++] & 0x3F);
                     unit = (char) bits;
                 } else {
                     // Four bytes: rebuild the code point and emit it as a surrogate pair.
                     int cp = ((first & 0x7) << 6) | (input[pos++] & 0x3F);
                     cp = (cp << 6) | (input[pos++] & 0x3F);
                     cp = (cp << 6) | (input[pos++] & 0x3F);
                     result.append(UTF16.getLeadSurrogate(cp));
                     unit = UTF16.getTrailSurrogate(cp);
                 }
                 result.append(unit);
             }
             return result;
         } catch (IOException e) {
             throw new ICUUncheckedIOException("Internal error", e);
         }
     }

     /** Writes {@code unit} as a three-byte sequence at {@code pos} and returns {@code pos + 3}. */
     private static int writeThreeByteForm(char unit, byte[] output, int pos) {
         output[pos] = (byte) (0xE0 | (unit >>> 12));
         output[pos + 1] = (byte) (0x80 | ((unit >>> 6) & 0x3F));
         output[pos + 2] = (byte) (0x80 | (unit & 0x3F));
         return pos + 3;
     }
 }
	/*
	**********************************************************************
	* Copyright (c) 2006-2007, Google and others. All Rights Reserved.
	**********************************************************************
	* Author: Mark Davis
	**********************************************************************
	*/
	package org.unicode.cldr.util;

	import java.io.IOException;

	import com.ibm.icu.lang.UCharacter;
	import com.ibm.icu.text.UTF16;
	import com.ibm.icu.util.ICUUncheckedIOException;

	/**
	* @author markdavis
	*/
	// TODO optimize this
	public class Utf8StringByteConverter extends StringByteConverter {
	char lead = 0;

	@Override
	public int toBytes(char ch, byte[] output, int bytePosition) {
	// we may have state, if we were processing a supplemental char
	if (lead != 0) {
	if (UTF16.isTrailSurrogate(ch)) {
	int cp = UCharacter.getCodePoint(lead, ch);
	output[bytePosition++] = (byte) (0xF0 \| (cp >>> 18));
	output[bytePosition++] = (byte) (0x80 \| ((cp >>> 12) & 0x3F));
	output[bytePosition++] = (byte) (0x80 \| ((cp >>> 6) & 0x3F));
	output[bytePosition++] = (byte) (0x80 \| (cp & 0x3F));
	lead = 0;
	return bytePosition;
	}
	// write lead
	output[bytePosition++] = (byte) (0xE0 \| (lead >>> 12));
	output[bytePosition++] = (byte) (0x80 \| ((lead >>> 6) & 0x3F));
	output[bytePosition++] = (byte) (0x80 \| (lead & 0x3F));
	lead = 0;
	}
	if (ch < 0x80) {
	output[bytePosition++] = (byte) ch;
	} else if (ch < 0x800) {
	output[bytePosition++] = (byte) (0xC0 \| (ch >>> 6));
	output[bytePosition++] = (byte) (0x80 \| (ch & 0x3F));
	} else if (ch >= 0xD800 && ch < 0xDC00) {
	lead = ch;
	} else {
	output[bytePosition++] = (byte) (0xE0 \| (ch >>> 12));
	output[bytePosition++] = (byte) (0x80 \| ((ch >>> 6) & 0x3F));
	output[bytePosition++] = (byte) (0x80 \| (ch & 0x3F));
	}
	return bytePosition;
	}

	@Override
	public int toBytes(byte[] output, int bytePosition) {
	if (lead != 0) {
	output[bytePosition++] = (byte) (0xE0 \| (lead >>> 12));
	output[bytePosition++] = (byte) (0x80 \| ((lead >>> 6) & 0x3F));
	output[bytePosition++] = (byte) (0x80 \| (lead & 0x3F));
	lead = 0;
	}
	return bytePosition;
	}

	@Override
	public int getMaxBytesPerChar() {
	return 4;
	}

	@Override
	public Appendable fromBytes(byte[] input, int byteStart, int byteLength,
	Appendable result) {
	try {
	while (byteStart < byteLength) {
	char b = (char) (input[byteStart++] & 0xFF);
	if (b < 0x80) {
	// fall through
	} else if (b < 0xE0) {
	b &= 0x1F;
	b <<= 6;
	b \|= (char) (input[byteStart++] & 0x3F);
	} else if (b < 0xF0) {
	b &= 0xF;
	b <<= 6;
	b \|= (char) (input[byteStart++] & 0x3F);
	b <<= 6;
	b \|= (char) (input[byteStart++] & 0x3F);
	} else {
	// surrogate
	int cp = (b & 0x7) << 6;
	cp \|= (char) (input[byteStart++] & 0x3F);
	cp <<= 6;
	cp \|= (char) (input[byteStart++] & 0x3F);
	cp <<= 6;
	cp \|= (char) (input[byteStart++] & 0x3F);
	result.append(UTF16.getLeadSurrogate(cp));
	b = UTF16.getTrailSurrogate(cp);
	}
	result.append(b);
	}
	return result;
	} catch (IOException e) {
	throw new ICUUncheckedIOException("Internal error", e);
	}
	}
	}