// blob: e49389b4637bcac83f4ad1d23e08689b6b95d4ab [file] [log] [blame]
/*
* Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.FilterReader;
import java.io.FilterWriter;
import java.io.IOException;
import java.io.Reader;
import java.io.Writer;
import java.nio.charset.Charset;
import java.nio.charset.CharsetEncoder;
import java.nio.file.Files;
import java.nio.file.Path;
import static java.nio.charset.StandardCharsets.*;
/**
* Simple utility that converts a file from a native encoding to ASCII, or
* from ASCII back to a native encoding, using the \udddd Unicode escape
* notation for characters outside the ASCII range.
*/
public class Native2Ascii {

    /**
     * Number of characters that must follow a backslash for it to form a
     * complete Unicode escape: the letter 'u' plus four hexadecimal digits.
     */
    private static final int ESCAPE_TAIL_LENGTH = 5;

    /** Size, in characters, of the working buffers used by {@link A2NFilter}. */
    private static final int CHUNK_SIZE = 8192;

    /** The "native" charset used on the non-ASCII side of a conversion. */
    final Charset cs;

    /** Encoder of {@link #cs}, used to test whether a decoded character is representable. */
    final CharsetEncoder encoder;

    /**
     * Creates a converter between ASCII-with-escapes and the given charset.
     *
     * @param cs the native charset to read from / write to
     */
    public Native2Ascii(Charset cs) {
        this.cs = cs;
        this.encoder = cs.newEncoder();
    }

    /**
     * ASCII to Native conversion.
     *
     * Reads {@code infile} as US-ASCII, replaces every Unicode escape whose
     * character can be encoded in the native charset by that character, and
     * writes the result to {@code outfile} in the native charset. The file is
     * copied line by line, so every output line (including the last one) ends
     * with the platform line separator.
     *
     * @param infile  the ASCII file to read
     * @param outfile the native-encoded file to create or overwrite
     * @throws IOException if reading, decoding or writing fails
     */
    public void asciiToNative(Path infile, Path outfile) throws IOException {
        try (BufferedReader in = Files.newBufferedReader(infile, US_ASCII);
             BufferedReader reader = new BufferedReader(new A2NFilter(in));
             BufferedWriter writer = Files.newBufferedWriter(outfile, cs)) {
            copyLines(reader, writer);
        }
    }

    /**
     * Native to ASCII conversion.
     *
     * Reads {@code infile} in the native charset and writes it to
     * {@code outfile} as US-ASCII, replacing every character above U+007F by
     * its Unicode escape. The file is copied line by line, so every output
     * line (including the last one) ends with the platform line separator.
     *
     * @param infile  the native-encoded file to read
     * @param outfile the ASCII file to create or overwrite
     * @throws IOException if reading, decoding or writing fails
     */
    public void nativeToAscii(Path infile, Path outfile) throws IOException {
        try (BufferedReader reader = Files.newBufferedReader(infile, cs);
             BufferedWriter out = Files.newBufferedWriter(outfile, US_ASCII);
             BufferedWriter writer = new BufferedWriter(new N2AFilter(out))) {
            copyLines(reader, writer);
        }
    }

    /** Copies every line of {@code reader} to {@code writer}, terminating each with newLine(). */
    private static void copyLines(BufferedReader reader, BufferedWriter writer)
            throws IOException {
        String line;
        while ((line = reader.readLine()) != null) {
            writer.write(line);
            writer.newLine();
        }
    }

    /**
     * Parses four hexadecimal digits starting at {@code chars[start]}.
     * Only the ASCII digits 0-9, a-f and A-F are accepted; in particular a
     * leading sign is rejected.
     *
     * @return the value in the range 0..0xFFFF, or -1 if any of the four
     *         characters is not a hexadecimal digit
     */
    private static int parseHex4(char[] chars, int start) {
        int value = 0;
        for (int k = 0; k < 4; k++) {
            char ch = chars[start + k];
            int digit;
            if (ch >= '0' && ch <= '9') {
                digit = ch - '0';
            } else if (ch >= 'a' && ch <= 'f') {
                digit = ch - 'a' + 10;
            } else if (ch >= 'A' && ch <= 'F') {
                digit = ch - 'A' + 10;
            } else {
                return -1;
            }
            value = (value << 4) | digit;
        }
        return value;
    }

    // A copy of native2ascii N2AFilter
    /**
     * Writer that passes ASCII characters through unchanged and replaces
     * every character above U+007F by a backslash, the letter 'u' and the
     * four-digit lower-case hexadecimal code of the character.
     */
    class N2AFilter extends FilterWriter {

        public N2AFilter(Writer out) {
            super(out);
        }

        /** Writes a single character, escaping it if it is not ASCII. */
        public void write(char b) throws IOException {
            write(new char[] { b }, 0, 1);
        }

        /**
         * Writes a single character, escaping it if it is not ASCII.
         * Overridden because the inherited version hands the character to the
         * underlying writer directly, bypassing the escaping.
         */
        @Override
        public void write(int c) throws IOException {
            write((char) c);
        }

        /**
         * Writes {@code len} characters of {@code buf} starting at index
         * {@code off}, escaping the non-ASCII ones.
         */
        @Override
        public void write(char[] buf, int off, int len) throws IOException {
            int end = off + len;
            for (int i = off; i < end; i++) {
                char c = buf[i];
                if (c > 0x7F) {
                    out.write('\\');
                    out.write('u');
                    String hex = Integer.toHexString(c);
                    // left-pad to exactly four hex digits
                    for (int pad = hex.length(); pad < 4; pad++) {
                        out.write('0');
                    }
                    out.write(hex);
                } else {
                    out.write(c);
                }
            }
        }

        /**
         * Writes a portion of a string, escaping the non-ASCII characters.
         * Overridden because the inherited version hands the string to the
         * underlying writer directly, bypassing the escaping.
         */
        @Override
        public void write(String str, int off, int len) throws IOException {
            write(str.substring(off, off + len).toCharArray(), 0, len);
        }
    }

    // A copy of native2ascii A2NFilter
    /**
     * Reader that replaces Unicode escapes (backslash, 'u', four hex digits)
     * by the character they denote, provided the enclosing converter's
     * encoder reports that character as encodable. Everything else, including
     * a backslash followed by any character other than 'u', is passed through
     * verbatim.
     *
     * Decoding is done in chunks into an internal buffer so that the result
     * does not depend on the size of the caller's buffer. Characters are
     * therefore read ahead from the underlying reader; {@code skip},
     * {@code mark} and {@code reset} are inherited unchanged and do not take
     * the read-ahead into account.
     */
    class A2NFilter extends FilterReader {

        // maintain a trailing buffer to hold any incomplete
        // unicode escaped sequences
        private char[] trailChars = null;

        // raw characters of the chunk currently being decoded
        private final char[] input = new char[CHUNK_SIZE];

        // decoded characters not yet handed to the caller:
        // decoded[decodedPos .. decodedEnd)
        private final char[] decoded = new char[CHUNK_SIZE];
        private int decodedPos = 0;
        private int decodedEnd = 0;

        public A2NFilter(Reader in) {
            super(in);
        }

        /**
         * Reads up to {@code len} decoded characters into {@code buf}
         * starting at index {@code off}.
         *
         * @return the number of characters stored (at least one when
         *         {@code len > 0}), or -1 at end of stream
         * @throws IndexOutOfBoundsException if {@code off} and {@code len} do
         *         not describe a valid range of {@code buf}
         */
        @Override
        public int read(char[] buf, int off, int len) throws IOException {
            if (off < 0 || len < 0 || len > buf.length - off) {
                throw new IndexOutOfBoundsException();
            }
            if (len == 0) {
                return 0;
            }
            // A decoding round may produce nothing when the chunk consists
            // solely of the start of a possible escape; keep going until
            // there is output or the input is exhausted.
            while (decodedPos == decodedEnd) {
                if (!fill()) {
                    return -1;
                }
            }
            int count = Math.min(len, decodedEnd - decodedPos);
            System.arraycopy(decoded, decodedPos, buf, off, count);
            decodedPos += count;
            return count;
        }

        /**
         * Reads a single decoded character.
         *
         * @return the character, or -1 at end of stream
         */
        @Override
        public int read() throws IOException {
            char[] one = new char[1];
            return (read(one, 0, 1) == -1) ? -1 : one[0];
        }

        /**
         * Reads one chunk from the underlying reader and decodes it into
         * {@code decoded}. Each decoded character consumes at least one input
         * character, so the output always fits.
         *
         * @return false if the underlying reader is at end of stream and no
         *         undecided characters are left; true otherwise (possibly
         *         with zero characters decoded)
         */
        private boolean fill() throws IOException {
            decodedPos = 0;
            decodedEnd = 0;

            // start with the undecided tail of the previous chunk, if any
            int numChars = 0;
            if (trailChars != null) {
                numChars = trailChars.length;
                System.arraycopy(trailChars, 0, input, 0, numChars);
                trailChars = null;
            }

            boolean eof = false;
            int n = in.read(input, numChars, input.length - numChars);
            if (n < 0) {
                if (numChars == 0) {
                    return false;
                }
                eof = true;
            } else {
                numChars += n;
            }

            int i = 0;
            while (i < numChars) {
                char c = input[i++];
                if (c != '\\') {
                    // Always pass non backslash chars straight through
                    decoded[decodedEnd++] = c;
                    continue;
                }

                int remaining = numChars - i;
                if (remaining < ESCAPE_TAIL_LENGTH) {
                    if (eof) {
                        // Too few characters left in the stream to form an
                        // escape: this is a literal U+005C, copy it verbatim.
                        decoded[decodedEnd++] = c;
                        continue;
                    }
                    // Might be the start of an escape, but we cannot tell
                    // yet; keep it (and what follows) for the next chunk.
                    trailChars = new char[1 + remaining];
                    System.arraycopy(input, i - 1, trailChars, 0, 1 + remaining);
                    break;
                }

                // At this point at least five characters follow the backslash
                c = input[i++];
                if (c != 'u') {
                    // Not a unicode escape: copy the pair and continue
                    decoded[decodedEnd++] = '\\';
                    decoded[decodedEnd++] = c;
                    continue;
                }

                // The next four characters should be the hex part of the escape
                int value = parseHex4(input, i);
                if (value >= 0 && encoder.canEncode((char) value)) {
                    decoded[decodedEnd++] = (char) value;
                    i += 4; // move past the four hex digits
                } else {
                    // Malformed or not representable in the target charset
                    // (canEncode(char) is also false for surrogate halves):
                    // keep the backslash and 'u'; the following characters
                    // are then processed as ordinary input.
                    decoded[decodedEnd++] = '\\';
                    decoded[decodedEnd++] = 'u';
                }
            }
            return true;
        }
    }
}