blob: 3ed0ecf4afb663ddf653d9577ce4897480747d7c [file] [log] [blame]
/*
* Copyright 2006 Jeremias Maerki in part, and ZXing Authors in part
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* This file has been modified from its original form in Barcode4J.
*/
package com.google.zxing.pdf417.encoder;
import com.google.zxing.WriterException;
import com.google.zxing.common.CharacterSetECI;
import java.math.BigInteger;
import java.nio.charset.Charset;
import java.nio.charset.CharsetEncoder;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
/**
* PDF417 high-level encoder following the algorithm described in ISO/IEC 15438:2001(E) in
* annex P.
*/
final class PDF417HighLevelEncoder {
/**
* code for Text compaction
*/
private static final int TEXT_COMPACTION = 0;
/**
* code for Byte compaction
*/
private static final int BYTE_COMPACTION = 1;
/**
* code for Numeric compaction
*/
private static final int NUMERIC_COMPACTION = 2;
/**
* Text compaction submode Alpha
*/
private static final int SUBMODE_ALPHA = 0;
/**
* Text compaction submode Lower
*/
private static final int SUBMODE_LOWER = 1;
/**
* Text compaction submode Mixed
*/
private static final int SUBMODE_MIXED = 2;
/**
* Text compaction submode Punctuation
*/
private static final int SUBMODE_PUNCTUATION = 3;
/**
* mode latch to Text Compaction mode
*/
private static final int LATCH_TO_TEXT = 900;
/**
* mode latch to Byte Compaction mode (number of characters NOT a multiple of 6)
*/
private static final int LATCH_TO_BYTE_PADDED = 901;
/**
* mode latch to Numeric Compaction mode
*/
private static final int LATCH_TO_NUMERIC = 902;
/**
* mode shift to Byte Compaction mode
*/
private static final int SHIFT_TO_BYTE = 913;
/**
* mode latch to Byte Compaction mode (number of characters a multiple of 6)
*/
private static final int LATCH_TO_BYTE = 924;
/**
* identifier for a user defined Extended Channel Interpretation (ECI)
*/
private static final int ECI_USER_DEFINED = 925;
/**
* identifier for a general purpose ECO format
*/
private static final int ECI_GENERAL_PURPOSE = 926;
/**
* identifier for an ECI of a character set of code page
*/
private static final int ECI_CHARSET = 927;
/**
* Raw code table for text compaction Mixed sub-mode
*/
private static final byte[] TEXT_MIXED_RAW = {
48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 38, 13, 9, 44, 58,
35, 45, 46, 36, 47, 43, 37, 42, 61, 94, 0, 32, 0, 0, 0};
/**
* Raw code table for text compaction: Punctuation sub-mode
*/
private static final byte[] TEXT_PUNCTUATION_RAW = {
59, 60, 62, 64, 91, 92, 93, 95, 96, 126, 33, 13, 9, 44, 58,
10, 45, 46, 36, 47, 34, 124, 42, 40, 41, 63, 123, 125, 39, 0};
private static final byte[] MIXED = new byte[128];
private static final byte[] PUNCTUATION = new byte[128];
private static final Charset DEFAULT_ENCODING = StandardCharsets.ISO_8859_1;
private PDF417HighLevelEncoder() {
}
static {
//Construct inverse lookups
Arrays.fill(MIXED, (byte) -1);
for (int i = 0; i < TEXT_MIXED_RAW.length; i++) {
byte b = TEXT_MIXED_RAW[i];
if (b > 0) {
MIXED[b] = (byte) i;
}
}
Arrays.fill(PUNCTUATION, (byte) -1);
for (int i = 0; i < TEXT_PUNCTUATION_RAW.length; i++) {
byte b = TEXT_PUNCTUATION_RAW[i];
if (b > 0) {
PUNCTUATION[b] = (byte) i;
}
}
}
/**
* Performs high-level encoding of a PDF417 message using the algorithm described in annex P
* of ISO/IEC 15438:2001(E). If byte compaction has been selected, then only byte compaction
* is used.
*
* @param msg the message
* @param compaction compaction mode to use
* @param encoding character encoding used to encode in default or byte compaction
* or {@code null} for default / not applicable
* @return the encoded message (the char values range from 0 to 928)
*/
static String encodeHighLevel(String msg, Compaction compaction, Charset encoding) throws WriterException {
//the codewords 0..928 are encoded as Unicode characters
StringBuilder sb = new StringBuilder(msg.length());
if (encoding == null) {
encoding = DEFAULT_ENCODING;
} else if (!DEFAULT_ENCODING.equals(encoding)) {
CharacterSetECI eci = CharacterSetECI.getCharacterSetECIByName(encoding.name());
if (eci != null) {
encodingECI(eci.getValue(), sb);
}
}
int len = msg.length();
int p = 0;
int textSubMode = SUBMODE_ALPHA;
// User selected encoding mode
switch (compaction) {
case TEXT:
encodeText(msg, p, len, sb, textSubMode);
break;
case BYTE:
byte[] msgBytes = msg.getBytes(encoding);
encodeBinary(msgBytes, p, msgBytes.length, BYTE_COMPACTION, sb);
break;
case NUMERIC:
sb.append((char) LATCH_TO_NUMERIC);
encodeNumeric(msg, p, len, sb);
break;
default:
int encodingMode = TEXT_COMPACTION; //Default mode, see 4.4.2.1
while (p < len) {
int n = determineConsecutiveDigitCount(msg, p);
if (n >= 13) {
sb.append((char) LATCH_TO_NUMERIC);
encodingMode = NUMERIC_COMPACTION;
textSubMode = SUBMODE_ALPHA; //Reset after latch
encodeNumeric(msg, p, n, sb);
p += n;
} else {
int t = determineConsecutiveTextCount(msg, p);
if (t >= 5 || n == len) {
if (encodingMode != TEXT_COMPACTION) {
sb.append((char) LATCH_TO_TEXT);
encodingMode = TEXT_COMPACTION;
textSubMode = SUBMODE_ALPHA; //start with submode alpha after latch
}
textSubMode = encodeText(msg, p, t, sb, textSubMode);
p += t;
} else {
int b = determineConsecutiveBinaryCount(msg, p, encoding);
if (b == 0) {
b = 1;
}
byte[] bytes = msg.substring(p, p + b).getBytes(encoding);
if (bytes.length == 1 && encodingMode == TEXT_COMPACTION) {
//Switch for one byte (instead of latch)
encodeBinary(bytes, 0, 1, TEXT_COMPACTION, sb);
} else {
//Mode latch performed by encodeBinary()
encodeBinary(bytes, 0, bytes.length, encodingMode, sb);
encodingMode = BYTE_COMPACTION;
textSubMode = SUBMODE_ALPHA; //Reset after latch
}
p += b;
}
}
}
break;
}
return sb.toString();
}
/**
* Encode parts of the message using Text Compaction as described in ISO/IEC 15438:2001(E),
* chapter 4.4.2.
*
* @param msg the message
* @param startpos the start position within the message
* @param count the number of characters to encode
* @param sb receives the encoded codewords
* @param initialSubmode should normally be SUBMODE_ALPHA
* @return the text submode in which this method ends
*/
private static int encodeText(CharSequence msg,
int startpos,
int count,
StringBuilder sb,
int initialSubmode) {
StringBuilder tmp = new StringBuilder(count);
int submode = initialSubmode;
int idx = 0;
while (true) {
char ch = msg.charAt(startpos + idx);
switch (submode) {
case SUBMODE_ALPHA:
if (isAlphaUpper(ch)) {
if (ch == ' ') {
tmp.append((char) 26); //space
} else {
tmp.append((char) (ch - 65));
}
} else {
if (isAlphaLower(ch)) {
submode = SUBMODE_LOWER;
tmp.append((char) 27); //ll
continue;
} else if (isMixed(ch)) {
submode = SUBMODE_MIXED;
tmp.append((char) 28); //ml
continue;
} else {
tmp.append((char) 29); //ps
tmp.append((char) PUNCTUATION[ch]);
break;
}
}
break;
case SUBMODE_LOWER:
if (isAlphaLower(ch)) {
if (ch == ' ') {
tmp.append((char) 26); //space
} else {
tmp.append((char) (ch - 97));
}
} else {
if (isAlphaUpper(ch)) {
tmp.append((char) 27); //as
tmp.append((char) (ch - 65));
//space cannot happen here, it is also in "Lower"
break;
} else if (isMixed(ch)) {
submode = SUBMODE_MIXED;
tmp.append((char) 28); //ml
continue;
} else {
tmp.append((char) 29); //ps
tmp.append((char) PUNCTUATION[ch]);
break;
}
}
break;
case SUBMODE_MIXED:
if (isMixed(ch)) {
tmp.append((char) MIXED[ch]);
} else {
if (isAlphaUpper(ch)) {
submode = SUBMODE_ALPHA;
tmp.append((char) 28); //al
continue;
} else if (isAlphaLower(ch)) {
submode = SUBMODE_LOWER;
tmp.append((char) 27); //ll
continue;
} else {
if (startpos + idx + 1 < count) {
char next = msg.charAt(startpos + idx + 1);
if (isPunctuation(next)) {
submode = SUBMODE_PUNCTUATION;
tmp.append((char) 25); //pl
continue;
}
}
tmp.append((char) 29); //ps
tmp.append((char) PUNCTUATION[ch]);
}
}
break;
default: //SUBMODE_PUNCTUATION
if (isPunctuation(ch)) {
tmp.append((char) PUNCTUATION[ch]);
} else {
submode = SUBMODE_ALPHA;
tmp.append((char) 29); //al
continue;
}
}
idx++;
if (idx >= count) {
break;
}
}
char h = 0;
int len = tmp.length();
for (int i = 0; i < len; i++) {
boolean odd = (i % 2) != 0;
if (odd) {
h = (char) ((h * 30) + tmp.charAt(i));
sb.append(h);
} else {
h = tmp.charAt(i);
}
}
if ((len % 2) != 0) {
sb.append((char) ((h * 30) + 29)); //ps
}
return submode;
}
/**
* Encode parts of the message using Byte Compaction as described in ISO/IEC 15438:2001(E),
* chapter 4.4.3. The Unicode characters will be converted to binary using the cp437
* codepage.
*
* @param bytes the message converted to a byte array
* @param startpos the start position within the message
* @param count the number of bytes to encode
* @param startmode the mode from which this method starts
* @param sb receives the encoded codewords
*/
private static void encodeBinary(byte[] bytes,
int startpos,
int count,
int startmode,
StringBuilder sb) {
if (count == 1 && startmode == TEXT_COMPACTION) {
sb.append((char) SHIFT_TO_BYTE);
} else {
if ((count % 6) == 0) {
sb.append((char) LATCH_TO_BYTE);
} else {
sb.append((char) LATCH_TO_BYTE_PADDED);
}
}
int idx = startpos;
// Encode sixpacks
if (count >= 6) {
char[] chars = new char[5];
while ((startpos + count - idx) >= 6) {
long t = 0;
for (int i = 0; i < 6; i++) {
t <<= 8;
t += bytes[idx + i] & 0xff;
}
for (int i = 0; i < 5; i++) {
chars[i] = (char) (t % 900);
t /= 900;
}
for (int i = chars.length - 1; i >= 0; i--) {
sb.append(chars[i]);
}
idx += 6;
}
}
//Encode rest (remaining n<5 bytes if any)
for (int i = idx; i < startpos + count; i++) {
int ch = bytes[i] & 0xff;
sb.append((char) ch);
}
}
private static void encodeNumeric(String msg, int startpos, int count, StringBuilder sb) {
int idx = 0;
StringBuilder tmp = new StringBuilder(count / 3 + 1);
BigInteger num900 = BigInteger.valueOf(900);
BigInteger num0 = BigInteger.valueOf(0);
while (idx < count) {
tmp.setLength(0);
int len = Math.min(44, count - idx);
String part = '1' + msg.substring(startpos + idx, startpos + idx + len);
BigInteger bigint = new BigInteger(part);
do {
tmp.append((char) bigint.mod(num900).intValue());
bigint = bigint.divide(num900);
} while (!bigint.equals(num0));
//Reverse temporary string
for (int i = tmp.length() - 1; i >= 0; i--) {
sb.append(tmp.charAt(i));
}
idx += len;
}
}
private static boolean isDigit(char ch) {
return ch >= '0' && ch <= '9';
}
private static boolean isAlphaUpper(char ch) {
return ch == ' ' || (ch >= 'A' && ch <= 'Z');
}
private static boolean isAlphaLower(char ch) {
return ch == ' ' || (ch >= 'a' && ch <= 'z');
}
private static boolean isMixed(char ch) {
return MIXED[ch] != -1;
}
private static boolean isPunctuation(char ch) {
return PUNCTUATION[ch] != -1;
}
private static boolean isText(char ch) {
return ch == '\t' || ch == '\n' || ch == '\r' || (ch >= 32 && ch <= 126);
}
/**
* Determines the number of consecutive characters that are encodable using numeric compaction.
*
* @param msg the message
* @param startpos the start position within the message
* @return the requested character count
*/
private static int determineConsecutiveDigitCount(CharSequence msg, int startpos) {
int count = 0;
int len = msg.length();
int idx = startpos;
if (idx < len) {
char ch = msg.charAt(idx);
while (isDigit(ch) && idx < len) {
count++;
idx++;
if (idx < len) {
ch = msg.charAt(idx);
}
}
}
return count;
}
/**
* Determines the number of consecutive characters that are encodable using text compaction.
*
* @param msg the message
* @param startpos the start position within the message
* @return the requested character count
*/
private static int determineConsecutiveTextCount(CharSequence msg, int startpos) {
int len = msg.length();
int idx = startpos;
while (idx < len) {
char ch = msg.charAt(idx);
int numericCount = 0;
while (numericCount < 13 && isDigit(ch) && idx < len) {
numericCount++;
idx++;
if (idx < len) {
ch = msg.charAt(idx);
}
}
if (numericCount >= 13) {
return idx - startpos - numericCount;
}
if (numericCount > 0) {
//Heuristic: All text-encodable chars or digits are binary encodable
continue;
}
ch = msg.charAt(idx);
//Check if character is encodable
if (!isText(ch)) {
break;
}
idx++;
}
return idx - startpos;
}
/**
* Determines the number of consecutive characters that are encodable using binary compaction.
*
* @param msg the message
* @param startpos the start position within the message
* @param encoding the charset used to convert the message to a byte array
* @return the requested character count
*/
private static int determineConsecutiveBinaryCount(String msg, int startpos, Charset encoding)
throws WriterException {
CharsetEncoder encoder = encoding.newEncoder();
int len = msg.length();
int idx = startpos;
while (idx < len) {
char ch = msg.charAt(idx);
int numericCount = 0;
while (numericCount < 13 && isDigit(ch)) {
numericCount++;
//textCount++;
int i = idx + numericCount;
if (i >= len) {
break;
}
ch = msg.charAt(i);
}
if (numericCount >= 13) {
return idx - startpos;
}
ch = msg.charAt(idx);
if (!encoder.canEncode(ch)) {
throw new WriterException("Non-encodable character detected: " + ch + " (Unicode: " + (int) ch + ')');
}
idx++;
}
return idx - startpos;
}
private static void encodingECI(int eci, StringBuilder sb) throws WriterException {
if (eci >= 0 && eci < 900) {
sb.append((char) ECI_CHARSET);
sb.append((char) eci);
} else if (eci < 810900) {
sb.append((char) ECI_GENERAL_PURPOSE);
sb.append((char) (eci / 900 - 1));
sb.append((char) (eci % 900));
} else if (eci < 811800) {
sb.append((char) ECI_USER_DEFINED);
sb.append((char) (810900 - eci));
} else {
throw new WriterException("ECI number not in valid range from 0..811799, but was " + eci);
}
}
}