blob: 8a20fd57b8f762c1ed0776afbb802462fea626f9 [file] [log] [blame]
/*
* Copyright 2013 ZXing authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.google.zxing.aztec.encoder;
import com.google.zxing.common.BitArray;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.Iterator;
import java.util.LinkedList;
/**
* This produces nearly optimal encodings of text into the first-level of
* encoding used by Aztec code.
*
* It uses a dynamic algorithm. For each prefix of the string, it determines
* a set of encodings that could lead to this prefix. We repeatedly add a
* character and generate a new set of optimal encodings until we have read
* through the entire input.
*
* @author Frank Yellin
* @author Rustam Abdullaev
*/
public final class HighLevelEncoder {
static final String[] MODE_NAMES = {"UPPER", "LOWER", "DIGIT", "MIXED", "PUNCT"};
static final int MODE_UPPER = 0; // 5 bits
static final int MODE_LOWER = 1; // 5 bits
static final int MODE_DIGIT = 2; // 4 bits
static final int MODE_MIXED = 3; // 5 bits
static final int MODE_PUNCT = 4; // 5 bits
// The Latch Table shows, for each pair of Modes, the optimal method for
// getting from one mode to another. In the worst possible case, this can
// be up to 14 bits. In the best possible case, we are already there!
// The high half-word of each entry gives the number of bits.
// The low half-word of each entry are the actual bits necessary to change
static final int[][] LATCH_TABLE = {
{
0,
(5 << 16) + 28, // UPPER -> LOWER
(5 << 16) + 30, // UPPER -> DIGIT
(5 << 16) + 29, // UPPER -> MIXED
(10 << 16) + (29 << 5) + 30, // UPPER -> MIXED -> PUNCT
},
{
(9 << 16) + (30 << 4) + 14, // LOWER -> DIGIT -> UPPER
0,
(5 << 16) + 30, // LOWER -> DIGIT
(5 << 16) + 29, // LOWER -> MIXED
(10 << 16) + (29 << 5) + 30, // LOWER -> MIXED -> PUNCT
},
{
(4 << 16) + 14, // DIGIT -> UPPER
(9 << 16) + (14 << 5) + 28, // DIGIT -> UPPER -> LOWER
0,
(9 << 16) + (14 << 5) + 29, // DIGIT -> UPPER -> MIXED
(14 << 16) + (14 << 10) + (29 << 5) + 30,
// DIGIT -> UPPER -> MIXED -> PUNCT
},
{
(5 << 16) + 29, // MIXED -> UPPER
(5 << 16) + 28, // MIXED -> LOWER
(10 << 16) + (29 << 5) + 30, // MIXED -> UPPER -> DIGIT
0,
(5 << 16) + 30, // MIXED -> PUNCT
},
{
(5 << 16) + 31, // PUNCT -> UPPER
(10 << 16) + (31 << 5) + 28, // PUNCT -> UPPER -> LOWER
(10 << 16) + (31 << 5) + 30, // PUNCT -> UPPER -> DIGIT
(10 << 16) + (31 << 5) + 29, // PUNCT -> UPPER -> MIXED
0,
},
};
// A reverse mapping from [mode][char] to the encoding for that character
// in that mode. An entry of 0 indicates no mapping exists.
private static final int[][] CHAR_MAP = new int[5][256];
static {
CHAR_MAP[MODE_UPPER][' '] = 1;
for (int c = 'A'; c <= 'Z'; c++) {
CHAR_MAP[MODE_UPPER][c] = c - 'A' + 2;
}
CHAR_MAP[MODE_LOWER][' '] = 1;
for (int c = 'a'; c <= 'z'; c++) {
CHAR_MAP[MODE_LOWER][c] = c - 'a' + 2;
}
CHAR_MAP[MODE_DIGIT][' '] = 1;
for (int c = '0'; c <= '9'; c++) {
CHAR_MAP[MODE_DIGIT][c] = c - '0' + 2;
}
CHAR_MAP[MODE_DIGIT][','] = 12;
CHAR_MAP[MODE_DIGIT]['.'] = 13;
int[] mixedTable = {
'\0', ' ', '\1', '\2', '\3', '\4', '\5', '\6', '\7', '\b', '\t', '\n',
'\13', '\f', '\r', '\33', '\34', '\35', '\36', '\37', '@', '\\', '^',
'_', '`', '|', '~', '\177'
};
for (int i = 0; i < mixedTable.length; i++) {
CHAR_MAP[MODE_MIXED][mixedTable[i]] = i;
}
int[] punctTable = {
'\0', '\r', '\0', '\0', '\0', '\0', '!', '\'', '#', '$', '%', '&', '\'',
'(', ')', '*', '+', ',', '-', '.', '/', ':', ';', '<', '=', '>', '?',
'[', ']', '{', '}'
};
for (int i = 0; i < punctTable.length; i++) {
if (punctTable[i] > 0) {
CHAR_MAP[MODE_PUNCT][punctTable[i]] = i;
}
}
}
// A map showing the available shift codes. (The shifts to BINARY are not
// shown
static final int[][] SHIFT_TABLE = new int[6][6]; // mode shift codes, per table
static {
for (int[] table : SHIFT_TABLE) {
Arrays.fill(table, -1);
}
SHIFT_TABLE[MODE_UPPER][MODE_PUNCT] = 0;
SHIFT_TABLE[MODE_LOWER][MODE_PUNCT] = 0;
SHIFT_TABLE[MODE_LOWER][MODE_UPPER] = 28;
SHIFT_TABLE[MODE_MIXED][MODE_PUNCT] = 0;
SHIFT_TABLE[MODE_DIGIT][MODE_PUNCT] = 0;
SHIFT_TABLE[MODE_DIGIT][MODE_UPPER] = 15;
}
private final byte[] text;
public HighLevelEncoder(byte[] text) {
this.text = text;
}
/**
* @return text represented by this encoder encoded as a {@link BitArray}
*/
public BitArray encode() {
Collection<State> states = Collections.singletonList(State.INITIAL_STATE);
for (int index = 0; index < text.length; index++) {
int pairCode;
int nextChar = index + 1 < text.length ? text[index + 1] : 0;
switch (text[index]) {
case '\r':
pairCode = nextChar == '\n' ? 2 : 0;
break;
case '.' :
pairCode = nextChar == ' ' ? 3 : 0;
break;
case ',' :
pairCode = nextChar == ' ' ? 4 : 0;
break;
case ':' :
pairCode = nextChar == ' ' ? 5 : 0;
break;
default:
pairCode = 0;
}
if (pairCode > 0) {
// We have one of the four special PUNCT pairs. Treat them specially.
// Get a new set of states for the two new characters.
states = updateStateListForPair(states, index, pairCode);
index++;
} else {
// Get a new set of states for the new character.
states = updateStateListForChar(states, index);
}
}
// We are left with a set of states. Find the shortest one.
State minState = Collections.min(states, new Comparator<State>() {
@Override
public int compare(State a, State b) {
return a.getBitCount() - b.getBitCount();
}
});
// Convert it to a bit array, and return.
return minState.toBitArray(text);
}
// We update a set of states for a new character by updating each state
// for the new character, merging the results, and then removing the
// non-optimal states.
private Collection<State> updateStateListForChar(Iterable<State> states, int index) {
Collection<State> result = new LinkedList<>();
for (State state : states) {
updateStateForChar(state, index, result);
}
return simplifyStates(result);
}
// Return a set of states that represent the possible ways of updating this
// state for the next character. The resulting set of states are added to
// the "result" list.
private void updateStateForChar(State state, int index, Collection<State> result) {
char ch = (char) (text[index] & 0xFF);
boolean charInCurrentTable = CHAR_MAP[state.getMode()][ch] > 0;
State stateNoBinary = null;
for (int mode = 0; mode <= MODE_PUNCT; mode++) {
int charInMode = CHAR_MAP[mode][ch];
if (charInMode > 0) {
if (stateNoBinary == null) {
// Only create stateNoBinary the first time it's required.
stateNoBinary = state.endBinaryShift(index);
}
// Try generating the character by latching to its mode
if (!charInCurrentTable || mode == state.getMode() || mode == MODE_DIGIT) {
// If the character is in the current table, we don't want to latch to
// any other mode except possibly digit (which uses only 4 bits). Any
// other latch would be equally successful *after* this character, and
// so wouldn't save any bits.
State latchState = stateNoBinary.latchAndAppend(mode, charInMode);
result.add(latchState);
}
// Try generating the character by switching to its mode.
if (!charInCurrentTable && SHIFT_TABLE[state.getMode()][mode] >= 0) {
// It never makes sense to temporarily shift to another mode if the
// character exists in the current mode. That can never save bits.
State shiftState = stateNoBinary.shiftAndAppend(mode, charInMode);
result.add(shiftState);
}
}
}
if (state.getBinaryShiftByteCount() > 0 || CHAR_MAP[state.getMode()][ch] == 0) {
// It's never worthwhile to go into binary shift mode if you're not already
// in binary shift mode, and the character exists in your current mode.
// That can never save bits over just outputting the char in the current mode.
State binaryState = state.addBinaryShiftChar(index);
result.add(binaryState);
}
}
private static Collection<State> updateStateListForPair(Iterable<State> states, int index, int pairCode) {
Collection<State> result = new LinkedList<>();
for (State state : states) {
updateStateForPair(state, index, pairCode, result);
}
return simplifyStates(result);
}
private static void updateStateForPair(State state, int index, int pairCode, Collection<State> result) {
State stateNoBinary = state.endBinaryShift(index);
// Possibility 1. Latch to MODE_PUNCT, and then append this code
result.add(stateNoBinary.latchAndAppend(MODE_PUNCT, pairCode));
if (state.getMode() != MODE_PUNCT) {
// Possibility 2. Shift to MODE_PUNCT, and then append this code.
// Every state except MODE_PUNCT (handled above) can shift
result.add(stateNoBinary.shiftAndAppend(MODE_PUNCT, pairCode));
}
if (pairCode == 3 || pairCode == 4) {
// both characters are in DIGITS. Sometimes better to just add two digits
State digitState = stateNoBinary
.latchAndAppend(MODE_DIGIT, 16 - pairCode) // period or comma in DIGIT
.latchAndAppend(MODE_DIGIT, 1); // space in DIGIT
result.add(digitState);
}
if (state.getBinaryShiftByteCount() > 0) {
// It only makes sense to do the characters as binary if we're already
// in binary mode.
State binaryState = state.addBinaryShiftChar(index).addBinaryShiftChar(index + 1);
result.add(binaryState);
}
}
private static Collection<State> simplifyStates(Iterable<State> states) {
Collection<State> result = new LinkedList<>();
for (State newState : states) {
boolean add = true;
for (Iterator<State> iterator = result.iterator(); iterator.hasNext();) {
State oldState = iterator.next();
if (oldState.isBetterThanOrEqualTo(newState)) {
add = false;
break;
}
if (newState.isBetterThanOrEqualTo(oldState)) {
iterator.remove();
}
}
if (add) {
result.add(newState);
}
}
return result;
}
}