core/src/main/java/com/google/zxing/aztec/encoder/HighLevelEncoder.java - platform/external/zxing - Git at Google

 /*
  * Copyright 2013 ZXing authors
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
  * You may obtain a copy of the License at
  *
  *      http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */

 package com.google.zxing.aztec.encoder;

 import com.google.zxing.common.BitArray;

 import java.util.Arrays;
 import java.util.Collection;
 import java.util.Collections;
 import java.util.Comparator;
 import java.util.Iterator;
 import java.util.LinkedList;

 /**
  * This produces nearly optimal encodings of text into the first-level of
  * encoding used by Aztec code.
  *
  * It uses a dynamic algorithm.  For each prefix of the string, it determines
  * a set of encodings that could lead to this prefix.  We repeatedly add a
  * character and generate a new set of optimal encodings until we have read
  * through the entire input.
  *
  * @author Frank Yellin
  * @author Rustam Abdullaev
  */
 public final class HighLevelEncoder {

   static final String[] MODE_NAMES = {"UPPER", "LOWER", "DIGIT", "MIXED", "PUNCT"};

   static final int MODE_UPPER = 0; // 5 bits
   static final int MODE_LOWER = 1; // 5 bits
   static final int MODE_DIGIT = 2; // 4 bits
   static final int MODE_MIXED = 3; // 5 bits
   static final int MODE_PUNCT = 4; // 5 bits

   // The Latch Table shows, for each pair of Modes, the optimal method for
   // getting from one mode to another.  In the worst possible case, this can
   // be up to 14 bits.  In the best possible case, we are already there!
   // The high half-word of each entry gives the number of bits.
   // The low half-word of each entry are the actual bits necessary to change
   static final int[][] LATCH_TABLE = {
     {
       0,
       (5 << 16) + 28,              // UPPER -> LOWER
       (5 << 16) + 30,              // UPPER -> DIGIT
       (5 << 16) + 29,              // UPPER -> MIXED
       (10 << 16) + (29 << 5) + 30, // UPPER -> MIXED -> PUNCT
     },
     {
       (9 << 16) + (30 << 4) + 14,  // LOWER -> DIGIT -> UPPER
       0,
       (5 << 16) + 30,              // LOWER -> DIGIT
       (5 << 16) + 29,              // LOWER -> MIXED
       (10 << 16) + (29 << 5) + 30, // LOWER -> MIXED -> PUNCT
     },
     {
       (4 << 16) + 14,              // DIGIT -> UPPER
       (9 << 16) + (14 << 5) + 28,  // DIGIT -> UPPER -> LOWER
       0,
       (9 << 16) + (14 << 5) + 29,  // DIGIT -> UPPER -> MIXED
       (14 << 16) + (14 << 10) + (29 << 5) + 30,
                                    // DIGIT -> UPPER -> MIXED -> PUNCT
     },
     {
       (5 << 16) + 29,              // MIXED -> UPPER
       (5 << 16) + 28,              // MIXED -> LOWER
       (10 << 16) + (29 << 5) + 30, // MIXED -> UPPER -> DIGIT
       0,
       (5 << 16) + 30,              // MIXED -> PUNCT
     },
     {
       (5 << 16) + 31,              // PUNCT -> UPPER
       (10 << 16) + (31 << 5) + 28, // PUNCT -> UPPER -> LOWER
       (10 << 16) + (31 << 5) + 30, // PUNCT -> UPPER -> DIGIT
       (10 << 16) + (31 << 5) + 29, // PUNCT -> UPPER -> MIXED
       0,
     },
   };

   // A reverse mapping from [mode][char] to the encoding for that character
   // in that mode.  An entry of 0 indicates no mapping exists.
   private static final int[][] CHAR_MAP = new int[5][256];
   static {
     CHAR_MAP[MODE_UPPER][' '] = 1;
     for (int c = 'A'; c <= 'Z'; c++) {
       CHAR_MAP[MODE_UPPER][c] = c - 'A' + 2;
     }
     CHAR_MAP[MODE_LOWER][' '] = 1;
     for (int c = 'a'; c <= 'z'; c++) {
       CHAR_MAP[MODE_LOWER][c] = c - 'a' + 2;
     }
     CHAR_MAP[MODE_DIGIT][' '] = 1;
     for (int c = '0'; c <= '9'; c++) {
       CHAR_MAP[MODE_DIGIT][c] = c - '0' + 2;
     }
     CHAR_MAP[MODE_DIGIT][','] = 12;
     CHAR_MAP[MODE_DIGIT]['.'] = 13;
     int[] mixedTable = {
         '\0', ' ', '\1', '\2', '\3', '\4', '\5', '\6', '\7', '\b', '\t', '\n',
         '\13', '\f', '\r', '\33', '\34', '\35', '\36', '\37', '@', '\\', '^',
         '_', '`', '|', '~', '\177'
     };
     for (int i = 0; i < mixedTable.length; i++) {
       CHAR_MAP[MODE_MIXED][mixedTable[i]] = i;
     }
     int[] punctTable = {
         '\0', '\r', '\0', '\0', '\0', '\0', '!', '\'', '#', '$', '%', '&', '\'',
         '(', ')', '*', '+', ',', '-', '.', '/', ':', ';', '<', '=', '>', '?',
         '[', ']', '{', '}'
     };
     for (int i = 0; i < punctTable.length; i++) {
       if (punctTable[i] > 0) {
         CHAR_MAP[MODE_PUNCT][punctTable[i]] = i;
       }
     }
   }

   // A map showing the available shift codes.  (The shifts to BINARY are not
   // shown
   static final int[][] SHIFT_TABLE = new int[6][6]; // mode shift codes, per table
   static {
     for (int[] table : SHIFT_TABLE) {
       Arrays.fill(table, -1);
     }
     SHIFT_TABLE[MODE_UPPER][MODE_PUNCT] = 0;

     SHIFT_TABLE[MODE_LOWER][MODE_PUNCT] = 0;
     SHIFT_TABLE[MODE_LOWER][MODE_UPPER] = 28;

     SHIFT_TABLE[MODE_MIXED][MODE_PUNCT] = 0;

     SHIFT_TABLE[MODE_DIGIT][MODE_PUNCT] = 0;
     SHIFT_TABLE[MODE_DIGIT][MODE_UPPER] = 15;
   }

   private final byte[] text;

   public HighLevelEncoder(byte[] text) {
     this.text = text;
   }

   /**
    * @return text represented by this encoder encoded as a {@link BitArray}
    */
   public BitArray encode() {
     Collection<State> states = Collections.singletonList(State.INITIAL_STATE);
     for (int index = 0; index < text.length; index++) {
       int pairCode;
       int nextChar = index + 1 < text.length ? text[index + 1] : 0;
       switch (text[index]) {
         case '\r':
           pairCode = nextChar == '\n' ? 2 : 0;
           break;
         case '.' :
           pairCode = nextChar == ' ' ? 3 : 0;
           break;
         case ',' :
           pairCode = nextChar == ' ' ? 4 : 0;
           break;
         case ':' :
           pairCode = nextChar == ' ' ? 5 : 0;
           break;
         default:
           pairCode = 0;
       }
       if (pairCode > 0) {
         // We have one of the four special PUNCT pairs.  Treat them specially.
         // Get a new set of states for the two new characters.
         states = updateStateListForPair(states, index, pairCode);
         index++;
       } else {
         // Get a new set of states for the new character.
         states = updateStateListForChar(states, index);
       }
     }
     // We are left with a set of states.  Find the shortest one.
     State minState = Collections.min(states, new Comparator<State>() {
       @Override
       public int compare(State a, State b) {
         return a.getBitCount() - b.getBitCount();
       }
     });
     // Convert it to a bit array, and return.
     return minState.toBitArray(text);
   }

   // We update a set of states for a new character by updating each state
   // for the new character, merging the results, and then removing the
   // non-optimal states.
   private Collection<State> updateStateListForChar(Iterable<State> states, int index) {
     Collection<State> result = new LinkedList<>();
     for (State state : states) {
       updateStateForChar(state, index, result);
     }
     return simplifyStates(result);
   }

   // Return a set of states that represent the possible ways of updating this
   // state for the next character.  The resulting set of states are added to
   // the "result" list.
   private void updateStateForChar(State state, int index, Collection<State> result) {
     char ch = (char) (text[index] & 0xFF);
     boolean charInCurrentTable = CHAR_MAP[state.getMode()][ch] > 0;
     State stateNoBinary = null;
     for (int mode = 0; mode <= MODE_PUNCT; mode++) {
       int charInMode = CHAR_MAP[mode][ch];
       if (charInMode > 0) {
         if (stateNoBinary == null) {
           // Only create stateNoBinary the first time it's required.
           stateNoBinary = state.endBinaryShift(index);
         }
         // Try generating the character by latching to its mode
         if (!charInCurrentTable || mode == state.getMode() || mode == MODE_DIGIT) {
           // If the character is in the current table, we don't want to latch to
           // any other mode except possibly digit (which uses only 4 bits).  Any
           // other latch would be equally successful *after* this character, and
           // so wouldn't save any bits.
           State latchState = stateNoBinary.latchAndAppend(mode, charInMode);
           result.add(latchState);
         }
         // Try generating the character by switching to its mode.
         if (!charInCurrentTable && SHIFT_TABLE[state.getMode()][mode] >= 0) {
           // It never makes sense to temporarily shift to another mode if the
           // character exists in the current mode.  That can never save bits.
           State shiftState = stateNoBinary.shiftAndAppend(mode, charInMode);
           result.add(shiftState);
         }
       }
     }
     if (state.getBinaryShiftByteCount() > 0 || CHAR_MAP[state.getMode()][ch] == 0) {
       // It's never worthwhile to go into binary shift mode if you're not already
       // in binary shift mode, and the character exists in your current mode.
       // That can never save bits over just outputting the char in the current mode.
       State binaryState = state.addBinaryShiftChar(index);
       result.add(binaryState);
     }
   }

   private static Collection<State> updateStateListForPair(Iterable<State> states, int index, int pairCode) {
     Collection<State> result = new LinkedList<>();
     for (State state : states) {
       updateStateForPair(state, index, pairCode, result);
     }
     return simplifyStates(result);
   }

   private static void updateStateForPair(State state, int index, int pairCode, Collection<State> result) {
     State stateNoBinary = state.endBinaryShift(index);
     // Possibility 1.  Latch to MODE_PUNCT, and then append this code
     result.add(stateNoBinary.latchAndAppend(MODE_PUNCT, pairCode));
     if (state.getMode() != MODE_PUNCT) {
       // Possibility 2.  Shift to MODE_PUNCT, and then append this code.
       // Every state except MODE_PUNCT (handled above) can shift
       result.add(stateNoBinary.shiftAndAppend(MODE_PUNCT, pairCode));
     }
     if (pairCode == 3 || pairCode == 4) {
       // both characters are in DIGITS.  Sometimes better to just add two digits
       State digitState = stateNoBinary
           .latchAndAppend(MODE_DIGIT, 16 - pairCode)  // period or comma in DIGIT
           .latchAndAppend(MODE_DIGIT, 1);             // space in DIGIT
       result.add(digitState);
     }
     if (state.getBinaryShiftByteCount() > 0) {
       // It only makes sense to do the characters as binary if we're already
       // in binary mode.
       State binaryState = state.addBinaryShiftChar(index).addBinaryShiftChar(index + 1);
       result.add(binaryState);
     }
   }

   private static Collection<State> simplifyStates(Iterable<State> states) {
     Collection<State> result = new LinkedList<>();
     for (State newState : states) {
       boolean add = true;
       for (Iterator<State> iterator = result.iterator(); iterator.hasNext();) {
         State oldState = iterator.next();
         if (oldState.isBetterThanOrEqualTo(newState)) {
           add = false;
           break;
         }
         if (newState.isBetterThanOrEqualTo(oldState)) {
           iterator.remove();
         }
       }
       if (add) {
         result.add(newState);
       }
     }
     return result;
   }

 }
	/*
	* Copyright 2013 ZXing authors
	*
	* Licensed under the Apache License, Version 2.0 (the "License");
	* you may not use this file except in compliance with the License.
	* You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/

	package com.google.zxing.aztec.encoder;

	import com.google.zxing.common.BitArray;

	import java.util.Arrays;
	import java.util.Collection;
	import java.util.Collections;
	import java.util.Comparator;
	import java.util.Iterator;
	import java.util.LinkedList;

	/**
	* This produces nearly optimal encodings of text into the first-level of
	* encoding used by Aztec code.
	*
	* It uses a dynamic algorithm. For each prefix of the string, it determines
	* a set of encodings that could lead to this prefix. We repeatedly add a
	* character and generate a new set of optimal encodings until we have read
	* through the entire input.
	*
	* @author Frank Yellin
	* @author Rustam Abdullaev
	*/
	public final class HighLevelEncoder {

	static final String[] MODE_NAMES = {"UPPER", "LOWER", "DIGIT", "MIXED", "PUNCT"};

	static final int MODE_UPPER = 0; // 5 bits
	static final int MODE_LOWER = 1; // 5 bits
	static final int MODE_DIGIT = 2; // 4 bits
	static final int MODE_MIXED = 3; // 5 bits
	static final int MODE_PUNCT = 4; // 5 bits

	// The Latch Table shows, for each pair of Modes, the optimal method for
	// getting from one mode to another. In the worst possible case, this can
	// be up to 14 bits. In the best possible case, we are already there!
	// The high half-word of each entry gives the number of bits.
	// The low half-word of each entry are the actual bits necessary to change
	static final int[][] LATCH_TABLE = {
	{
	0,
	(5 << 16) + 28, // UPPER -> LOWER
	(5 << 16) + 30, // UPPER -> DIGIT
	(5 << 16) + 29, // UPPER -> MIXED
	(10 << 16) + (29 << 5) + 30, // UPPER -> MIXED -> PUNCT
	},
	{
	(9 << 16) + (30 << 4) + 14, // LOWER -> DIGIT -> UPPER
	0,
	(5 << 16) + 30, // LOWER -> DIGIT
	(5 << 16) + 29, // LOWER -> MIXED
	(10 << 16) + (29 << 5) + 30, // LOWER -> MIXED -> PUNCT
	},
	{
	(4 << 16) + 14, // DIGIT -> UPPER
	(9 << 16) + (14 << 5) + 28, // DIGIT -> UPPER -> LOWER
	0,
	(9 << 16) + (14 << 5) + 29, // DIGIT -> UPPER -> MIXED
	(14 << 16) + (14 << 10) + (29 << 5) + 30,
	// DIGIT -> UPPER -> MIXED -> PUNCT
	},
	{
	(5 << 16) + 29, // MIXED -> UPPER
	(5 << 16) + 28, // MIXED -> LOWER
	(10 << 16) + (29 << 5) + 30, // MIXED -> UPPER -> DIGIT
	0,
	(5 << 16) + 30, // MIXED -> PUNCT
	},
	{
	(5 << 16) + 31, // PUNCT -> UPPER
	(10 << 16) + (31 << 5) + 28, // PUNCT -> UPPER -> LOWER
	(10 << 16) + (31 << 5) + 30, // PUNCT -> UPPER -> DIGIT
	(10 << 16) + (31 << 5) + 29, // PUNCT -> UPPER -> MIXED
	0,
	},
	};

	// A reverse mapping from [mode][char] to the encoding for that character
	// in that mode. An entry of 0 indicates no mapping exists.
	private static final int[][] CHAR_MAP = new int[5][256];
	static {
	CHAR_MAP[MODE_UPPER][' '] = 1;
	for (int c = 'A'; c <= 'Z'; c++) {
	CHAR_MAP[MODE_UPPER][c] = c - 'A' + 2;
	}
	CHAR_MAP[MODE_LOWER][' '] = 1;
	for (int c = 'a'; c <= 'z'; c++) {
	CHAR_MAP[MODE_LOWER][c] = c - 'a' + 2;
	}
	CHAR_MAP[MODE_DIGIT][' '] = 1;
	for (int c = '0'; c <= '9'; c++) {
	CHAR_MAP[MODE_DIGIT][c] = c - '0' + 2;
	}
	CHAR_MAP[MODE_DIGIT][','] = 12;
	CHAR_MAP[MODE_DIGIT]['.'] = 13;
	int[] mixedTable = {
	'\0', ' ', '\1', '\2', '\3', '\4', '\5', '\6', '\7', '\b', '\t', '\n',
	'\13', '\f', '\r', '\33', '\34', '\35', '\36', '\37', '@', '\\', '^',
	'_', '`', '\|', '~', '\177'
	};
	for (int i = 0; i < mixedTable.length; i++) {
	CHAR_MAP[MODE_MIXED][mixedTable[i]] = i;
	}
	int[] punctTable = {
	'\0', '\r', '\0', '\0', '\0', '\0', '!', '\'', '#', '$', '%', '&', '\'',
	'(', ')', '*', '+', ',', '-', '.', '/', ':', ';', '<', '=', '>', '?',
	'[', ']', '{', '}'
	};
	for (int i = 0; i < punctTable.length; i++) {
	if (punctTable[i] > 0) {
	CHAR_MAP[MODE_PUNCT][punctTable[i]] = i;
	}
	}
	}

	// A map showing the available shift codes. (The shifts to BINARY are not
	// shown
	static final int[][] SHIFT_TABLE = new int[6][6]; // mode shift codes, per table
	static {
	for (int[] table : SHIFT_TABLE) {
	Arrays.fill(table, -1);
	}
	SHIFT_TABLE[MODE_UPPER][MODE_PUNCT] = 0;

	SHIFT_TABLE[MODE_LOWER][MODE_PUNCT] = 0;
	SHIFT_TABLE[MODE_LOWER][MODE_UPPER] = 28;

	SHIFT_TABLE[MODE_MIXED][MODE_PUNCT] = 0;

	SHIFT_TABLE[MODE_DIGIT][MODE_PUNCT] = 0;
	SHIFT_TABLE[MODE_DIGIT][MODE_UPPER] = 15;
	}

	private final byte[] text;

	public HighLevelEncoder(byte[] text) {
	this.text = text;
	}

	/**
	* @return text represented by this encoder encoded as a {@link BitArray}
	*/
	public BitArray encode() {
	Collection<State> states = Collections.singletonList(State.INITIAL_STATE);
	for (int index = 0; index < text.length; index++) {
	int pairCode;
	int nextChar = index + 1 < text.length ? text[index + 1] : 0;
	switch (text[index]) {
	case '\r':
	pairCode = nextChar == '\n' ? 2 : 0;
	break;
	case '.' :
	pairCode = nextChar == ' ' ? 3 : 0;
	break;
	case ',' :
	pairCode = nextChar == ' ' ? 4 : 0;
	break;
	case ':' :
	pairCode = nextChar == ' ' ? 5 : 0;
	break;
	default:
	pairCode = 0;
	}
	if (pairCode > 0) {
	// We have one of the four special PUNCT pairs. Treat them specially.
	// Get a new set of states for the two new characters.
	states = updateStateListForPair(states, index, pairCode);
	index++;
	} else {
	// Get a new set of states for the new character.
	states = updateStateListForChar(states, index);
	}
	}
	// We are left with a set of states. Find the shortest one.
	State minState = Collections.min(states, new Comparator<State>() {
	@Override
	public int compare(State a, State b) {
	return a.getBitCount() - b.getBitCount();
	}
	});
	// Convert it to a bit array, and return.
	return minState.toBitArray(text);
	}

	// We update a set of states for a new character by updating each state
	// for the new character, merging the results, and then removing the
	// non-optimal states.
	private Collection<State> updateStateListForChar(Iterable<State> states, int index) {
	Collection<State> result = new LinkedList<>();
	for (State state : states) {
	updateStateForChar(state, index, result);
	}
	return simplifyStates(result);
	}

	// Return a set of states that represent the possible ways of updating this
	// state for the next character. The resulting set of states are added to
	// the "result" list.
	private void updateStateForChar(State state, int index, Collection<State> result) {
	char ch = (char) (text[index] & 0xFF);
	boolean charInCurrentTable = CHAR_MAP[state.getMode()][ch] > 0;
	State stateNoBinary = null;
	for (int mode = 0; mode <= MODE_PUNCT; mode++) {
	int charInMode = CHAR_MAP[mode][ch];
	if (charInMode > 0) {
	if (stateNoBinary == null) {
	// Only create stateNoBinary the first time it's required.
	stateNoBinary = state.endBinaryShift(index);
	}
	// Try generating the character by latching to its mode
	if (!charInCurrentTable \|\| mode == state.getMode() \|\| mode == MODE_DIGIT) {
	// If the character is in the current table, we don't want to latch to
	// any other mode except possibly digit (which uses only 4 bits). Any
	// other latch would be equally successful after this character, and
	// so wouldn't save any bits.
	State latchState = stateNoBinary.latchAndAppend(mode, charInMode);
	result.add(latchState);
	}
	// Try generating the character by switching to its mode.
	if (!charInCurrentTable && SHIFT_TABLE[state.getMode()][mode] >= 0) {
	// It never makes sense to temporarily shift to another mode if the
	// character exists in the current mode. That can never save bits.
	State shiftState = stateNoBinary.shiftAndAppend(mode, charInMode);
	result.add(shiftState);
	}
	}
	}
	if (state.getBinaryShiftByteCount() > 0 \|\| CHAR_MAP[state.getMode()][ch] == 0) {
	// It's never worthwhile to go into binary shift mode if you're not already
	// in binary shift mode, and the character exists in your current mode.
	// That can never save bits over just outputting the char in the current mode.
	State binaryState = state.addBinaryShiftChar(index);
	result.add(binaryState);
	}
	}

	private static Collection<State> updateStateListForPair(Iterable<State> states, int index, int pairCode) {
	Collection<State> result = new LinkedList<>();
	for (State state : states) {
	updateStateForPair(state, index, pairCode, result);
	}
	return simplifyStates(result);
	}

	private static void updateStateForPair(State state, int index, int pairCode, Collection<State> result) {
	State stateNoBinary = state.endBinaryShift(index);
	// Possibility 1. Latch to MODE_PUNCT, and then append this code
	result.add(stateNoBinary.latchAndAppend(MODE_PUNCT, pairCode));
	if (state.getMode() != MODE_PUNCT) {
	// Possibility 2. Shift to MODE_PUNCT, and then append this code.
	// Every state except MODE_PUNCT (handled above) can shift
	result.add(stateNoBinary.shiftAndAppend(MODE_PUNCT, pairCode));
	}
	if (pairCode == 3 \|\| pairCode == 4) {
	// both characters are in DIGITS. Sometimes better to just add two digits
	State digitState = stateNoBinary
	.latchAndAppend(MODE_DIGIT, 16 - pairCode) // period or comma in DIGIT
	.latchAndAppend(MODE_DIGIT, 1); // space in DIGIT
	result.add(digitState);
	}
	if (state.getBinaryShiftByteCount() > 0) {
	// It only makes sense to do the characters as binary if we're already
	// in binary mode.
	State binaryState = state.addBinaryShiftChar(index).addBinaryShiftChar(index + 1);
	result.add(binaryState);
	}
	}

	private static Collection<State> simplifyStates(Iterable<State> states) {
	Collection<State> result = new LinkedList<>();
	for (State newState : states) {
	boolean add = true;
	for (Iterator<State> iterator = result.iterator(); iterator.hasNext();) {
	State oldState = iterator.next();
	if (oldState.isBetterThanOrEqualTo(newState)) {
	add = false;
	break;
	}
	if (newState.isBetterThanOrEqualTo(oldState)) {
	iterator.remove();
	}
	}
	if (add) {
	result.add(newState);
	}
	}
	return result;
	}

	}