blob: 0f287b61eb63523fe3e63e9d113650358cd0fc61 [file] [log] [blame]
/*
* Copyright 2021 ZXing authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.google.zxing.common;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.List;
/**
* Class that converts a character string into a sequence of ECIs and bytes
*
* The implementation uses the Dijkstra algorithm to produce minimal encodings
*
* @author Alex Geller
*/
public class MinimalECIInput implements ECIInput {
private static final int COST_PER_ECI = 3; // approximated (latch + 2 codewords)
private final int[] bytes;
private final int fnc1;
/**
* Constructs a minimal input
*
* @param stringToEncode the character string to encode
* @param priorityCharset The preferred {@link Charset}. When the value of the argument is null, the algorithm
* chooses charsets that leads to a minimal representation. Otherwise the algorithm will use the priority
* charset to encode any character in the input that can be encoded by it if the charset is among the
* supported charsets.
* @param fnc1 denotes the character in the input that represents the FNC1 character or -1 if this is not GS1
* input.
*/
public MinimalECIInput(String stringToEncode, Charset priorityCharset, int fnc1) {
this.fnc1 = fnc1;
ECIEncoderSet encoderSet = new ECIEncoderSet(stringToEncode, priorityCharset, fnc1);
if (encoderSet.length() == 1) { //optimization for the case when all can be encoded without ECI in ISO-8859-1
bytes = new int[stringToEncode.length()];
for (int i = 0; i < bytes.length; i++) {
char c = stringToEncode.charAt(i);
bytes[i] = c == fnc1 ? 1000 : (int) c;
}
} else {
bytes = encodeMinimally(stringToEncode, encoderSet, fnc1);
}
}
public int getFNC1Character() {
return fnc1;
}
/**
* Returns the length of this input. The length is the number
* of {@code byte}s, FNC1 characters or ECIs in the sequence.
*
* @return the number of {@code char}s in this sequence
*/
public int length() {
return bytes.length;
}
public boolean haveNCharacters(int index, int n) {
if (index + n - 1 >= bytes.length) {
return false;
}
for (int i = 0; i < n; i++) {
if (isECI(index + i)) {
return false;
}
}
return true;
}
/**
* Returns the {@code byte} value at the specified index. An index ranges from zero
* to {@code length() - 1}. The first {@code byte} value of the sequence is at
* index zero, the next at index one, and so on, as for array
* indexing.
*
* @param index the index of the {@code byte} value to be returned
*
* @return the specified {@code byte} value as character or the FNC1 character
*
* @throws IndexOutOfBoundsException
* if the {@code index} argument is negative or not less than
* {@code length()}
* @throws IllegalArgumentException
* if the value at the {@code index} argument is an ECI (@see #isECI)
*/
public char charAt(int index) {
if (index < 0 || index >= length()) {
throw new IndexOutOfBoundsException("" + index);
}
if (isECI(index)) {
throw new IllegalArgumentException("value at " + index + " is not a character but an ECI");
}
return isFNC1(index) ? (char) fnc1 : (char) bytes[index];
}
/**
* Returns a {@code CharSequence} that is a subsequence of this sequence.
* The subsequence starts with the {@code char} value at the specified index and
* ends with the {@code char} value at index {@code end - 1}. The length
* (in {@code char}s) of the
* returned sequence is {@code end - start}, so if {@code start == end}
* then an empty sequence is returned.
*
* @param start the start index, inclusive
* @param end the end index, exclusive
*
* @return the specified subsequence
*
* @throws IndexOutOfBoundsException
* if {@code start} or {@code end} are negative,
* if {@code end} is greater than {@code length()},
* or if {@code start} is greater than {@code end}
* @throws IllegalArgumentException
* if a value in the range {@code start}-{@code end} is an ECI (@see #isECI)
*/
public CharSequence subSequence(int start, int end) {
if (start < 0 || start > end || end > length()) {
throw new IndexOutOfBoundsException("" + start);
}
StringBuilder result = new StringBuilder();
for (int i = start; i < end; i++) {
if (isECI(i)) {
throw new IllegalArgumentException("value at " + i + " is not a character but an ECI");
}
result.append(charAt(i));
}
return result;
}
/**
* Determines if a value is an ECI
*
* @param index the index of the value
*
* @return true if the value at position {@code index} is an ECI
*
* @throws IndexOutOfBoundsException
* if the {@code index} argument is negative or not less than
* {@code length()}
*/
public boolean isECI(int index) {
if (index < 0 || index >= length()) {
throw new IndexOutOfBoundsException("" + index);
}
return bytes[index] > 255 && bytes[index] <= 999;
}
/**
* Determines if a value is the FNC1 character
*
* @param index the index of the value
*
* @return true if the value at position {@code index} is the FNC1 character
*
* @throws IndexOutOfBoundsException
* if the {@code index} argument is negative or not less than
* {@code length()}
*/
public boolean isFNC1(int index) {
if (index < 0 || index >= length()) {
throw new IndexOutOfBoundsException("" + index);
}
return bytes[index] == 1000;
}
/**
* Returns the {@code int} ECI value at the specified index. An index ranges from zero
* to {@code length() - 1}. The first {@code byte} value of the sequence is at
* index zero, the next at index one, and so on, as for array
* indexing.
*
* @param index the index of the {@code int} value to be returned
*
* @return the specified {@code int} ECI value.
* The ECI specified the encoding of all bytes with a higher index until the
* next ECI or until the end of the input if no other ECI follows.
*
* @throws IndexOutOfBoundsException
* if the {@code index} argument is negative or not less than
* {@code length()}
* @throws IllegalArgumentException
* if the value at the {@code index} argument is not an ECI (@see #isECI)
*/
public int getECIValue(int index) {
if (index < 0 || index >= length()) {
throw new IndexOutOfBoundsException("" + index);
}
if (!isECI(index)) {
throw new IllegalArgumentException("value at " + index + " is not an ECI but a character");
}
return bytes[index] - 256;
}
@Override
public String toString() {
StringBuilder result = new StringBuilder();
for (int i = 0; i < length(); i++) {
if (i > 0) {
result.append(", ");
}
if (isECI(i)) {
result.append("ECI(");
result.append(getECIValue(i));
result.append(')');
} else if (charAt(i) < 128) {
result.append('\'');
result.append(charAt(i));
result.append('\'');
} else {
result.append((int) charAt(i));
}
}
return result.toString();
}
static void addEdge(InputEdge[][] edges, int to, InputEdge edge) {
if (edges[to][edge.encoderIndex] == null ||
edges[to][edge.encoderIndex].cachedTotalSize > edge.cachedTotalSize) {
edges[to][edge.encoderIndex] = edge;
}
}
static void addEdges(String stringToEncode,
ECIEncoderSet encoderSet,
InputEdge[][] edges,
int from,
InputEdge previous,
int fnc1) {
char ch = stringToEncode.charAt(from);
int start = 0;
int end = encoderSet.length();
if (encoderSet.getPriorityEncoderIndex() >= 0 && (ch == fnc1 || encoderSet.canEncode(ch,
encoderSet.getPriorityEncoderIndex()))) {
start = encoderSet.getPriorityEncoderIndex();
end = start + 1;
}
for (int i = start; i < end; i++) {
if (ch == fnc1 || encoderSet.canEncode(ch,i)) {
addEdge(edges, from + 1, new InputEdge(ch, encoderSet, i, previous, fnc1));
}
}
}
static int[] encodeMinimally(String stringToEncode, ECIEncoderSet encoderSet, int fnc1) {
int inputLength = stringToEncode.length();
// Array that represents vertices. There is a vertex for every character and encoding.
InputEdge[][] edges = new InputEdge[inputLength + 1][encoderSet.length()];
addEdges(stringToEncode, encoderSet, edges, 0, null, fnc1);
for (int i = 1; i <= inputLength; i++) {
for (int j = 0; j < encoderSet.length(); j++) {
if (edges[i][j] != null && i < inputLength) {
addEdges(stringToEncode, encoderSet, edges, i, edges[i][j], fnc1);
}
}
//optimize memory by removing edges that have been passed.
for (int j = 0; j < encoderSet.length(); j++) {
edges[i - 1][j] = null;
}
}
int minimalJ = -1;
int minimalSize = Integer.MAX_VALUE;
for (int j = 0; j < encoderSet.length(); j++) {
if (edges[inputLength][j] != null) {
InputEdge edge = edges[inputLength][j];
if (edge.cachedTotalSize < minimalSize) {
minimalSize = edge.cachedTotalSize;
minimalJ = j;
}
}
}
if (minimalJ < 0) {
throw new IllegalStateException("Failed to encode \"" + stringToEncode + "\"");
}
List<Integer> intsAL = new ArrayList<>();
InputEdge current = edges[inputLength][minimalJ];
while (current != null) {
if (current.isFNC1()) {
intsAL.add(0, 1000);
} else {
byte[] bytes = encoderSet.encode(current.c,current.encoderIndex);
for (int i = bytes.length - 1; i >= 0; i--) {
intsAL.add(0, (bytes[i] & 0xFF));
}
}
int previousEncoderIndex = current.previous == null ? 0 : current.previous.encoderIndex;
if (previousEncoderIndex != current.encoderIndex) {
intsAL.add(0,256 + encoderSet.getECIValue(current.encoderIndex));
}
current = current.previous;
}
int[] ints = new int[intsAL.size()];
for (int i = 0; i < ints.length; i++) {
ints[i] = intsAL.get(i);
}
return ints;
}
private static final class InputEdge {
private final char c;
private final int encoderIndex; //the encoding of this edge
private final InputEdge previous;
private final int cachedTotalSize;
private InputEdge(char c, ECIEncoderSet encoderSet, int encoderIndex, InputEdge previous, int fnc1) {
this.c = c == fnc1 ? 1000 : c;
this.encoderIndex = encoderIndex;
this.previous = previous;
int size = this.c == 1000 ? 1 : encoderSet.encode(c, encoderIndex).length;
int previousEncoderIndex = previous == null ? 0 : previous.encoderIndex;
if (previousEncoderIndex != encoderIndex) {
size += COST_PER_ECI;
}
if (previous != null) {
size += previous.cachedTotalSize;
}
this.cachedTotalSize = size;
}
boolean isFNC1() {
return c == 1000;
}
}
}