blob: f644b3d010bd19a1588911f53a3b5fb6f1072cab [file] [log] [blame]
/*
**
** Copyright 2008, Google Inc.
**
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
**
** http://www.apache.org/licenses/LICENSE-2.0
**
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*/
#include <nativehelper/jni.h>
#include <assert.h>
#include <dirent.h>
#include <ctype.h>
#include "baseapi.h"
#include "varable.h"
#include "tessvars.h"
#ifdef HAVE_LIBLEPT
// Include leptonica library only if autoconf (or makefile etc) tell us to.
#include "allheaders.h"
#endif
#define DEBUG 0
#if DEBUG
#include <stdio.h>
BOOL_VAR (tessedit_write_images, TRUE,
"Capture the image from the IPE");
#endif
#define LOG_NDEBUG 0
#define LOG_TAG "OcrLib(native)"
#include <utils/Log.h>
#define TESSBASE "/sdcard/"
static jfieldID field_mNativeData;
/*
 * Per-instance native state. A pointer to one of these is kept in the Java
 * object's mNativeData field and fetched with get_native_data().
 */
struct native_data_t {
    // Every pointer member starts out NULL so that the release path can
    // tell "nothing held" apart from "something to free". This includes pix,
    // which ocr_release_image() destroys even when the image was supplied
    // as a raw buffer and pix was therefore never assigned.
    native_data_t()
        : image_obj(NULL), image_buffer(NULL)
#ifdef HAVE_LIBLEPT
        , pix(NULL)
#endif
    {}

    tesseract::TessBaseAPI api;  // the OCR engine instance
    jbyteArray image_obj;        // global reference to the Java byte[] in use
    jbyte* image_buffer;         // elements obtained from image_obj
#ifdef HAVE_LIBLEPT
    PIX* pix;                    // Leptonica image decoded from image_buffer
#endif
};
/*
 * Reads the integer handle stored in the object's mNativeData field and
 * reinterprets it as the native_data_t pointer it was created from.
 */
static inline native_data_t * get_native_data(JNIEnv *env, jobject object) {
    const jint handle = env->GetIntField(object, field_mNativeData);
    return (native_data_t *)(handle);
}
/*
 * One node of the singly-linked list of known languages. Each node owns a
 * private copy of the language name.
 */
struct language_info_t {
    // Duplicates lang so the node does not depend on the caller's buffer.
    // next starts as NULL so a freshly built node is a valid one-element list.
    language_info_t(const char *lang, int shards) :
        next(NULL), lang(strdup(lang)), shards(shards) { }
    ~language_info_t() { free(lang); }

    language_info_t *next;  // following node, or NULL at the end of the list
    char *lang;             // owned copy of the language name
    int shards;             // number of shards recorded for this language

private:
    // The node owns lang; copying it would free the same buffer twice, so
    // copying is disabled (declared, never defined).
    language_info_t(const language_info_t &);
    language_info_t &operator=(const language_info_t &);
};
static struct language_info_t *languages;
static int num_languages;
/*
 * Walks the global language list and returns the node whose name equals
 * lang, or NULL when no such node exists.
 */
static language_info_t* find_language(const char *lang)
{
    LOGV(__FUNCTION__);
    for (language_info_t *node = languages; node != NULL; node = node->next) {
        if (strcmp(node->lang, lang) == 0)
            return node;
    }
    return NULL;
}
static void add_language(char *lang, int shards)
{
LOGV(__FUNCTION__);
language_info_t *trav = find_language(lang);
if (trav) {
if (shards > trav->shards) {
LOGI("UPDATE LANG %s SHARDS %d", lang, shards);
trav->shards = shards;
}
return;
}
LOGI("ADD NEW LANG %s SHARDS %d", lang, shards);
trav = new language_info_t(lang, shards);
trav->next = languages;
languages = trav;
num_languages++;
}
static void free_languages()
{
LOGV(__FUNCTION__);
language_info_t *trav = languages, *old;
while (trav) {
old = trav;
LOGI("FREE LANG %s\n", trav->lang);
trav = trav->next;
delete old;
}
num_languages = 0;
}
/* Returns the current value of the global language counter. */
static int get_num_languages() {
    const int count = num_languages;
    return count;
}
static language_info_t *iter;
/*
 * Points the shared iterator at the head of the language list and returns
 * that head (NULL when the list is empty).
 */
static language_info_t* language_iter_init()
{
    return iter = languages;
}
/*
 * Advances the shared iterator by one node and returns the new position.
 * Once the iterator is NULL it stays NULL.
 */
static language_info_t* language_iter_next()
{
    if (iter == NULL)
        return NULL;
    iter = iter->next;
    return iter;
}
#if DEBUG
#include <string.h>
#include <errno.h>
#include <unistd.h>
#include <stdlib.h>
#include <fcntl.h>
#include <sys/mman.h>
#include <sys/types.h>
#include <sys/stat.h>
/*
 * FAILIF(cond, fmt, ...): when cond is true, logs the current file and line,
 * then the printf-style message, and returns from the enclosing function.
 * The bare `return;` means it can only be used inside functions returning
 * void, and it performs no cleanup of resources held by the caller.
 */
#define FAILIF(cond, msg...) do { \
if (cond) { \
LOGE("%s(%d): ", __FILE__, __LINE__); \
LOGE(msg); \
return; \
} \
} while(0)
void test_ocr(const char *infile, int x, int y, int bpp,
const char *outfile, const char *lang,
const char *ratings, const char *tessdata)
{
void *buffer;
struct stat s;
int ifd, ofd;
LOGI("input file %s\n", infile);
ifd = open(infile, O_RDONLY);
FAILIF(ifd < 0, "open(%s): %s\n", infile, strerror(errno));
FAILIF(fstat(ifd, &s) < 0, "fstat(%d): %s\n", ifd, strerror(errno));
LOGI("file size %lld\n", s.st_size);
buffer = mmap(NULL, s.st_size, PROT_READ, MAP_PRIVATE, ifd, 0);
FAILIF(buffer == MAP_FAILED, "mmap(): %s\n", strerror(errno));
LOGI("infile mmapped at %p\n", buffer);
FAILIF(!tessdata, "You must specify a path for tessdata.\n");
tesseract::TessBaseAPI api;
LOGI("tessdata %s\n", tessdata);
LOGI("lang %s\n", lang);
FAILIF(api.Init(tessdata, lang), "could not initialize tesseract\n");
if (ratings) {
LOGI("ratings %s\n", ratings);
api.ReadConfigFile(ratings, false);
}
LOGI("set image x=%d, y=%d bpp=%d\n", x, y, bpp);
FAILIF(!bpp || bpp == 2 || bpp > 4,
"Invalid value %d of bpp\n", bpp);
api.SetImage((const unsigned char *)buffer, x, y, bpp, bpp*x);
LOGI("set rectangle to cover entire image\n");
api.SetRectangle(0, 0, x, y);
LOGI("set page seg mode to single character\n");
api.SetPageSegMode(tesseract::PSM_SINGLE_CHAR);
LOGI("recognize\n");
char * text = api.GetUTF8Text();
if (tessedit_write_images) {
page_image.write("tessinput.tif");
}
FAILIF(text == NULL, "didn't recognize\n");
FILE* fp = fopen(outfile, "w");
if (fp != NULL) {
LOGI("write to output %s\n", outfile);
fwrite(text, strlen(text), 1, fp);
fclose(fp);
}
else LOGI("could not write to output %s\n", outfile);
int mean_confidence = api.MeanTextConf();
LOGI("mean confidence: %d\n", mean_confidence);
int* confs = api.AllWordConfidences();
int len, *trav;
for (len = 0, trav = confs; *trav != -1; trav++, len++)
LOGI("confidence %d: %d\n", len, *trav);
free(confs);
LOGI("clearing api\n");
api.Clear();
LOGI("clearing adaptive classifier\n");
api.ClearAdaptiveClassifier();
LOGI("clearing text\n");
delete [] text;
}
#endif
/*
 * Initializes the Tesseract engine of this instance for the given language,
 * loading data from under TESSBASE.
 *
 * Returns JNI_TRUE on success; JNI_FALSE when the native data or lang is
 * missing, the string cannot be extracted, or Init() reports failure.
 */
jboolean
ocr_open(JNIEnv *env, jobject thiz, jstring lang)
{
    LOGV(__FUNCTION__);
    native_data_t *nat = get_native_data(env, thiz);
    if (nat == NULL) {
        LOGE("native data is null!");
        return JNI_FALSE;
    }
    if (lang == NULL) {
        LOGE("lang string is null!");
        return JNI_FALSE;
    }
    const char *c_lang = env->GetStringUTFChars(lang, NULL);
    if (c_lang == NULL) {
        LOGE("could not extract lang string!");
        return JNI_FALSE;
    }
    jboolean res = JNI_TRUE;
    LOGI("lang %s\n", c_lang);
    // Init() returns non-zero on failure.
    if (nat->api.Init(TESSBASE, c_lang)) {
        LOGE("could not initialize tesseract!");
        res = JNI_FALSE;
    }
    else {
        LOGI("lang %s initialization complete\n", c_lang);
        // Only claim success on the success path.
        LOGI("successfully initialized tesseract!");
    }
    env->ReleaseStringUTFChars(lang, c_lang);
    return res;
}
/*
 * Debug aid, compiled to a no-op unless DEBUG is non-zero. In debug builds
 * it optionally writes the page image to TESSBASE "tessinput.tif" and, when
 * text is non-NULL, writes it to TESSBASE "out.txt".
 */
static void dump_debug_data(char *text)
{
#if DEBUG
    if (tessedit_write_images)
        page_image.write(TESSBASE "tessinput.tif");

    if (text == NULL)
        return;

    const char *outfile = TESSBASE "out.txt";
    LOGI("write to output %s\n", outfile);
    FILE* out = fopen(outfile, "w");
    if (out == NULL)
        return;
    fwrite(text, strlen(text), 1, out);
    fclose(out);
#endif
}
#ifdef HAVE_LIBLEPT
/*
 * Takes an encoded image (a Java byte[]), pins its bytes, decodes them with
 * Leptonica and hands the resulting pix to Tesseract. The array stays
 * referenced and pinned until ocr_release_image() is called.
 *
 * When image is NULL or the bytes cannot be decoded, the error is logged and
 * the instance is left with no image set.
 */
void
ocr_set_encoded(JNIEnv *env, jobject thiz,
                jbyteArray image)
{
    LOGV(__FUNCTION__);
    LOGI("set encoded\n");
    native_data_t *nat = get_native_data(env, thiz);
    if (image == NULL) {
        LOGE("image array is null!");
        return;
    }
    LOG_ASSERT(nat->image_obj == NULL && nat->image_buffer == NULL,
               "image %p and/or image_buffer %p are not NULL!",
               nat->image_obj,
               nat->image_buffer);
    nat->image_obj = (jbyteArray)env->NewGlobalRef(image);
    nat->image_buffer = env->GetByteArrayElements(nat->image_obj, NULL);
    LOG_ASSERT(nat->image_buffer != NULL, "image buffer is NULL!");
    int length = env->GetArrayLength(image);
    nat->pix = pixReadMem((const l_uint8 *)nat->image_buffer, length);
    if (nat->pix == NULL) {
        // Decoding failed: never pass a NULL pix to the engine, and undo the
        // pin/global ref so a later set call starts from a clean state.
        LOGE("could not decode image of %d bytes!", length);
        env->ReleaseByteArrayElements(nat->image_obj,
                                      nat->image_buffer, JNI_ABORT);
        env->DeleteGlobalRef(nat->image_obj);
        nat->image_obj = NULL;
        nat->image_buffer = NULL;
        return;
    }
    nat->api.SetImage(nat->pix);
}
/*
 * Replaces the instance's pix with an 8-bit, background-normalized version
 * (pixConvertTo8 followed by pixBackgroundNormMorph with the given
 * reduction, size and bgval) and passes the new pix to Tesseract.
 *
 * If either Leptonica step fails, the error is logged and the current pix is
 * left untouched.
 */
void
ocr_normalize_bg(JNIEnv *env, jobject thiz,
                 jint reduction, jint size, jint bgval)
{
    LOGV(__FUNCTION__);
    LOGI("norm bg\n");
    native_data_t *nat = get_native_data(env, thiz);
    LOG_ASSERT(nat->pix != NULL, "pix %p is NULL!", nat->pix);

    PIX *pixg = pixConvertTo8(nat->pix, 0);
    if (pixg == NULL) {
        LOGE("could not convert image to 8 bpp!");
        return;
    }
    PIX *normalized = pixBackgroundNormMorph(pixg, NULL, reduction, size, bgval);
    pixDestroy(&pixg);
    if (normalized == NULL) {
        LOGE("background normalization failed!");
        return;
    }

    // Only drop the original once a replacement actually exists.
    pixDestroy(&nat->pix);
    nat->pix = normalized;
    // NOTE(review): the engine was given the previous pix in
    // ocr_set_encoded(); hand it the normalized one so recognition sees the
    // result of this call. Confirm against the TessBaseAPI version in use.
    nat->api.SetImage(nat->pix);
}
#endif
/*
 * Sets a raw image for recognition. image holds height rows of width pixels,
 * bpp bytes per pixel, tightly packed (stride = bpp * width). The array is
 * kept referenced and pinned until ocr_release_image() is called, because
 * the engine is handed a pointer into it.
 *
 * Invalid arguments (NULL array, non-positive dimensions, a bpp rejected by
 * the same rule test_ocr() uses, or an array too short for the described
 * image) are logged and ignored, leaving no image set.
 */
void
ocr_set_image(JNIEnv *env, jobject thiz,
              jbyteArray image,
              jint width, jint height,
              jint bpp)
{
    LOGV(__FUNCTION__);
    LOGI("set image x=%d, y=%d, bpp=%d\n", width, height, bpp);
    native_data_t *nat = get_native_data(env, thiz);
    if (image == NULL) {
        LOGE("image array is null!");
        return;
    }
    if (width <= 0 || height <= 0 || bpp < 1 || bpp == 2 || bpp > 4) {
        LOGE("invalid image geometry %dx%d at %d bpp!", width, height, bpp);
        return;
    }
    // Make sure the engine cannot read past the end of the Java array.
    // The comparison is arranged so that nothing can overflow.
    const jlong available = env->GetArrayLength(image);
    const jlong row_bytes = (jlong)width * bpp;
    if (row_bytes > available || (jlong)height > available / row_bytes) {
        LOGE("image array of %lld bytes is too small for %dx%d at %d bpp!",
             (long long)available, width, height, bpp);
        return;
    }
    LOG_ASSERT(nat->image_obj == NULL && nat->image_buffer == NULL,
               "image %p and/or image_buffer %p are not NULL!",
               nat->image_obj,
               nat->image_buffer);
    nat->image_obj = (jbyteArray)env->NewGlobalRef(image);
    nat->image_buffer = env->GetByteArrayElements(nat->image_obj, NULL);
    LOG_ASSERT(nat->image_buffer != NULL, "image buffer is NULL!");
    nat->api.SetImage((const unsigned char *)nat->image_buffer,
                      width, height, bpp, bpp*width);
}
/*
 * Undoes ocr_set_image()/ocr_set_encoded(): unpins the byte array without
 * copying anything back (JNI_ABORT), drops the global reference, and, when
 * Leptonica is compiled in, destroys the pix. Does nothing when no image
 * buffer is currently held.
 */
void
ocr_release_image(JNIEnv *env, jobject thiz)
{
    LOGV(__FUNCTION__);
    native_data_t *data = get_native_data(env, thiz);
    if (data->image_buffer == NULL)
        return;

    LOGI("releasing image buffer");
    env->ReleaseByteArrayElements(data->image_obj,
                                  data->image_buffer, JNI_ABORT);
    env->DeleteGlobalRef(data->image_obj);
    data->image_obj = NULL;
    data->image_buffer = NULL;
#ifdef HAVE_LIBLEPT
    pixDestroy(&data->pix);
    data->pix = NULL;
#endif
}
/*
 * Restricts recognition to a sub-rectangle of the current image. Must be
 * called after an image has been set. Each SetRectangle clears the
 * recognition results, so several rectangles can be recognized in turn on
 * the same image.
 */
void
ocr_set_rectangle(JNIEnv *env, jobject thiz,
                  jint left, jint top,
                  jint width, jint height)
{
    LOGV(__FUNCTION__);
    native_data_t *data = get_native_data(env, thiz);
    LOGI("set rectangle left=%d, top=%d, width=%d, height=%d\n",
         left, top, width, height);
    // An image must already be in place.
    LOG_ASSERT(data->image_obj != NULL && data->image_buffer != NULL,
               "image and/or image_buffer are NULL!");
    data->api.SetRectangle(left, top, width, height);
}
/*
 * Runs recognition on the current image and returns the text as a Java
 * string. Returns NULL when the engine produced no text or the Java string
 * could not be created.
 */
jstring
ocr_recognize(JNIEnv *env, jobject thiz)
{
    LOGV(__FUNCTION__);
    native_data_t *nat = get_native_data(env, thiz);
    LOG_ASSERT(nat->image_obj != NULL && nat->image_buffer != NULL,
               "image and/or image_buffer are NULL!");
    LOGI("BEFORE RECOGNIZE");
    char * text = nat->api.GetUTF8Text();
    LOGI("AFTER RECOGNIZE");
    dump_debug_data(text);
    if (text == NULL) {
        // Don't hand a NULL C string to NewStringUTF.
        LOGE("recognition returned no text!");
        return NULL;
    }
    jstring result = env->NewStringUTF(text);
    // The text buffer belongs to us and is released with delete [], as
    // test_ocr() does; NewStringUTF has already copied its contents.
    delete [] text;
    return result;
}
/* Returns the engine's (average) text confidence, a value between 0 and 100. */
static jint
ocr_mean_confidence(JNIEnv *env, jobject thiz)
{
    LOGV(__FUNCTION__);
    native_data_t *data = get_native_data(env, thiz);
    const jint confidence = data->api.MeanTextConf();
    return confidence;
}
/*
 * Returns all word confidences (between 0 and 100) as a Java int[].
 * The engine supplies them as a -1 terminated array that the caller must
 * delete []; the terminator is not included in the Java array. The number
 * of confidences should correspond to the number of space-delimited words
 * in GetUTF8Text.
 *
 * Returns NULL when the engine has no confidences or the Java array cannot
 * be allocated.
 */
static jintArray
ocr_word_confidences(JNIEnv *env, jobject thiz)
{
    LOGV(__FUNCTION__);
    native_data_t *nat = get_native_data(env, thiz);
    int* confs = nat->api.AllWordConfidences();
    if (confs == NULL) {
        LOGE("Could not get word-confidence values!");
        return NULL;
    }
    // Count entries up to (not including) the -1 terminator.
    int len = 0;
    while (confs[len] != -1)
        len++;
    LOGI("Confidence array has %d elements", len);
    jintArray ret = env->NewIntArray(len);
    if (ret == NULL) {
        LOGE("Could not create Java confidence array!");
        delete [] confs;
        return NULL;
    }
    env->SetIntArrayRegion(ret, 0, len, confs);
    delete [] confs;
    return ret;
}
/*
 * Sets the value of an internal engine "variable" (of either old or new
 * types). Supply the name and the value as strings, just as you would in a
 * config file.
 * Eg SetVariable("tessedit_char_blacklist", "xyz"); to ignore x, y and z.
 * Or SetVariable("bln_numericmode", "1"); to set numeric-only mode.
 * SetVariable may be used before Init, but settings will revert to defaults
 * on End().
 *
 * Returns JNI_FALSE if the name lookup failed, if either argument is NULL,
 * or if a string could not be extracted.
 */
static jboolean
ocr_set_variable(JNIEnv *env, jobject thiz,
                 jstring var, jstring value)
{
    LOGV(__FUNCTION__);
    native_data_t *nat = get_native_data(env, thiz);
    if (var == NULL || value == NULL) {
        LOGE("variable name and/or value is null!");
        return JNI_FALSE;
    }
    const char *c_var = env->GetStringUTFChars(var, NULL);
    if (c_var == NULL) {
        LOGE("could not extract variable name!");
        return JNI_FALSE;
    }
    const char *c_value = env->GetStringUTFChars(value, NULL);
    if (c_value == NULL) {
        LOGE("could not extract variable value!");
        env->ReleaseStringUTFChars(var, c_var);
        return JNI_FALSE;
    }
    jboolean set = nat->api.SetVariable(c_var, c_value) ? JNI_TRUE : JNI_FALSE;
    env->ReleaseStringUTFChars(var, c_var);
    env->ReleaseStringUTFChars(value, c_value);
    return set;
}
/*
 * Frees recognition results and any stored image data without discarding
 * recognition data that would be slow to reload, then makes the engine
 * forget its adaptive data. Afterwards an image must be set again before any
 * Recognize or Get* operation. Intended to be called between pages or
 * documents to free up memory.
 */
static void
ocr_clear_results(JNIEnv *env, jobject thiz)
{
    LOGV(__FUNCTION__);
    LOGI("releasing all memory");
    native_data_t *data = get_native_data(env, thiz);
    tesseract::TessBaseAPI &engine = data->api;
    engine.Clear();
    LOGI("clearing adaptive classifier");
    engine.ClearAdaptiveClassifier();
}
/*
 * Shuts the engine down and frees all of its memory. End() is equivalent to
 * destructing and reconstructing the TessBaseAPI; after it, only Init and
 * the API functions declared above it in the class may be used.
 */
static void
ocr_close(JNIEnv *env, jobject thiz)
{
    LOGV(__FUNCTION__);
    native_data_t *data = get_native_data(env, thiz);
    tesseract::TessBaseAPI &engine = data->api;
    engine.End();
}
/* Passes mode, converted to tesseract::PageSegMode, to the engine. */
static void
ocr_set_page_seg_mode(JNIEnv *env, jobject thiz, jint mode)
{
    LOGV(__FUNCTION__);
    native_data_t *data = get_native_data(env, thiz);
    const tesseract::PageSegMode seg_mode = (tesseract::PageSegMode)mode;
    data->api.SetPageSegMode(seg_mode);
}
/*
 * Scans TESSBASE "tessdata" for regular files named
 * "<lang>[<shard>].traineddata", records each language (with shard count
 * = highest shard number + 1, or 0 when no number is present) in the global
 * language list, and returns the known language names as a Java String[].
 *
 * Returns NULL when the directory cannot be opened or a Java object cannot
 * be created.
 */
static jobjectArray
ocr_get_languages(JNIEnv *env, jclass clazz)
{
    LOGV(__FUNCTION__);
    static const char suffix[] = ".traineddata";
    const size_t suffix_len = sizeof(suffix) - 1;

    DIR *tessdata = opendir(TESSBASE "tessdata");
    if (tessdata == NULL) {
        LOGE("Could not open tessdata directory %s", TESSBASE "tessdata");
        return NULL;
    }
    dirent *ent;
    LOGI("readdir");
    while ((ent = readdir(tessdata))) {
        if (ent->d_type != DT_REG)
            continue;
        // The name must END in the suffix and have a non-empty stem, so that
        // e.g. "eng.traineddata.tmp" is not mistaken for a language.
        const size_t name_len = strlen(ent->d_name);
        if (name_len <= suffix_len ||
            strcmp(ent->d_name + name_len - suffix_len, suffix) != 0)
            continue;

        char *where = ent->d_name + name_len - suffix_len;
        *where = 0;  // cut the suffix off, leaving "<lang>[<shard>]"
        where--;     // last character of the stem
        // Walk back over trailing digits. isdigit needs an unsigned char
        // value, since plain char may be negative.
        while (where != ent->d_name && isdigit((unsigned char)*where))
            where--;
        // we backtracked one too much
        char *end = ++where;
        // if there was a number, it will be written in
        // shard, otherwise shard will remain -1.
        int shard = -1;
        sscanf(end, "%d", &shard);
        *end = 0;
        add_language(ent->d_name, shard + 1);
    }
    closedir(tessdata);

    jclass stringClass = env->FindClass("java/lang/String");
    if (stringClass == NULL) {
        LOGE("Could not find java/lang/String!");
        return NULL;
    }
    jobjectArray langsArray =
        env->NewObjectArray(get_num_languages(), stringClass, NULL);
    if (langsArray == NULL) {
        LOGE("Could not create Java object array!");
        return NULL;
    }
    int i = 0;
    language_info_t *it = language_iter_init();
    for (; it; i++, it = language_iter_next()) {
        jstring name = env->NewStringUTF(it->lang);
        if (name == NULL) {
            LOGE("Could not create Java string for language %s!", it->lang);
            return NULL;
        }
        env->SetObjectArrayElement(langsArray, i, name);
        // The array holds its own reference; release ours so a long list
        // cannot exhaust the local reference table.
        env->DeleteLocalRef(name);
    }
    return langsArray;
}
/*
 * Returns the shard count recorded for lang in the global language list,
 * or -1 when lang is NULL, cannot be extracted, or is not in the list.
 */
static jint
ocr_get_shards(JNIEnv *env, jclass clazz, jstring lang)
{
    int ret = -1;
    if (lang == NULL) {
        LOGE("lang string is null!");
        return ret;
    }
    const char *c_lang = env->GetStringUTFChars(lang, NULL);
    if (c_lang == NULL) {
        LOGE("could not extract lang string!");
        return ret;
    }
    language_info_t* lang_entry = find_language(c_lang);
    if (lang_entry)
        ret = lang_entry->shards;
    LOGI("shards for lang %s: %d\n", c_lang, ret);
    env->ReleaseStringUTFChars(lang, c_lang);
    return ret;
}
/*
 * Looks up the int field "mNativeData" on clazz and caches its ID in
 * field_mNativeData for later use.
 */
static void class_init(JNIEnv* env, jclass clazz) {
    LOGV(__FUNCTION__);
    jfieldID native_data_field = env->GetFieldID(clazz, "mNativeData", "I");
    field_mNativeData = native_data_field;
}
/*
 * Allocates a native_data_t for object and stores its address, cast to jint,
 * in the mNativeData field. Logs and leaves the field untouched when the
 * allocation yields NULL.
 */
static void initialize_native_data(JNIEnv* env, jobject object) {
    LOGV(__FUNCTION__);
    native_data_t *data = new native_data_t;
    if (data != NULL) {
        env->SetIntField(object, field_mNativeData, (jint)data);
        return;
    }
    LOGE("%s: out of memory!", __FUNCTION__);
}
/*
 * Destroys the native state attached to object and empties the global
 * language list. Safe to call more than once on the same object.
 */
static void cleanup_native_data(JNIEnv* env, jobject object) {
    LOGV(__FUNCTION__);
    native_data_t *nat = get_native_data(env, object);
    if (nat != NULL) {
        // Unpin and unreference any image that was never released, so the
        // Java array is not leaked along with the native state.
        ocr_release_image(env, object);
        // Clear the handle before freeing, so the Java object never holds a
        // dangling pointer and a repeated cleanup becomes a no-op.
        env->SetIntField(object, field_mNativeData, 0);
        delete nat;
    }
    free_languages();
}
/*
 * Table binding Java native method names and JNI signatures to the C++
 * functions in this file. It is passed to registerNativeMethods() from
 * JNI_OnLoad(). The two Leptonica-backed entries are present only when
 * HAVE_LIBLEPT is defined.
 */
static JNINativeMethod methods[] = {
/* name, signature, funcPtr */
// Class and per-instance lifecycle.
{"classInitNative", "()V", (void*)class_init},
{"initializeNativeDataNative", "()V", (void *)initialize_native_data},
{"cleanupNativeDataNative", "()V", (void *)cleanup_native_data},
{"openNative", "(Ljava/lang/String;)Z", (void*)ocr_open},
#ifdef HAVE_LIBLEPT
// Encoded-image input and preprocessing.
{"setEncodedNative", "([B)V", (void*)ocr_set_encoded},
{"normalizeBgNative", "(III)V", (void*)ocr_normalize_bg},
#endif
// Raw image input, recognition and results.
{"setImageNative", "([BIII)V", (void*)ocr_set_image},
{"releaseImageNative", "()V", (void*)ocr_release_image},
{"setRectangleNative", "(IIII)V", (void*)ocr_set_rectangle},
{"recognizeNative", "()Ljava/lang/String;", (void*)ocr_recognize},
{"clearResultsNative", "()V", (void*)ocr_clear_results},
{"closeNative", "()V", (void*)ocr_close},
{"meanConfidenceNative", "()I", (void*)ocr_mean_confidence},
{"wordConfidencesNative", "()[I", (void*)ocr_word_confidences},
{"setVariableNative", "(Ljava/lang/String;Ljava/lang/String;)Z", (void*)ocr_set_variable},
{"setPageSegModeNative", "(I)V", (void*)ocr_set_page_seg_mode},
// Language discovery; these take a jclass rather than an instance.
{"getLanguagesNative", "()[Ljava/lang/String;", (void*)ocr_get_languages},
{"getShardsNative", "(Ljava/lang/String;)I", (void*)ocr_get_shards},
};
/*
 * Finds the class named className and registers the numMethods entries of
 * gMethods on it. Returns JNI_TRUE on success; logs and returns JNI_FALSE
 * when the class cannot be found or registration fails.
 */
static int registerNativeMethods(JNIEnv* env, const char* className,
                                 JNINativeMethod* gMethods, int numMethods)
{
    jclass target = env->FindClass(className);
    if (target != NULL) {
        if (env->RegisterNatives(target, gMethods, numMethods) >= 0)
            return JNI_TRUE;
        LOGE("RegisterNatives failed for %s", className);
        return JNI_FALSE;
    }
    LOGE("Native registration unable to find class %s", className);
    return JNI_FALSE;
}
/*
 * Lets GetEnv() write through a void* while the result is read back as a
 * JNIEnv*, without a pointer cast between the two types.
 */
typedef union {
    JNIEnv* env;
    void* venv;
} UnionJNIEnvToVoid;

/*
 * Library load hook: obtains the JNIEnv and registers this file's native
 * methods on com/android/ocr/OcrLib.
 *
 * Returns the JNI version on success, -1 on failure.
 */
jint JNI_OnLoad(JavaVM* vm, void* reserved)
{
    UnionJNIEnvToVoid uenv;
    uenv.venv = NULL;

    if (vm->GetEnv(&uenv.venv, JNI_VERSION_1_4) != JNI_OK) {
        LOGE("GetEnv failed\n");
        return (jint)-1;
    }

    JNIEnv* env = uenv.env;
    assert(env != NULL);
    LOGI("In OcrLib JNI_OnLoad\n");

    const int method_count = sizeof(methods) / sizeof(methods[0]);
    const int registered = registerNativeMethods(env,
                                                 "com/android/ocr/OcrLib",
                                                 methods,
                                                 method_count);
    if (registered == JNI_FALSE) {
        LOGE("OcrLib native registration failed\n");
        return (jint)-1;
    }

    /* success -- return valid version number */
    LOGI("OcrLib native registration succeeded!\n");
    return (jint)JNI_VERSION_1_4;
}