#!/bin/bash
# File: runalltests.sh
# Description: Script to run a set of UNLV test sets.
# Author: Ray Smith
# Created: Thu Jun 14 08:21:01 PDT 2007
#
# (C) Copyright 2007, Google Inc.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Preflight checks. Each failed check prints a message on stderr and exits
# with status 1:
#   - exactly two arguments (unlv-data-dir, version-id) must be given;
#   - the current directory must contain ccmain (the tesseract-ocr root);
#   - a tesseract binary must be readable (ccmain/tesseract or tesseract.exe);
#   - the UNLV accuracy tool must be readable under testing/unlv.
if [[ $# -ne 2 ]]; then
  echo "Usage:$0 unlv-data-dir version-id" >&2
  exit 1
fi
if [[ ! -d ccmain ]]; then
  echo "Run $0 from the tesseract-ocr root directory!" >&2
  exit 1
fi
# Either the plain binary or the .exe variant is accepted.
if [[ ! -r ccmain/tesseract && ! -r tesseract.exe ]]; then
  echo "Please build tesseract before running $0" >&2
  exit 1
fi
if [[ ! -r testing/unlv/accuracy && ! -r testing/unlv/accuracy.exe ]]; then
  echo "Please download the UNLV accuracy tools (and build) to testing/unlv" >&2
  exit 1
fi

#######################################
# deltapc new old
# Prints the percentage change from old to new with two decimals and no
# trailing newline.
# Arguments: $1 - new value, $2 - old (baseline) value
# Outputs:   "100*(new-old)/old" formatted as %.2f on stdout. When old is
#            zero the ratio is undefined: prints "0.00" if new is also
#            zero, otherwise "inf", instead of aborting in awk.
#######################################
deltapc() {
  # Values are handed to awk with -v rather than spliced into the program
  # text, so empty or odd arguments cannot break the awk syntax.
  awk -v new="$1" -v old="$2" 'BEGIN {
    if (old + 0 == 0) {
      printf("%s", (new + 0 == 0) ? "0.00" : "inf");
    } else {
      printf("%.2f", 100.0 * (new - old) / old);
    }
  }'
}

#######################################
# timesum file
# Computes the total cpu time recorded in a times file.
# Arguments: $1 - file whose second whitespace-separated column holds a
#                 time value on every line
# Outputs:   the sum of column 2, formatted as %.2f, followed by a newline
#######################################
timesum() {
  # The file name is quoted so paths containing whitespace work.
  awk '
    { total += $2 }
    END { printf("%.2f\n", total + 0) }
  ' "$1"
}

# Settings used by the rest of the script.
imdir="$1"  # UNLV data directory; page lists are read from $imdir/<set>/pages
vid="$2"    # version id, used as the prefix of the generated report files
# Directory part of $0; the helper scripts are invoked from there.
bindir=${0%/*}
if [[ "$bindir" == "$0" ]]; then
  # $0 contained no slash, so fall back to the current directory.
  bindir="./"
fi
rdir=testing/reports
# Space-separated list of test sets; iterated with word splitting.
testsets="bus.3B doe3.3B mag.3B news.3B"

# Running totals over all test sets: character, word and nonstop word
# errors of this run, followed by the same three for the 1995 reference.
totalerrs=0
totalwerrs=0
totalnswerrs=0
totalolderrs=0
totaloldwerrs=0
totaloldnswerrs=0
# $testsets is a space-separated list, so it is left unquoted on purpose.
for set in $testsets
do
  pages="$imdir/$set/pages"
  # Test sets without a readable pages file are skipped silently.
  if [[ -r "$pages" ]]; then
    # Run tesseract on all the pages.
    "$bindir/runtestset.sh" "$pages"
    # Count the errors on all the pages.
    "$bindir/counttestset.sh" "$pages"
    # Get the old character, word and nonstop word errors: TAB-separated
    # fields 3, 6 and 9 of the 1995 summary for this set.
    oldsum="$rdir/1995.$set.sum"
    olderrs=$(cut -f3 "$oldsum")
    oldwerrs=$(cut -f6 "$oldsum")
    oldnswerrs=$(cut -f9 "$oldsum")
    # Get the new character, word and nonstop word errors and accuracy.
    # Errors come from line 4 and accuracy from line 5 of each report
    # (columns 1-9); the nonstop word figures come from the second line
    # containing "Total" in the word report.
    characc="$rdir/$set.characc"
    wordacc="$rdir/$set.wordacc"
    cherrs=$(head -4 "$characc" | tail -1 | cut -c1-9 | tr -d '[:blank:]')
    chacc=$(head -5 "$characc" | tail -1 | cut -c1-9 | tr -d '[:blank:]')
    wderrs=$(head -4 "$wordacc" | tail -1 | cut -c1-9 | tr -d '[:blank:]')
    wdacc=$(head -5 "$wordacc" | tail -1 | cut -c1-9 | tr -d '[:blank:]')
    nswderrs=$(grep Total "$wordacc" | head -2 | tail -1 \
      | cut -c10-17 | tr -d '[:blank:]')
    nswdacc=$(grep Total "$wordacc" | head -2 | tail -1 \
      | cut -c19-26 | tr -d '[:blank:]')
    # Compute the percent change.
    chdelta=$(deltapc "$cherrs" "$olderrs")
    wdelta=$(deltapc "$wderrs" "$oldwerrs")
    nswdelta=$(deltapc "$nswderrs" "$oldnswerrs")
    sumfile="$rdir/$vid.$set.sum"
    if [[ -r "$rdir/$set.times" ]]; then
      total_time=$(timesum "$rdir/$set.times")
      if [[ -r "$rdir/prev/$set.times" ]]; then
        # Per-page time difference against the previous run, sorted
        # numerically by the difference.
        paste "$rdir/prev/$set.times" "$rdir/$set.times" \
          | awk '{ printf("%s %.2f\n", $1, $4-$2); }' \
          | sort -k2n >"$rdir/$set.timedelta"
      fi
    else
      total_time='0.0'
    fi
    # One TAB-separated summary line, in the same field layout that is read
    # back above with cut -f3/-f6/-f9 from the 1995 summaries.
    printf '%s\t%s\t%s\t%s\t%s%%\t%s\t%s\t%s%%\t%s\t%s\t%s%%\t%ss\n' \
      "$vid" "$set" "$cherrs" "$chacc" "$chdelta" "$wderrs" "$wdacc" \
      "$wdelta" "$nswderrs" "$nswdacc" "$nswdelta" "$total_time" >"$sumfile"
    # Sum totals over all the testsets.
    totalerrs=$((totalerrs + cherrs))
    totalwerrs=$((totalwerrs + wderrs))
    totalnswerrs=$((totalnswerrs + nswderrs))
    totalolderrs=$((totalolderrs + olderrs))
    totaloldwerrs=$((totaloldwerrs + oldwerrs))
    totaloldnswerrs=$((totaloldnswerrs + oldnswerrs))
  fi
done
# Compute grand total percent change.
chdelta=$(deltapc "$totalerrs" "$totalolderrs")
wdelta=$(deltapc "$totalwerrs" "$totaloldwerrs")
nswdelta=$(deltapc "$totalnswerrs" "$totaloldnswerrs")
tfile="$rdir/$vid.total.sum"
# TAB-separated, with "-" in the accuracy columns so the totals line has
# the same field positions as the per-set summary lines.
printf '%s\tTotal\t%s\t-\t%s%%\t%s\t-\t%s%%\t%s\t-\t%s%%\n' \
  "$vid" "$totalerrs" "$chdelta" "$totalwerrs" "$wdelta" \
  "$totalnswerrs" "$nswdelta" >"$tfile"
# Combine the 1995 reference summaries with every summary of this version
# (including the totals line) into a single report.
cat "$rdir"/1995.*.sum "$rdir/$vid".*.sum >"$rdir/$vid.summary"