mirror of
https://github.com/adaptech-cz/Tesseract4Android.git
synced 2026-01-09 06:12:45 +08:00
Update Tesseract to 5.3.2
This commit is contained in:
parent
66bade85ce
commit
67230142bb
@ -2,6 +2,9 @@ Amit Dovev <amitdev2222@gmail.com>
|
||||
|
||||
Egor Pugin <egor.pugin@gmail.com>
|
||||
|
||||
Jeff Breidenbach <breidenbach@gmail.com>
|
||||
Jeff Breidenbach <breidenbach@gmail.com> <jbreiden@google.com>
|
||||
|
||||
Jim O'Regan <joregan@gmail.com>
|
||||
Jim O'Regan <joregan@gmail.com> <joregan@gmail.com@d0cd1f9f-072b-0410-8dd7-cf729c803f20>
|
||||
Jim O'Regan <joregan@gmail.com> <joregan@d0cd1f9f-072b-0410-8dd7-cf729c803f20>
|
||||
|
||||
@ -96,6 +96,7 @@ option(ENABLE_NATIVE
|
||||
option(BUILD_TRAINING_TOOLS "Build training tools" ON)
|
||||
option(BUILD_TESTS "Build tests" OFF)
|
||||
option(USE_SYSTEM_ICU "Use system ICU" OFF)
|
||||
option(DISABLE_TIFF "Disable build with libtiff (if available)" OFF)
|
||||
option(DISABLE_ARCHIVE "Disable build with libarchive (if available)" OFF)
|
||||
option(DISABLE_CURL "Disable build with libcurl (if available)" OFF)
|
||||
option(INSTALL_CONFIGS "Install tesseract configs" ON)
|
||||
@ -323,7 +324,7 @@ if(OPENMP_BUILD)
|
||||
if(NOT OpenMP_FOUND
|
||||
AND CLANG
|
||||
AND WIN32)
|
||||
# workaroung because find_package(OpenMP) does not work for clang-cl
|
||||
# workaround because find_package(OpenMP) does not work for clang-cl
|
||||
# https://gitlab.kitware.com/cmake/cmake/issues/19404
|
||||
check_include_file_cxx(omp.h HAVE_OMP_H_INCLUDE)
|
||||
find_library(OpenMP_LIBRARY NAMES omp libomp.lib)
|
||||
@ -371,6 +372,7 @@ add_definitions("-DCMAKE_BUILD")
|
||||
# packages
|
||||
#
|
||||
# ##############################################################################
|
||||
include(CheckFunctions)
|
||||
|
||||
if(SW_BUILD)
|
||||
find_package(SW REQUIRED)
|
||||
@ -397,21 +399,37 @@ else()
|
||||
endif()
|
||||
if(NOT Leptonica_FOUND)
|
||||
message(FATAL_ERROR "Cannot find required library Leptonica. Quitting!")
|
||||
else()
|
||||
message(STATUS "Found leptonica version: ${Leptonica_VERSION}")
|
||||
endif(NOT Leptonica_FOUND)
|
||||
include_directories(${Leptonica_INCLUDE_DIRS})
|
||||
|
||||
# Check for optional libraries.
|
||||
find_package(TIFF) # for tesseract
|
||||
if(NOT TIFF_FOUND AND PKG_CONFIG_EXECUTABLE)
|
||||
# try PKG_CONFIG to find libtiff if cmake failed
|
||||
pkg_check_modules(TIFF libtiff-4)
|
||||
check_leptonica_tiff_support()
|
||||
if (NOT LEPT_TIFF_RESULT EQUAL 0)
|
||||
message(NOTICE "Leptonica was build without TIFF support! Disabling TIFF support...")
|
||||
set(DISABLE_TIFF ON)
|
||||
else()
|
||||
message(STATUS "Leptonica was build with TIFF support.")
|
||||
endif()
|
||||
if(TIFF_FOUND)
|
||||
set(HAVE_TIFFIO_H ON)
|
||||
include_directories(${TIFF_INCLUDE_DIRS})
|
||||
endif(TIFF_FOUND)
|
||||
|
||||
# Check for optional libraries.
|
||||
if(DISABLE_TIFF)
|
||||
set(HAVE_TIFFIO_H OFF)
|
||||
message(STATUS "TIFF support disabled.")
|
||||
else(DISABLE_TIFF)
|
||||
find_package(TIFF) # for tesseract
|
||||
if(NOT TIFF_FOUND AND PKG_CONFIG_EXECUTABLE)
|
||||
# try PKG_CONFIG to find libtiff if cmake failed
|
||||
pkg_check_modules(TIFF libtiff-4)
|
||||
endif()
|
||||
if(TIFF_FOUND)
|
||||
set(HAVE_TIFFIO_H ON)
|
||||
include_directories(${TIFF_INCLUDE_DIRS})
|
||||
endif(TIFF_FOUND)
|
||||
endif(DISABLE_TIFF)
|
||||
if(DISABLE_ARCHIVE)
|
||||
set(HAVE_LIBARCHIVE OFF)
|
||||
message(STATUS "LibArchive support disabled.")
|
||||
else(DISABLE_ARCHIVE)
|
||||
find_package(LibArchive)
|
||||
if(NOT LibArchive_FOUND AND PKG_CONFIG_EXECUTABLE)
|
||||
@ -425,6 +443,7 @@ else()
|
||||
endif(DISABLE_ARCHIVE)
|
||||
if(DISABLE_CURL)
|
||||
set(HAVE_LIBCURL OFF)
|
||||
message(STATUS "CURL support disabled.")
|
||||
else(DISABLE_CURL)
|
||||
find_package(CURL)
|
||||
if(NOT CURL_FOUND AND PKG_CONFIG_EXECUTABLE)
|
||||
|
||||
@ -1,3 +1,20 @@
|
||||
2023-07-11 - V5.3.2
|
||||
* Updates for snap package building.
|
||||
* Support for Sgaw and W Pwo Karen languages in the Myanmar validator (#4065).
|
||||
* Improve format of logging from lstmtraining.
|
||||
* Use fewer digits in filenames of checkpoints written by lstmtraining.
|
||||
* Replace deprecated sprintf.
|
||||
* Remove unused code in function fix_rep_char.
|
||||
* Avoid 32 bit overflow in multiplication (fixes 3 CodeQL CI alerts).
|
||||
* Avoid conversions from std::string to char* to std::string.
|
||||
* Abort with error message if OSD is requested with LSTM-only model.
|
||||
* cmake: allow disabling tiff (-DDISABLE_TIFF=ON).
|
||||
* cmake: provide info about disabled LibArchive and CURL.
|
||||
* cmake: check if leptonica was built with tiff support.
|
||||
* Remove old broken GitHub action vcpkg-4.1.1 (fixes issue #4078).
|
||||
* Create config.yml.
|
||||
* Fix typos.
|
||||
|
||||
2023-04-01 - V5.3.1
|
||||
* Bug fixes for some special scenarios:
|
||||
* Fix issue #4010.
|
||||
|
||||
@ -734,10 +734,15 @@ bin_PROGRAMS = tesseract
|
||||
tesseract_SOURCES = src/tesseract.cpp
|
||||
tesseract_CPPFLAGS =
|
||||
tesseract_CPPFLAGS += -I$(top_srcdir)/src/arch
|
||||
tesseract_CPPFLAGS += -I$(top_srcdir)/src/ccmain
|
||||
tesseract_CPPFLAGS += -I$(top_srcdir)/src/ccstruct
|
||||
tesseract_CPPFLAGS += -I$(top_srcdir)/src/ccutil
|
||||
tesseract_CPPFLAGS += -I$(top_srcdir)/src/classify
|
||||
tesseract_CPPFLAGS += -I$(top_srcdir)/src/cutil
|
||||
tesseract_CPPFLAGS += -I$(top_srcdir)/src/dict
|
||||
tesseract_CPPFLAGS += -I$(top_srcdir)/src/textord
|
||||
tesseract_CPPFLAGS += -I$(top_srcdir)/src/viewer
|
||||
tesseract_CPPFLAGS += -I$(top_srcdir)/src/wordrec
|
||||
if OPENCL
|
||||
tesseract_CPPFLAGS += -I$(top_srcdir)/src/opencl
|
||||
endif
|
||||
|
||||
@ -1 +1 @@
|
||||
5.3.1
|
||||
5.3.2
|
||||
|
||||
@ -0,0 +1,49 @@
|
||||
# Licensed under the Apache License, Version 2.0 (the "License"); you may not
|
||||
# use this file except in compliance with the License. You may obtain a copy of
|
||||
# the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by
|
||||
# applicable law or agreed to in writing, software distributed under the License
|
||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, either express or implied. See the License for the specific language
|
||||
# governing permissions and limitations under the License.
|
||||
# ##############################################################################
|
||||
#
|
||||
# macros and functions
|
||||
#
|
||||
# ##############################################################################
|
||||
|
||||
# ##############################################################################
|
||||
# FUNCTION check_leptonica_tiff_support
|
||||
# ##############################################################################
|
||||
# Probes whether the Leptonica library found by the caller was built with TIFF
# support.
#
# A small test program is compiled, linked against Leptonica and run. It
# creates a 3x3, 4 bpp image, writes it to memory with pixWriteMemTiff() and
# uses that call's return value as its exit code.
#
# Inputs (read from the calling scope):
#   Leptonica_INCLUDE_DIRS - include directories handed to the test compile
#   Leptonica_LIBRARIES    - libraries the test program is linked against
#   CMAKE_BUILD_TYPE       - configuration used for the test compile
#
# Results (written by try_run(); no NO_CACHE is given, so they are stored as
# cache entries, which is how they become visible outside this function):
#   LEPT_TIFF_COMPILE - whether the test program compiled and linked
#   LEPT_TIFF_RESULT  - exit code of the test program
#
# Example:
#   check_leptonica_tiff_support()
#   if(NOT LEPT_TIFF_RESULT EQUAL 0)
#     # treat Leptonica as lacking TIFF support
#   endif()
#
# NOTE(review): try_run() cannot execute the test binary when cross compiling
# without an emulator - confirm how LEPT_TIFF_RESULT is supplied in that case.
function(check_leptonica_tiff_support)
  # string(CONCAT) joins the fragments verbatim. Passing several quoted
  # fragments to set() would build a CMake list instead and put a ';' between
  # them, i.e. stray semicolons at the start of every generated source line.
  string(
    CONCAT
      TIFF_TEST
      "#include <leptonica/allheaders.h>\n"
      "int main() {\n"
      "  l_uint8 *data = NULL;\n"
      "  size_t size = 0;\n"
      "  PIX* pix = pixCreate(3, 3, 4);\n"
      "  l_int32 ret_val = pixWriteMemTiff(&data, &size, pix, IFF_TIFF_G3);\n"
      "  pixDestroy(&pix);\n"
      "  lept_free(data);\n"
      "  return ret_val;}\n")

  # Build the probe with the same configuration as the project itself.
  set(CMAKE_TRY_COMPILE_CONFIGURATION ${CMAKE_BUILD_TYPE})

  try_run(
    LEPT_TIFF_RESULT
    LEPT_TIFF_COMPILE
    SOURCE_FROM_CONTENT tiff_test.cpp "${TIFF_TEST}"
    CMAKE_FLAGS "-DINCLUDE_DIRECTORIES=${Leptonica_INCLUDE_DIRS}"
    LINK_LIBRARIES ${Leptonica_LIBRARIES}
    COMPILE_OUTPUT_VARIABLE
    COMPILE_OUTPUT)

  # A failed compile says nothing about TIFF support itself, so dump everything
  # needed to diagnose the probe and warn instead of aborting the configure.
  if(NOT LEPT_TIFF_COMPILE)
    message(STATUS "COMPILE_OUTPUT: ${COMPILE_OUTPUT}")
    message(STATUS "Leptonica_INCLUDE_DIRS: ${Leptonica_INCLUDE_DIRS}")
    message(STATUS "Leptonica_LIBRARIES: ${Leptonica_LIBRARIES}")
    message(STATUS "LEPT_TIFF_RESULT: ${LEPT_TIFF_RESULT}")
    message(STATUS "LEPT_TIFF_COMPILE: ${LEPT_TIFF_COMPILE}")
    message(WARNING "Failed to compile test")
  endif()
endfunction()
|
||||
|
||||
# ##############################################################################
|
||||
@ -29,7 +29,7 @@ AM_INIT_AUTOMAKE([foreign subdir-objects nostdinc])
|
||||
# Define date of package, etc. Could be useful in auto-generated
|
||||
# documentation.
|
||||
PACKAGE_YEAR=2023
|
||||
PACKAGE_DATE="04/01"
|
||||
PACKAGE_DATE="07/11"
|
||||
|
||||
abs_top_srcdir=`AS_DIRNAME([$0])`
|
||||
|
||||
|
||||
@ -13,13 +13,13 @@ description: |
|
||||
|
||||
grade: stable # must be 'stable' to release into candidate/stable channels
|
||||
confinement: strict
|
||||
base: core22
|
||||
|
||||
apps:
|
||||
tesseract:
|
||||
command: >
|
||||
env
|
||||
TESSDATA_PREFIX=$SNAP_USER_COMMON
|
||||
tesseract
|
||||
command: usr/local/bin/tesseract
|
||||
environment:
|
||||
TESSDATA_PREFIX: $SNAP_USER_COMMON
|
||||
plugs:
|
||||
- home
|
||||
- removable-media
|
||||
@ -30,9 +30,9 @@ parts:
|
||||
plugin: autotools
|
||||
build-packages:
|
||||
- pkg-config
|
||||
- libpng12-dev
|
||||
- libjpeg8-dev
|
||||
- libtiff5-dev
|
||||
- libpng-dev
|
||||
- libjpeg-dev
|
||||
- libtiff-dev
|
||||
- zlib1g-dev
|
||||
- libicu-dev
|
||||
- libpango1.0-dev
|
||||
@ -41,7 +41,7 @@ parts:
|
||||
- libgomp1
|
||||
after: [leptonica]
|
||||
leptonica:
|
||||
source: https://github.com/DanBloomberg/leptonica/archive/1.74.2.tar.gz
|
||||
source: https://github.com/DanBloomberg/leptonica/archive/1.83.1.tar.gz
|
||||
plugin: autotools
|
||||
stage-packages:
|
||||
- libjbig0
|
||||
|
||||
@ -412,7 +412,7 @@ int TessBaseAPI::Init(const char *data, int data_size, const char *language, Ocr
|
||||
if (data_size != 0) {
|
||||
mgr.LoadMemBuffer(language, data, data_size);
|
||||
}
|
||||
if (tesseract_->init_tesseract(datapath.c_str(), output_file_.c_str(), language, oem, configs,
|
||||
if (tesseract_->init_tesseract(datapath, output_file_, language, oem, configs,
|
||||
configs_size, vars_vec, vars_values, set_only_non_debug_params,
|
||||
&mgr) != 0) {
|
||||
return -1;
|
||||
@ -2176,7 +2176,7 @@ int TessBaseAPI::FindLines() {
|
||||
" but data path is undefined\n");
|
||||
delete osd_tesseract_;
|
||||
osd_tesseract_ = nullptr;
|
||||
} else if (osd_tesseract_->init_tesseract(datapath_.c_str(), "", "osd", OEM_TESSERACT_ONLY,
|
||||
} else if (osd_tesseract_->init_tesseract(datapath_, "", "osd", OEM_TESSERACT_ONLY,
|
||||
nullptr, 0, nullptr, nullptr, false, &mgr) == 0) {
|
||||
osd_tess = osd_tesseract_;
|
||||
osd_tesseract_->set_source_resolution(thresholder_->GetSourceYResolution());
|
||||
|
||||
@ -1684,18 +1684,6 @@ void Tesseract::fix_rep_char(PAGE_RES_IT *page_res_it) {
|
||||
}
|
||||
word_res->done = true;
|
||||
|
||||
// Measure the mean space.
|
||||
int gap_count = 0;
|
||||
WERD *werd = word_res->word;
|
||||
C_BLOB_IT blob_it(werd->cblob_list());
|
||||
C_BLOB *prev_blob = blob_it.data();
|
||||
for (blob_it.forward(); !blob_it.at_first(); blob_it.forward()) {
|
||||
C_BLOB *blob = blob_it.data();
|
||||
int gap = blob->bounding_box().left();
|
||||
gap -= prev_blob->bounding_box().right();
|
||||
++gap_count;
|
||||
prev_blob = blob;
|
||||
}
|
||||
// Just correct existing classification.
|
||||
CorrectRepcharChoices(best_choice, word_res);
|
||||
word_res->reject_map.initialise(word.length());
|
||||
|
||||
@ -460,7 +460,7 @@ ScriptDetector::ScriptDetector(const std::vector<int> *allowed_scripts, OSResult
|
||||
// adding this blob.
|
||||
void ScriptDetector::detect_blob(BLOB_CHOICE_LIST *scores) {
|
||||
for (int i = 0; i < 4; ++i) {
|
||||
bool done[kMaxNumberOfScripts] = {false};
|
||||
std::vector<bool> done(kMaxNumberOfScripts);
|
||||
|
||||
BLOB_CHOICE_IT choice_it;
|
||||
choice_it.set_to_list(scores + i);
|
||||
@ -488,7 +488,7 @@ void ScriptDetector::detect_blob(BLOB_CHOICE_LIST *scores) {
|
||||
}
|
||||
}
|
||||
// Script already processed before.
|
||||
if (done[id]) {
|
||||
if (done.at(id)) {
|
||||
continue;
|
||||
}
|
||||
done[id] = true;
|
||||
|
||||
@ -32,7 +32,7 @@
|
||||
# include "svmnode.h" // for SVMenuNode
|
||||
# include "tesseractclass.h" // for Tesseract
|
||||
|
||||
# include <cstdio> // for fclose, fopen, fprintf, sprintf, FILE
|
||||
# include <cstdio> // for fclose, fopen, fprintf, FILE
|
||||
# include <cstdlib> // for atoi
|
||||
# include <cstring> // for strcmp, strcspn, strlen, strncpy
|
||||
# include <locale> // for std::locale::classic
|
||||
@ -319,16 +319,12 @@ ParamsEditor::ParamsEditor(tesseract::Tesseract *tess, ScrollView *sv) {
|
||||
// Write all (changed_) parameters to a config file.
|
||||
void ParamsEditor::WriteParams(char *filename, bool changes_only) {
|
||||
FILE *fp; // input file
|
||||
char msg_str[255];
|
||||
// if file exists
|
||||
if ((fp = fopen(filename, "rb")) != nullptr) {
|
||||
fclose(fp);
|
||||
sprintf(msg_str,
|
||||
"Overwrite file "
|
||||
"%s"
|
||||
"? (Y/N)",
|
||||
filename);
|
||||
int a = sv_window_->ShowYesNoDialog(msg_str);
|
||||
std::stringstream msg;
|
||||
msg << "Overwrite file " << filename << "? (Y/N)";
|
||||
int a = sv_window_->ShowYesNoDialog(msg.str().c_str());
|
||||
if (a == 'n') {
|
||||
return;
|
||||
} // don't write
|
||||
|
||||
@ -36,6 +36,9 @@
|
||||
|
||||
#include <cctype>
|
||||
#include <cmath>
|
||||
#include <iomanip> // for std::setprecision
|
||||
#include <locale> // for std::locale::classic
|
||||
#include <sstream> // for std::stringstream
|
||||
|
||||
#ifndef GRAPHICS_DISABLED
|
||||
namespace tesseract {
|
||||
@ -140,32 +143,30 @@ static void show_point(PAGE_RES *page_res, float x, float y) {
|
||||
FCOORD pt(x, y);
|
||||
PAGE_RES_IT pr_it(page_res);
|
||||
|
||||
const int kBufsize = 512;
|
||||
char msg[kBufsize];
|
||||
char *msg_ptr = msg;
|
||||
|
||||
msg_ptr += sprintf(msg_ptr, "Pt:(%0.3f, %0.3f) ", x, y);
|
||||
std::stringstream msg;
|
||||
msg.imbue(std::locale::classic());
|
||||
msg << std::fixed << std::setprecision(3) << "Pt:(" << x << ", " << y << ") ";
|
||||
|
||||
for (WERD_RES *word = pr_it.word(); word != nullptr; word = pr_it.forward()) {
|
||||
if (pr_it.row() != pr_it.prev_row() && pr_it.row()->row->bounding_box().contains(pt)) {
|
||||
msg_ptr += sprintf(msg_ptr, "BL(x)=%0.3f ", pr_it.row()->row->base_line(x));
|
||||
msg << "BL(x)=" << pr_it.row()->row->base_line(x) << ' ';
|
||||
}
|
||||
if (word->word->bounding_box().contains(pt)) {
|
||||
TBOX box = word->word->bounding_box();
|
||||
msg_ptr += sprintf(msg_ptr, "Wd(%d, %d)/(%d, %d) ", box.left(), box.bottom(), box.right(),
|
||||
box.top());
|
||||
msg << "Wd(" << box.left() << ", " << box.bottom() << ")/("
|
||||
<< box.right() << ", " << box.top() << ") ";
|
||||
C_BLOB_IT cblob_it(word->word->cblob_list());
|
||||
for (cblob_it.mark_cycle_pt(); !cblob_it.cycled_list(); cblob_it.forward()) {
|
||||
C_BLOB *cblob = cblob_it.data();
|
||||
box = cblob->bounding_box();
|
||||
if (box.contains(pt)) {
|
||||
msg_ptr += sprintf(msg_ptr, "CBlb(%d, %d)/(%d, %d) ", box.left(), box.bottom(),
|
||||
box.right(), box.top());
|
||||
msg << "CBlb(" << box.left() << ", " << box.bottom() << ")/("
|
||||
<< box.right() << ", " << box.top() << ") ";
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
image_win->AddMessage(msg);
|
||||
image_win->AddMessage(msg.str().c_str());
|
||||
}
|
||||
|
||||
/**
|
||||
@ -622,7 +623,7 @@ void Tesseract::process_image_event( // action in image win
|
||||
break;
|
||||
|
||||
default:
|
||||
sprintf(msg, "Mode %d not yet implemented", mode);
|
||||
snprintf(msg, sizeof(msg), "Mode %d not yet implemented", mode);
|
||||
image_win->AddMessage(msg);
|
||||
break;
|
||||
}
|
||||
|
||||
@ -76,7 +76,7 @@ int64_t DPPoint::CostWithVariance(const DPPoint *prev) {
|
||||
int delta = this - prev;
|
||||
int32_t n = prev->n_ + 1;
|
||||
int32_t sig_x = prev->sig_x_ + delta;
|
||||
int64_t sig_xsq = prev->sig_xsq_ + delta * delta;
|
||||
int64_t sig_xsq = prev->sig_xsq_ + static_cast<int64_t>(delta) * delta;
|
||||
int64_t cost = (sig_xsq - sig_x * sig_x / n) / n;
|
||||
cost += prev->total_cost_;
|
||||
UpdateIfBetter(cost, prev->total_steps_ + 1, prev, n, sig_x, sig_xsq);
|
||||
|
||||
@ -22,6 +22,8 @@
|
||||
#include <cstdio>
|
||||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
#include <iostream> // for std::cerr
|
||||
#include <sstream> // for std::stringstream
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
@ -41,37 +43,26 @@ void ERRCODE::error( // handle error
|
||||
const char *format, ... // special message
|
||||
) const {
|
||||
va_list args; // variable args
|
||||
char msg[MAX_MSG];
|
||||
char *msgptr = msg;
|
||||
std::stringstream msg;
|
||||
|
||||
if (caller != nullptr) {
|
||||
// name of caller
|
||||
msgptr += sprintf(msgptr, "%s:", caller);
|
||||
msg << caller << ':';
|
||||
}
|
||||
// actual message
|
||||
msgptr += sprintf(msgptr, "Error:%s", message);
|
||||
msg << "Error:" << message;
|
||||
if (format != nullptr) {
|
||||
msgptr += sprintf(msgptr, ":");
|
||||
char str[MAX_MSG];
|
||||
va_start(args, format); // variable list
|
||||
#ifdef _WIN32
|
||||
// print remainder
|
||||
msgptr += _vsnprintf(msgptr, MAX_MSG - 2 - (msgptr - msg), format, args);
|
||||
msg[MAX_MSG - 2] = '\0'; // ensure termination
|
||||
strcat(msg, "\n");
|
||||
#else
|
||||
// print remainder
|
||||
msgptr += vsprintf(msgptr, format, args);
|
||||
// no specific
|
||||
msgptr += sprintf(msgptr, "\n");
|
||||
#endif
|
||||
// print remainder
|
||||
std::vsnprintf(str, sizeof(str), format, args);
|
||||
// ensure termination
|
||||
str[sizeof(str) - 1] = '\0';
|
||||
va_end(args);
|
||||
} else {
|
||||
// no specific
|
||||
msgptr += sprintf(msgptr, "\n");
|
||||
msg << ':' << str;
|
||||
}
|
||||
|
||||
// %s is needed here so msg is printed correctly!
|
||||
fprintf(stderr, "%s", msg);
|
||||
std::cerr << msg.str() << '\n';
|
||||
|
||||
switch (action) {
|
||||
case DBG:
|
||||
|
||||
@ -314,10 +314,10 @@ std::string UNICHARSET::debug_utf8_str(const char *str) {
|
||||
step = UNICHAR::utf8_step(str + i);
|
||||
if (step == 0) {
|
||||
step = 1;
|
||||
sprintf(hex, "%x", str[i]);
|
||||
snprintf(hex, sizeof(hex), "%x", str[i]);
|
||||
} else {
|
||||
UNICHAR ch(str + i, step);
|
||||
sprintf(hex, "%x", ch.first_uni());
|
||||
snprintf(hex, sizeof(hex), "%x", ch.first_uni());
|
||||
}
|
||||
result += hex;
|
||||
result += " ";
|
||||
|
||||
@ -812,7 +812,8 @@ int OpenclDevice::BinaryGenerated(const char *clFileName, FILE **fhandle) {
|
||||
cl_int clStatus;
|
||||
int status = 0;
|
||||
FILE *fd = nullptr;
|
||||
char fileName[256] = {0}, cl_name[128] = {0};
|
||||
char fileName[256];
|
||||
char cl_name[128];
|
||||
char deviceName[1024];
|
||||
clStatus = clGetDeviceInfo(gpuEnv.mpArryDevsID[i], CL_DEVICE_NAME, sizeof(deviceName), deviceName,
|
||||
nullptr);
|
||||
@ -820,7 +821,7 @@ int OpenclDevice::BinaryGenerated(const char *clFileName, FILE **fhandle) {
|
||||
const char *str = strstr(clFileName, ".cl");
|
||||
memcpy(cl_name, clFileName, str - clFileName);
|
||||
cl_name[str - clFileName] = '\0';
|
||||
sprintf(fileName, "%s-%s.bin", cl_name, deviceName);
|
||||
snprintf(fileName, sizeof(fileName), "%s-%s.bin", cl_name, deviceName);
|
||||
legalizeFileName(fileName);
|
||||
fd = fopen(fileName, "rb");
|
||||
status = (fd != nullptr) ? 1 : 0;
|
||||
@ -894,9 +895,9 @@ int OpenclDevice::GeneratBinFromKernelSource(cl_program program, const char *clF
|
||||
|
||||
/* dump out each binary into its own separate file. */
|
||||
for (i = 0; i < numDevices; i++) {
|
||||
char fileName[256] = {0}, cl_name[128] = {0};
|
||||
|
||||
if (binarySizes[i] != 0) {
|
||||
char fileName[256];
|
||||
char cl_name[128];
|
||||
char deviceName[1024];
|
||||
clStatus =
|
||||
clGetDeviceInfo(mpArryDevsID[i], CL_DEVICE_NAME, sizeof(deviceName), deviceName, nullptr);
|
||||
@ -905,7 +906,7 @@ int OpenclDevice::GeneratBinFromKernelSource(cl_program program, const char *clF
|
||||
const char *str = strstr(clFileName, ".cl");
|
||||
memcpy(cl_name, clFileName, str - clFileName);
|
||||
cl_name[str - clFileName] = '\0';
|
||||
sprintf(fileName, "%s-%s.bin", cl_name, deviceName);
|
||||
snprintf(fileName, sizeof(fileName), "%s-%s.bin", cl_name, deviceName);
|
||||
legalizeFileName(fileName);
|
||||
if (!WriteBinaryToFile(fileName, binaries[i], binarySizes[i])) {
|
||||
tprintf("[OD] write binary[%s] failed\n", fileName);
|
||||
|
||||
@ -39,6 +39,7 @@
|
||||
#endif
|
||||
#include <tesseract/renderer.h>
|
||||
#include "simddetect.h"
|
||||
#include "tesseractclass.h" // for AnyTessLang
|
||||
#include "tprintf.h" // for tprintf
|
||||
|
||||
#ifdef _OPENMP
|
||||
@ -787,6 +788,12 @@ int main(int argc, char **argv) {
|
||||
(api.GetBoolVariable("tessedit_make_boxes_from_boxes", &b) && b) ||
|
||||
(api.GetBoolVariable("tessedit_train_line_recognizer", &b) && b);
|
||||
|
||||
if (api.GetPageSegMode() == tesseract::PSM_OSD_ONLY) {
|
||||
if (!api.tesseract()->AnyTessLang()) {
|
||||
fprintf(stderr, "Error, OSD requires a model for the legacy engine\n");
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
}
|
||||
#ifdef DISABLED_LEGACY_ENGINE
|
||||
auto cur_psm = api.GetPageSegMode();
|
||||
auto osd_warning = std::string("");
|
||||
|
||||
@ -174,7 +174,7 @@ int32_t OL_BUCKETS::outline_complexity(C_OUTLINE *outline, // parent outline
|
||||
if (child_count + grandchild_count > max_count) { // too complex
|
||||
if (edges_debug) {
|
||||
tprintf(
|
||||
"Disgard outline on child_count=%d + grandchild_count=%d "
|
||||
"Discard outline on child_count=%d + grandchild_count=%d "
|
||||
"> max_count=%d\n",
|
||||
child_count, grandchild_count, max_count);
|
||||
}
|
||||
|
||||
@ -16,6 +16,7 @@
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
|
||||
#include <cerrno>
|
||||
#include <locale> // for std::locale::classic
|
||||
#if defined(__USE_GNU)
|
||||
# include <cfenv> // for feenableexcept
|
||||
#endif
|
||||
@ -222,9 +223,10 @@ int main(int argc, char **argv) {
|
||||
iteration = trainer.training_iteration()) {
|
||||
trainer.TrainOnLine(&trainer, false);
|
||||
}
|
||||
std::string log_str;
|
||||
std::stringstream log_str;
|
||||
log_str.imbue(std::locale::classic());
|
||||
trainer.MaintainCheckpoints(tester_callback, log_str);
|
||||
tprintf("%s\n", log_str.c_str());
|
||||
tprintf("%s\n", log_str.str().c_str());
|
||||
} while (trainer.best_error_rate() > FLAGS_target_error_rate &&
|
||||
(trainer.training_iteration() < max_iterations));
|
||||
tprintf("Finished! Selected model with minimal training error rate (BCER) = %g\n",
|
||||
|
||||
@ -278,8 +278,8 @@ bool BoxChar::MostlyVertical(const std::vector<BoxChar *> &boxes) {
|
||||
int dx = boxes[i]->box_->x - boxes[i - 1]->box_->x;
|
||||
int dy = boxes[i]->box_->y - boxes[i - 1]->box_->y;
|
||||
if (abs(dx) > abs(dy) * kMinNewlineRatio || abs(dy) > abs(dx) * kMinNewlineRatio) {
|
||||
total_dx += dx * dx;
|
||||
total_dy += dy * dy;
|
||||
total_dx += static_cast<int64_t>(dx) * dx;
|
||||
total_dy += static_cast<int64_t>(dy) * dy;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -16,6 +16,7 @@
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
|
||||
#include "lstmtester.h"
|
||||
#include <iomanip> // for std::setprecision
|
||||
#include <thread> // for std::thread
|
||||
#include "fileio.h" // for LoadFileLinesToStrings
|
||||
|
||||
@ -115,14 +116,15 @@ std::string LSTMTester::RunEvalSync(int iteration, const double *training_errors
|
||||
}
|
||||
char_error *= 100.0 / total_pages_;
|
||||
word_error *= 100.0 / total_pages_;
|
||||
std::string result;
|
||||
std::stringstream result;
|
||||
result.imbue(std::locale::classic());
|
||||
result << std::fixed << std::setprecision(3);
|
||||
if (iteration != 0 || training_stage != 0) {
|
||||
result += "At iteration " + std::to_string(iteration);
|
||||
result += ", stage " + std::to_string(training_stage) + ", ";
|
||||
result << "At iteration " << iteration
|
||||
<< ", stage " << training_stage << ", ";
|
||||
}
|
||||
result += "BCER eval=" + std::to_string(char_error);
|
||||
result += ", BWER eval=" + std::to_string(word_error);
|
||||
return result;
|
||||
result << "BCER eval=" << char_error << ", BWER eval=" << word_error;
|
||||
return result.str();
|
||||
}
|
||||
|
||||
// Helper thread function for RunEvalAsync.
|
||||
|
||||
@ -23,6 +23,8 @@
|
||||
#endif
|
||||
|
||||
#include <cmath>
|
||||
#include <iomanip> // for std::setprecision
|
||||
#include <locale> // for std::locale::classic
|
||||
#include <string>
|
||||
#include "lstmtrainer.h"
|
||||
|
||||
@ -305,7 +307,7 @@ bool LSTMTrainer::LoadAllTrainingData(const std::vector<std::string> &filenames,
|
||||
// Writes checkpoints at appropriate times and builds and returns a log message
|
||||
// to indicate progress. Returns false if nothing interesting happened.
|
||||
bool LSTMTrainer::MaintainCheckpoints(const TestCallback &tester,
|
||||
std::string &log_msg) {
|
||||
std::stringstream &log_msg) {
|
||||
PrepareLogMsg(log_msg);
|
||||
double error_rate = CharError();
|
||||
int iteration = learning_iteration();
|
||||
@ -330,35 +332,34 @@ bool LSTMTrainer::MaintainCheckpoints(const TestCallback &tester,
|
||||
std::vector<char> rec_model_data;
|
||||
if (error_rate < best_error_rate_) {
|
||||
SaveRecognitionDump(&rec_model_data);
|
||||
log_msg += " New best BCER = " + std::to_string(error_rate);
|
||||
log_msg += UpdateErrorGraph(iteration, error_rate, rec_model_data, tester);
|
||||
log_msg << " New best BCER = " << error_rate;
|
||||
log_msg << UpdateErrorGraph(iteration, error_rate, rec_model_data, tester);
|
||||
// If sub_trainer_ is not nullptr, either *this beat it to a new best, or it
|
||||
// just overwrote *this. In either case, we have finished with it.
|
||||
sub_trainer_.reset();
|
||||
stall_iteration_ = learning_iteration() + kMinStallIterations;
|
||||
if (TransitionTrainingStage(kStageTransitionThreshold)) {
|
||||
log_msg +=
|
||||
" Transitioned to stage " + std::to_string(CurrentTrainingStage());
|
||||
log_msg << " Transitioned to stage " << CurrentTrainingStage();
|
||||
}
|
||||
SaveTrainingDump(NO_BEST_TRAINER, *this, &best_trainer_);
|
||||
if (error_rate < error_rate_of_last_saved_best_ * kBestCheckpointFraction) {
|
||||
std::string best_model_name = DumpFilename();
|
||||
if (!SaveDataToFile(best_trainer_, best_model_name.c_str())) {
|
||||
log_msg += " failed to write best model:";
|
||||
log_msg << " failed to write best model:";
|
||||
} else {
|
||||
log_msg += " wrote best model:";
|
||||
log_msg << " wrote best model:";
|
||||
error_rate_of_last_saved_best_ = best_error_rate_;
|
||||
}
|
||||
log_msg += best_model_name;
|
||||
log_msg << best_model_name;
|
||||
}
|
||||
} else if (error_rate > worst_error_rate_) {
|
||||
SaveRecognitionDump(&rec_model_data);
|
||||
log_msg += " New worst BCER = " + std::to_string(error_rate);
|
||||
log_msg += UpdateErrorGraph(iteration, error_rate, rec_model_data, tester);
|
||||
log_msg << " New worst BCER = " << error_rate;
|
||||
log_msg << UpdateErrorGraph(iteration, error_rate, rec_model_data, tester);
|
||||
if (worst_error_rate_ > best_error_rate_ + kMinDivergenceRate &&
|
||||
best_error_rate_ < kMinStartedErrorRate && !best_trainer_.empty()) {
|
||||
// Error rate has ballooned. Go back to the best model.
|
||||
log_msg += "\nDivergence! ";
|
||||
log_msg << "\nDivergence! ";
|
||||
// Copy best_trainer_ before reading it, as it will get overwritten.
|
||||
std::vector<char> revert_data(best_trainer_);
|
||||
if (ReadTrainingDump(revert_data, *this)) {
|
||||
@ -382,34 +383,33 @@ bool LSTMTrainer::MaintainCheckpoints(const TestCallback &tester,
|
||||
std::vector<char> checkpoint;
|
||||
if (!SaveTrainingDump(FULL, *this, &checkpoint) ||
|
||||
!SaveDataToFile(checkpoint, checkpoint_name_.c_str())) {
|
||||
log_msg += " failed to write checkpoint.";
|
||||
log_msg << " failed to write checkpoint.";
|
||||
} else {
|
||||
log_msg += " wrote checkpoint.";
|
||||
log_msg << " wrote checkpoint.";
|
||||
}
|
||||
}
|
||||
log_msg += "\n";
|
||||
return result;
|
||||
}
|
||||
|
||||
// Builds a string containing a progress message with current error rates.
|
||||
void LSTMTrainer::PrepareLogMsg(std::string &log_msg) const {
|
||||
void LSTMTrainer::PrepareLogMsg(std::stringstream &log_msg) const {
|
||||
LogIterations("At", log_msg);
|
||||
log_msg += ", Mean rms=" + std::to_string(error_rates_[ET_RMS]);
|
||||
log_msg += "%, delta=" + std::to_string(error_rates_[ET_DELTA]);
|
||||
log_msg += "%, BCER train=" + std::to_string(error_rates_[ET_CHAR_ERROR]);
|
||||
log_msg += "%, BWER train=" + std::to_string(error_rates_[ET_WORD_RECERR]);
|
||||
log_msg += "%, skip ratio=" + std::to_string(error_rates_[ET_SKIP_RATIO]);
|
||||
log_msg += "%, ";
|
||||
log_msg << std::fixed << std::setprecision(3)
|
||||
<< ", mean rms=" << error_rates_[ET_RMS]
|
||||
<< "%, delta=" << error_rates_[ET_DELTA]
|
||||
<< "%, BCER train=" << error_rates_[ET_CHAR_ERROR]
|
||||
<< "%, BWER train=" << error_rates_[ET_WORD_RECERR]
|
||||
<< "%, skip ratio=" << error_rates_[ET_SKIP_RATIO] << "%,";
|
||||
}
|
||||
|
||||
// Appends <intro_str> iteration learning_iteration()/training_iteration()/
|
||||
// sample_iteration() to the log_msg.
|
||||
void LSTMTrainer::LogIterations(const char *intro_str,
|
||||
std::string &log_msg) const {
|
||||
log_msg += intro_str;
|
||||
log_msg += " iteration " + std::to_string(learning_iteration());
|
||||
log_msg += "/" + std::to_string(training_iteration());
|
||||
log_msg += "/" + std::to_string(sample_iteration());
|
||||
std::stringstream &log_msg) const {
|
||||
log_msg << intro_str
|
||||
<< " iteration " << learning_iteration()
|
||||
<< "/" << training_iteration()
|
||||
<< "/" << sample_iteration();
|
||||
}
|
||||
|
||||
// Returns true and increments the training_stage_ if the error rate has just
|
||||
@ -602,14 +602,14 @@ bool LSTMTrainer::DeSerialize(const TessdataManager *mgr, TFile *fp) {
|
||||
// De-serializes the saved best_trainer_ into sub_trainer_, and adjusts the
|
||||
// learning rates (by scaling reduction, or layer specific, according to
|
||||
// NF_LAYER_SPECIFIC_LR).
|
||||
void LSTMTrainer::StartSubtrainer(std::string &log_msg) {
|
||||
void LSTMTrainer::StartSubtrainer(std::stringstream &log_msg) {
|
||||
sub_trainer_ = std::make_unique<LSTMTrainer>();
|
||||
if (!ReadTrainingDump(best_trainer_, *sub_trainer_)) {
|
||||
log_msg += " Failed to revert to previous best for trial!";
|
||||
log_msg << " Failed to revert to previous best for trial!";
|
||||
sub_trainer_.reset();
|
||||
} else {
|
||||
log_msg += " Trial sub_trainer_ from iteration " +
|
||||
std::to_string(sub_trainer_->training_iteration());
|
||||
log_msg << " Trial sub_trainer_ from iteration "
|
||||
<< sub_trainer_->training_iteration();
|
||||
// Reduce learning rate so it doesn't diverge this time.
|
||||
sub_trainer_->ReduceLearningRates(this, log_msg);
|
||||
// If it fails again, we will wait twice as long before reverting again.
|
||||
@ -630,14 +630,13 @@ void LSTMTrainer::StartSubtrainer(std::string &log_msg) {
|
||||
// trainer in *this is replaced with sub_trainer_, and STR_REPLACED is
|
||||
// returned. STR_NONE is returned if the subtrainer wasn't good enough to
|
||||
// receive any training iterations.
|
||||
SubTrainerResult LSTMTrainer::UpdateSubtrainer(std::string &log_msg) {
|
||||
SubTrainerResult LSTMTrainer::UpdateSubtrainer(std::stringstream &log_msg) {
|
||||
double training_error = CharError();
|
||||
double sub_error = sub_trainer_->CharError();
|
||||
double sub_margin = (training_error - sub_error) / sub_error;
|
||||
if (sub_margin >= kSubTrainerMarginFraction) {
|
||||
log_msg += " sub_trainer=" + std::to_string(sub_error);
|
||||
log_msg += " margin=" + std::to_string(100.0 * sub_margin);
|
||||
log_msg += "\n";
|
||||
log_msg << " sub_trainer=" << sub_error
|
||||
<< " margin=" << 100.0 * sub_margin << "\n";
|
||||
// Catch up to current iteration.
|
||||
int end_iteration = training_iteration();
|
||||
while (sub_trainer_->training_iteration() < end_iteration &&
|
||||
@ -647,11 +646,12 @@ SubTrainerResult LSTMTrainer::UpdateSubtrainer(std::string &log_msg) {
|
||||
while (sub_trainer_->training_iteration() < target_iteration) {
|
||||
sub_trainer_->TrainOnLine(this, false);
|
||||
}
|
||||
std::string batch_log = "Sub:";
|
||||
std::stringstream batch_log("Sub:");
|
||||
batch_log.imbue(std::locale::classic());
|
||||
sub_trainer_->PrepareLogMsg(batch_log);
|
||||
batch_log += "\n";
|
||||
tprintf("UpdateSubtrainer:%s", batch_log.c_str());
|
||||
log_msg += batch_log;
|
||||
batch_log << "\n";
|
||||
tprintf("UpdateSubtrainer:%s", batch_log.str().c_str());
|
||||
log_msg << batch_log.str();
|
||||
sub_error = sub_trainer_->CharError();
|
||||
sub_margin = (training_error - sub_error) / sub_error;
|
||||
}
|
||||
@ -661,9 +661,8 @@ SubTrainerResult LSTMTrainer::UpdateSubtrainer(std::string &log_msg) {
|
||||
std::vector<char> updated_trainer;
|
||||
SaveTrainingDump(LIGHT, *sub_trainer_, &updated_trainer);
|
||||
ReadTrainingDump(updated_trainer, *this);
|
||||
log_msg += " Sub trainer wins at iteration " +
|
||||
std::to_string(training_iteration());
|
||||
log_msg += "\n";
|
||||
log_msg << " Sub trainer wins at iteration "
|
||||
<< training_iteration() << "\n";
|
||||
return STR_REPLACED;
|
||||
}
|
||||
return STR_UPDATED;
|
||||
@ -674,17 +673,16 @@ SubTrainerResult LSTMTrainer::UpdateSubtrainer(std::string &log_msg) {
|
||||
// Reduces network learning rates, either for everything, or for layers
|
||||
// independently, according to NF_LAYER_SPECIFIC_LR.
|
||||
void LSTMTrainer::ReduceLearningRates(LSTMTrainer *samples_trainer,
|
||||
std::string &log_msg) {
|
||||
std::stringstream &log_msg) {
|
||||
if (network_->TestFlag(NF_LAYER_SPECIFIC_LR)) {
|
||||
int num_reduced = ReduceLayerLearningRates(
|
||||
kLearningRateDecay, kNumAdjustmentIterations, samples_trainer);
|
||||
log_msg +=
|
||||
"\nReduced learning rate on layers: " + std::to_string(num_reduced);
|
||||
log_msg << "\nReduced learning rate on layers: " << num_reduced;
|
||||
} else {
|
||||
ScaleLearningRate(kLearningRateDecay);
|
||||
log_msg += "\nReduced learning rate to :" + std::to_string(learning_rate_);
|
||||
log_msg << "\nReduced learning rate to :" << learning_rate_;
|
||||
}
|
||||
log_msg += "\n";
|
||||
log_msg << "\n";
|
||||
}
|
||||
|
||||
// Considers reducing the learning rate independently for each layer down by
|
||||
@ -1053,13 +1051,14 @@ void LSTMTrainer::SaveRecognitionDump(std::vector<char> *data) const {
|
||||
// Returns a suitable filename for a training dump, based on the model_base_,
|
||||
// best_error_rate_, best_iteration_ and training_iteration_.
|
||||
std::string LSTMTrainer::DumpFilename() const {
|
||||
std::string filename;
|
||||
filename += model_base_.c_str();
|
||||
filename += "_" + std::to_string(best_error_rate_);
|
||||
filename += "_" + std::to_string(best_iteration_);
|
||||
filename += "_" + std::to_string(training_iteration_);
|
||||
filename += ".checkpoint";
|
||||
return filename;
|
||||
std::stringstream filename;
|
||||
filename.imbue(std::locale::classic());
|
||||
filename << model_base_ << std::fixed << std::setprecision(3)
|
||||
<< "_" << best_error_rate_
|
||||
<< "_" << best_iteration_
|
||||
<< "_" << training_iteration_
|
||||
<< ".checkpoint";
|
||||
return filename.str();
|
||||
}
|
||||
|
||||
// Fills the whole error buffer of the given type with the given value.
|
||||
|
||||
@ -25,6 +25,7 @@
|
||||
#include "rect.h"
|
||||
|
||||
#include <functional> // for std::function
|
||||
#include <sstream> // for std::stringstream
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
@ -192,7 +193,7 @@ public:
|
||||
|
||||
// Keeps track of best and locally worst error rate, using internally computed
|
||||
// values. See MaintainCheckpointsSpecific for more detail.
|
||||
bool MaintainCheckpoints(const TestCallback &tester, std::string &log_msg);
|
||||
bool MaintainCheckpoints(const TestCallback &tester, std::stringstream &log_msg);
|
||||
// Keeps track of best and locally worst error_rate (whatever it is) and
|
||||
// launches tests using rec_model, when a new min or max is reached.
|
||||
// Writes checkpoints using train_model at appropriate times and builds and
|
||||
@ -201,12 +202,12 @@ public:
|
||||
bool MaintainCheckpointsSpecific(int iteration,
|
||||
const std::vector<char> *train_model,
|
||||
const std::vector<char> *rec_model,
|
||||
TestCallback tester, std::string &log_msg);
|
||||
// Builds a string containing a progress message with current error rates.
|
||||
void PrepareLogMsg(std::string &log_msg) const;
|
||||
TestCallback tester, std::stringstream &log_msg);
|
||||
// Builds a progress message with current error rates.
|
||||
void PrepareLogMsg(std::stringstream &log_msg) const;
|
||||
// Appends <intro_str> iteration learning_iteration()/training_iteration()/
|
||||
// sample_iteration() to the log_msg.
|
||||
void LogIterations(const char *intro_str, std::string &log_msg) const;
|
||||
void LogIterations(const char *intro_str, std::stringstream &log_msg) const;
|
||||
|
||||
// TODO(rays) Add curriculum learning.
|
||||
// Returns true and increments the training_stage_ if the error rate has just
|
||||
@ -226,7 +227,7 @@ public:
|
||||
// De-serializes the saved best_trainer_ into sub_trainer_, and adjusts the
|
||||
// learning rates (by scaling reduction, or layer specific, according to
|
||||
// NF_LAYER_SPECIFIC_LR).
|
||||
void StartSubtrainer(std::string &log_msg);
|
||||
void StartSubtrainer(std::stringstream &log_msg);
|
||||
// While the sub_trainer_ is behind the current training iteration and its
|
||||
// training error is at least kSubTrainerMarginFraction better than the
|
||||
// current training error, trains the sub_trainer_, and returns STR_UPDATED if
|
||||
@ -235,10 +236,10 @@ public:
|
||||
// trainer in *this is replaced with sub_trainer_, and STR_REPLACED is
|
||||
// returned. STR_NONE is returned if the subtrainer wasn't good enough to
|
||||
// receive any training iterations.
|
||||
SubTrainerResult UpdateSubtrainer(std::string &log_msg);
|
||||
SubTrainerResult UpdateSubtrainer(std::stringstream &log_msg);
|
||||
// Reduces network learning rates, either for everything, or for layers
|
||||
// independently, according to NF_LAYER_SPECIFIC_LR.
|
||||
void ReduceLearningRates(LSTMTrainer *samples_trainer, std::string &log_msg);
|
||||
void ReduceLearningRates(LSTMTrainer *samples_trainer, std::stringstream &log_msg);
|
||||
// Considers reducing the learning rate independently for each layer down by
|
||||
// factor(<1), or leaving it the same, by double-training the given number of
|
||||
// samples and minimizing the amount of changing of sign of weight updates.
|
||||
|
||||
@ -140,13 +140,21 @@ bool ValidateMyanmar::ConsumeOptionalSignsIfPresent() {
|
||||
}
|
||||
// Tone mark extensions.
|
||||
ch = codes_[codes_used_].second;
|
||||
if (ch == 0x1038 || ch == kMyanmarAsat || ch == 0x1063 || ch == 0x1064 ||
|
||||
if (ch == 0x102c || ch == 0x1038 || ch == kMyanmarAsat || (0x1062 <= ch && ch <= 0x1064) ||
|
||||
(0x1069 <= ch && ch <= 0x106d) || (0x1087 <= ch && ch <= 0x108d) || ch == 0x108f ||
|
||||
ch == 0x109a || ch == 0x109b || (0xaa7b <= ch && ch <= 0xaa7d)) {
|
||||
if (UseMultiCode(1)) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
// Sgaw tones 0x1062, 0x1063 must be followed by asat.
|
||||
// W Pwo tones 0x1069, 0x106a, and 0x106b may be followed by dot below or visarga (nasal).
|
||||
ch = codes_[codes_used_].second;
|
||||
if (ch == 0x103a || ch == 0x1037 || ch == 0x1038) {
|
||||
if (UseMultiCode(1)) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
@ -103,7 +103,6 @@ void Wordrec::add_seam_to_queue(float new_priority, SEAM *new_seam, SeamQueue *s
|
||||
void Wordrec::choose_best_seam(SeamQueue *seam_queue, const SPLIT *split, PRIORITY priority,
|
||||
SEAM **seam_result, TBLOB *blob, SeamPile *seam_pile) {
|
||||
SEAM *seam;
|
||||
char str[80];
|
||||
float my_priority;
|
||||
/* Add seam of split */
|
||||
my_priority = priority;
|
||||
@ -133,7 +132,8 @@ void Wordrec::choose_best_seam(SeamQueue *seam_queue, const SPLIT *split, PRIORI
|
||||
seam->FullPriority(bbox.left(), bbox.right(), chop_overlap_knob, chop_centered_maxwidth,
|
||||
chop_center_knob, chop_width_change_knob);
|
||||
if (chop_debug) {
|
||||
sprintf(str, "Full my_priority %0.0f, ", my_priority);
|
||||
char str[80];
|
||||
snprintf(str, sizeof(str), "Full my_priority %0.0f, ", my_priority);
|
||||
seam->Print(str);
|
||||
}
|
||||
|
||||
|
||||
@ -103,7 +103,7 @@ protected:
|
||||
int iteration_limit = iteration + max_iterations;
|
||||
double best_error = 100.0;
|
||||
do {
|
||||
std::string log_str;
|
||||
std::stringstream log_str;
|
||||
int target_iteration = iteration + kBatchIterations;
|
||||
// Train a few.
|
||||
double mean_error = 0.0;
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user