Commit f084e1a8 authored by francois's avatar francois

component: add tesseract 4.0 and its dependencies to slapos

This commit contains tesseract 4.0 and its up-to-date leptonica
dependency, as well as a Python wrapper that uses the tesseract API to do
text recognition.

Currently, the wrapper cannot initialise the API as intended, and the
traineddata location is not set (a new environment variable should
be introduced in Zope, but it should be possible to "force" a new path
with a patch that changes the tesserocr _DEFAULT_PATH value).
parent 4cb1fcc0
# Buildout profile for the ocr-leptonica component.
# It extends nothing and installs a single part.
[buildout]
extends =
parts =
  ocr-leptonica
# Builds Leptonica 1.74.1 from the upstream source tarball with
# slapos.recipe.cmmi; only shared libraries are produced.
[ocr-leptonica]
recipe = slapos.recipe.cmmi
# NOTE(review): no md5sum is pinned for this download - consider adding one.
url = http://www.leptonica.com/source/leptonica-1.74.1.tar.gz
# Continuation lines must be indented to be parsed as part of the value.
configure-options =
  --disable-static
\ No newline at end of file
# Buildout profile for the ocr-tesseract component.
# The extended profiles provide the sections referenced below
# (ocr-leptonica, pkg-config, libtool, autoconf, automake, cmake).
# Each entry of a multi-line value must be indented.
[buildout]
extends =
  ../ocr-leptonica/buildout.cfg
  ../pkgconfig/buildout.cfg
  ../libtool/buildout.cfg
  ../autoconf/buildout.cfg
  ../automake/buildout.cfg
  ../cmake/buildout.cfg
parts =
  ocr-tesseract
# Downloads the English trained-data file from the tessdata 4.00 branch.
# The download is verified against the pinned md5sum and stored under the
# name given by `filename`.
[ocr-tesseract-eng-traineddata]
recipe = slapos.recipe.build:download
url = https://github.com/tesseract-ocr/tessdata/raw/4.00/eng.traineddata
md5sum = 7af2ad02d11702c7092a5f8dd044d52f
filename = eng.traineddata
strip-top-level-dir = false
# Environment variables used while building [ocr-tesseract]
# (selected there through `environment-section`).
[tesseract-env]
# Build tools come first; %(PATH)s appends the pre-existing PATH.
PATH = ${pkg-config:location}/bin:${autoconf:location}/bin:${automake:location}/bin:${libtool:location}/bin:%(PATH)s
LIBTOOL = ${libtool:location}/bin/libtool
# Compile and link against the Leptonica built by [ocr-leptonica];
# the rpath entry records the Leptonica library directory in the binaries.
CPPFLAGS = -I${ocr-leptonica:location}/include
LDFLAGS = -L${ocr-leptonica:location}/lib -Wl,-rpath=${ocr-leptonica:location}/lib
PKG_CONFIG_PATH = ${ocr-leptonica:location}/lib/pkgconfig
# NOTE(review): only set for the build environment; presumably the runtime
# still needs its own traineddata path - confirm.
TESSDATA_PREFIX = ${ocr-tesseract-eng-traineddata:location}
# Builds Tesseract 4.0 from a pinned upstream commit using CMake in a
# separate ./build directory, with the environment from [tesseract-env].
[ocr-tesseract]
recipe = slapos.recipe.cmmi
# NOTE(review): no md5sum is pinned for this tarball - consider adding one.
url = https://github.com/tesseract-ocr/tesseract/tarball/84db453d3ac002e141a22cd04b41edaac3c50386/tesseract-4.0.tar.gz
environment-section = tesseract-env
# tesseract_cmake.patch lives next to this profile and is applied with -p1.
patch-options = -p1
patches = ${:_profile_base_location_}/tesseract_cmake.patch
# make runs inside the CMake build directory.
make-options =
  -C build
# -p keeps the step idempotent if the directory already exists.
pre-configure =
  mkdir -p build
# `&&` ensures cmake never runs in the source tree when `cd build` fails.
configure-command =
  cd build &&
  ${cmake:location}/bin/cmake -DCMAKE_INSTALL_PREFIX=${buildout:parts-directory}/${:_buildout_section_name_} ..
\ No newline at end of file
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 079d8482..2b69f212 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -41,7 +41,7 @@ set_property(GLOBAL PROPERTY PREDEFINED_TARGETS_FOLDER "CMake Targets")
project(tesseract C CXX)
set(VERSION_MAJOR 4)
-set(VERSION_MINOR 00)
+set(VERSION_MINOR 00.00)
set(VERSION_PLAIN ${VERSION_MAJOR}.${VERSION_MINOR})
set(MINIMUM_LEPTONICA_VERSION 1.74)
@@ -156,8 +156,8 @@ include_directories(wordrec)
# LIBRARY tesseract
########################################
-string(SUBSTRING ${VERSION_MINOR} 0 1 VERSION_MINOR_0)
-string(SUBSTRING ${VERSION_MINOR} 1 1 VERSION_MINOR_1)
+string(SUBSTRING ${VERSION_MINOR} 0 2 VERSION_MINOR_0)
+string(SUBSTRING ${VERSION_MINOR} 3 2 VERSION_MINOR_1)
file(GLOB tesseract_src
arch/*.cpp
@@ -224,10 +224,8 @@ endif()
target_link_libraries (libtesseract ${LIB_Ws2_32} ${LIB_pthread})
set_target_properties (libtesseract PROPERTIES VERSION ${VERSION_MAJOR}.${VERSION_MINOR_0}.${VERSION_MINOR_1})
set_target_properties (libtesseract PROPERTIES SOVERSION ${VERSION_MAJOR}.${VERSION_MINOR_0}.${VERSION_MINOR_1})
-if (WIN32)
-set_target_properties (libtesseract PROPERTIES OUTPUT_NAME tesseract${VERSION_MAJOR}${VERSION_MINOR})
-set_target_properties (libtesseract PROPERTIES DEBUG_OUTPUT_NAME tesseract${VERSION_MAJOR}${VERSION_MINOR}d)
-endif()
+set_target_properties (libtesseract PROPERTIES OUTPUT_NAME tesseract)
+set_target_properties (libtesseract PROPERTIES DEBUG_OUTPUT_NAME tesseractd)
if (NOT CPPAN_BUILD)
target_link_libraries (libtesseract ${Leptonica_LIBRARIES})
diff --git a/tesseract.pc.cmake b/tesseract.pc.cmake
index f9f64f6c..475236f1 100644
--- a/tesseract.pc.cmake
+++ b/tesseract.pc.cmake
@@ -7,6 +7,6 @@ Name: @tesseract_NAME@
Description: An OCR Engine that was developed at HP Labs between 1985 and 1995... and now at Google.
URL: https://github.com/tesseract-ocr/tesseract
Version: @tesseract_VERSION@
-Libs: -L${libdir} -l@tesseract_OUTPUT_NAME@
+Libs: -L${libdir} -ltesseract
Libs.private:
Cflags: -I${includedir} -I${includedir}/tesseract
# Buildout profile for the ocr-tesserocr component (Python wrapper around
# the Tesseract API).
# - ../git/buildout.cfg is extended because [tesserocr-repository] reads
#   ${git:location}.
# - `tesserocr-patch` was dropped from `parts`: no section of that name is
#   defined in this profile, so buildout would abort on it.
[buildout]
extends =
  ../ocr-tesseract/buildout.cfg
  ../ocr-leptonica/buildout.cfg
  ../cython/buildout.cfg
  ../git/buildout.cfg
parts =
  ocr-tesserocr
# Clones the tesserocr sources, pinned to one revision of the
# `tesseract4` branch, using the git binary provided by the [git] part.
[tesserocr-repository]
recipe = slapos.recipe.build:gitclone
git-executable = ${git:location}/bin/git
repository = https://github.com/sirfz/tesserocr
branch = tesseract4
revision = 61b85da
# Environment variables used while building the tesserocr egg
# (selected by [ocr-tesserocr] through `environment`).
[tesserocr-env]
# pkg-config and the tesseract binaries come first; %(PATH)s appends the
# pre-existing PATH.
PATH = ${pkg-config:location}/bin:${ocr-tesseract:location}/bin:%(PATH)s
PKG_CONFIG_PATH = ${ocr-tesseract:location}/lib/pkgconfig:${ocr-leptonica:location}/lib/pkgconfig
# Headers and libraries of both Leptonica and Tesseract; the rpath entries
# record both library directories in the compiled extension.
CPPFLAGS = -I${ocr-leptonica:location}/include -I${ocr-tesseract:location}/include
LDFLAGS = -L${ocr-leptonica:location}/lib -Wl,-rpath=${ocr-leptonica:location}/lib -L${ocr-tesseract:location}/lib -Wl,-rpath=${ocr-tesseract:location}/lib
# Installs tesserocr as a develop egg from the cloned repository, with
# Cython available to setup.py and the environment from [tesserocr-env].
[ocr-tesserocr]
recipe = zc.recipe.egg:develop
setup = ${tesserocr-repository:location}
# Egg name, referenced elsewhere as ${ocr-tesserocr:egg}.
egg = tesserocr
# Continuation lines must be indented to be parsed as part of the value.
setup-eggs =
  ${cython:egg}
environment = tesserocr-env
\ No newline at end of file
......@@ -60,6 +60,7 @@ extends =
../../component/findutils/buildout.cfg
../../component/userhosts/buildout.cfg
../../component/postfix/buildout.cfg
../../component/ocr-tesserocr/buildout.cfg
../../software/ipython_notebook/software.cfg
../../software/neoppod/software-common.cfg
# keep neoppod extends last
......@@ -140,9 +141,12 @@ parts +=
ipython-notebook
instance-jupyter
# Ocr
ocr-tesserocr
# override python2.7 to add SlapOS libstdc++ and the Leptonica library
# directory in RPATH.
# `extra-ldflags` was assigned twice; only the later value, a superset of
# the earlier one, is kept so the effective setting is unambiguous.
[python2.7]
extra-ldflags = -Wl,-rpath=${gcc:location}/lib -Wl,-rpath=${gcc:location}/lib64 -Wl,-rpath=${ocr-leptonica:location}/lib
# override instance-jupyter not to render into default template.cfg
[instance-jupyter]
......@@ -453,6 +457,7 @@ eggs = ${neoppod:eggs}
${pycrypto-python:egg}
${scipy:egg}
${scikit-learn:egg}
${ocr-tesserocr:egg}
lock_file
astor
PyStemmer
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment