From da629a74cbbe9cf76a85e7c26e2b8ac991b9f549 Mon Sep 17 00:00:00 2001 From: Arnaud Fontaine <arnaud.fontaine@nexedi.com> Date: Mon, 7 Nov 2016 17:00:32 +0900 Subject: [PATCH] Revert "erp5: Update tesseract to 3.04.01 because of compilation error." This reverts commit 292be60ff42edc2ce57f2e6701ad283c18e9772b because of a regression in tesseract > 3.02: https://github.com/tesseract-ocr/tesseract/issues/360#issuecomment-258775303 Related Unit Tests failures: * test_PDFDocument_asTextConversion (testDms.TestDocument) * test_MonochromeImageResize (testDms.TestDocument) * test_CMYKImageTextContent (testDms.TestDocument) => AssertionError: 'ERP5 is a free software\n\n' != 'ERPS is a free software.\n\n' Tested versions: * 3.04.01 (leptonica: 1.73): NOK * 3.03.03 (leptonica: 1.71): NOK * 3.03-rc1 (leptonica: 1.71): NOK * 3.02.01 (leptonica: 1.69): OK * 3.02 (leptonica: 1.71): OK --- component/leptonica/buildout.cfg | 7 ++++-- .../leptonica-1.69-zlib-include.patch | 22 +++++++++++++++++++ component/tesseract/buildout.cfg | 9 ++++++-- .../tesseract-3.00-gcc-4.7-build.patch | 10 +++++++++ .../tesseract/tesseract-3.01-remove-bom.patch | 15 +++++++++++++ 5 files changed, 59 insertions(+), 4 deletions(-) create mode 100644 component/leptonica/leptonica-1.69-zlib-include.patch create mode 100644 component/tesseract/tesseract-3.00-gcc-4.7-build.patch create mode 100644 component/tesseract/tesseract-3.01-remove-bom.patch diff --git a/component/leptonica/buildout.cfg b/component/leptonica/buildout.cfg index 1b18493a6..5bc3e6868 100644 --- a/component/leptonica/buildout.cfg +++ b/component/leptonica/buildout.cfg @@ -11,10 +11,13 @@ extends = [leptonica] recipe = slapos.recipe.cmmi -url = http://www.leptonica.org/source/leptonica-1.73.tar.gz -md5sum = 092cea2e568cada79fff178820397922 +url = http://leptonica.googlecode.com/files/leptonica-1.68.tar.gz +md5sum = 5cd7092f9ff2ca7e3f3e73bfcd556403 configure-options = --disable-static +patch-options = -p1 +patches = + 
${:_profile_base_location_}/leptonica-1.69-zlib-include.patch#cff3dc942075190939b407c38e0d3201 environment = CPPFLAGS=-I${zlib:location}/include -I${libjpeg:location}/include -I${libpng:location}/include -I${libtiff:location}/include -I${webp:location}/include -I${giflib:location}/include LDFLAGS=-L${zlib:location}/lib -Wl,-rpath=${zlib:location}/lib -L${libjpeg:location}/lib -Wl,-rpath=${libjpeg:location}/lib -L${libpng:location}/lib -Wl,-rpath=${libpng:location}/lib -L${libtiff:location}/lib -Wl,-rpath=${libtiff:location}/lib -L${webp:location}/lib -Wl,-rpath=${webp:location}/lib -L${giflib:location}/lib -Wl,-rpath=${giflib:location}/lib diff --git a/component/leptonica/leptonica-1.69-zlib-include.patch b/component/leptonica/leptonica-1.69-zlib-include.patch new file mode 100644 index 000000000..7f907611d --- /dev/null +++ b/component/leptonica/leptonica-1.69-zlib-include.patch @@ -0,0 +1,22 @@ +This patch is originally taken from: + +http://leptonica.googlecode.com/issues/attachment?aid=560001000&name=zlib-include.patch&token=m2sugSYxB4xwAuNgrKXyHTxBYNg%3A1337345966091 + +To fix the following issue with leptonica: + +http://code.google.com/p/leptonica/issues/detail?id=56 + +diff -Nurd -x'*~' leptonica-1.68.orig/src/pngio.c leptonica-1.68/src/pngio.c +--- leptonica-1.68.orig/src/pngio.c 2011-02-01 00:41:12.000000000 -0500 ++++ leptonica-1.68/src/pngio.c 2011-07-09 09:17:17.000000000 -0400 +@@ -108,6 +108,10 @@ + + #include "png.h" + ++#ifdef HAVE_LIBZ ++#include "zlib.h" ++#endif ++ + /* ----------------Set defaults for read/write options ----------------- */ + /* strip 16 bpp --> 8 bpp on reading png; default is for stripping */ + static l_int32 var_PNG_STRIP_16_TO_8 = 1; diff --git a/component/tesseract/buildout.cfg b/component/tesseract/buildout.cfg index e8d297910..d81e19c84 100644 --- a/component/tesseract/buildout.cfg +++ b/component/tesseract/buildout.cfg @@ -14,8 +14,13 @@ parts = [tesseract] recipe = slapos.recipe.cmmi -url = 
https://github.com/tesseract-ocr/tesseract/archive/3.04.01.tar.gz -md5sum = 645a21effcf2825a3473849d72a7fd90 +url = http://tesseract-ocr.googlecode.com/files/tesseract-3.01.tar.gz +md5sum = 1ba496e51a42358fb9d3ffe781b2d20a +patch-options = + -p1 +patches = + ${:_profile_base_location_}/tesseract-3.00-gcc-4.7-build.patch#ca80db3ec489c547b03f3ee48879c1b1 + ${:_profile_base_location_}/tesseract-3.01-remove-bom.patch#2e691858cb492b7c17d23bf0912b3d24 pre-configure = libtoolize -f -c aclocal -I ${libtool:location}/share/aclocal -I config diff --git a/component/tesseract/tesseract-3.00-gcc-4.7-build.patch b/component/tesseract/tesseract-3.00-gcc-4.7-build.patch new file mode 100644 index 000000000..76446109f --- /dev/null +++ b/component/tesseract/tesseract-3.00-gcc-4.7-build.patch @@ -0,0 +1,10 @@ +--- tesseract-3.00/viewer/svutil.cpp.old 2012-04-11 09:34:26.168608940 +0200 ++++ tesseract-3.00/viewer/svutil.cpp 2012-04-11 09:34:46.108565692 +0200 +@@ -21,6 +21,7 @@ + // thread/process creation & synchronization and network connection. + + #include <stdio.h> ++#include <unistd.h> + #ifdef WIN32 + #include <windows.h> + #include <winsock.h> diff --git a/component/tesseract/tesseract-3.01-remove-bom.patch b/component/tesseract/tesseract-3.01-remove-bom.patch new file mode 100644 index 000000000..c50254a65 --- /dev/null +++ b/component/tesseract/tesseract-3.01-remove-bom.patch @@ -0,0 +1,15 @@ +The patch below removes a utf-8 BOM mark. + +Avoid touching it as the BOM is invisible, and copy/pasting might not work. + +It is needed because old compilers treat the BOM as garbage instead of +whitespace. 
+ +--- tesseract-3.01/ccutil/strngs.h.orig 2012-05-24 15:13:22.743808379 +0200 ++++ tesseract-3.01/ccutil/strngs.h 2012-05-24 15:16:54.468858282 +0200 +@@ -1,4 +1,4 @@ +-/********************************************************************** ++/********************************************************************** + * File: strngs.h (Formerly strings.h) + * Description: STRING class definition. + * Author: Ray Smith -- 2.30.9