wok-current rev 25351
updated tesseract and tesseract-dev (3.02.02 -> 5.2.0)
author | Hans-Günter Theisgen |
---|---|
date | Tue Jul 26 13:57:27 2022 +0100 (2022-07-26) |
parents | 61cc62188020 |
children | a77e2ca5a843 |
files | tesseract-ocr-dev/receipt tesseract-ocr/description.txt tesseract-ocr/receipt |
line diff
1.1 --- a/tesseract-ocr-dev/receipt Mon Jul 25 14:25:43 2022 +0100 1.2 +++ b/tesseract-ocr-dev/receipt Tue Jul 26 13:57:27 2022 +0100 1.3 @@ -1,21 +1,18 @@ 1.4 # SliTaz package receipt. 1.5 1.6 PACKAGE="tesseract-ocr-dev" 1.7 -VERSION="3.02.02" 1.8 +VERSION="5.2.0" 1.9 CATEGORY="development" 1.10 SHORT_DESC="The most accurate open source OCR engine available, development files." 1.11 MAINTAINER="pascal.bellard@slitaz.org" 1.12 LICENSE="Apache" 1.13 WEB_SITE="https://github.com/tesseract-ocr/tesseract" 1.14 + 1.15 +DEPENDS="pkg-config tesseract-ocr" 1.16 WANTED="tesseract-ocr" 1.17 1.18 -DEPENDS="tesseract-ocr pkg-config" 1.19 - 1.20 # Rules to gen a SliTaz package suitable for Tazpkg. 1.21 genpkg_rules() 1.22 { 1.23 - mkdir -p $fs/usr/lib 1.24 - cp -a $install/usr/include $fs/usr 1.25 - cp -a $install/usr/lib/*a $fs/usr/lib 1.26 - cp -a $install/usr/lib/pkgconfig $fs/usr/lib 1.27 + get_dev_files 1.28 }
2.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 2.2 +++ b/tesseract-ocr/description.txt Tue Jul 26 13:57:27 2022 +0100 2.3 @@ -0,0 +1,28 @@ 2.4 +This package contains an OCR engine - libtesseract and 2.5 +a command line program - tesseract. 2.6 + 2.7 +Tesseract 4 adds a new neural net (LSTM) based OCR engine 2.8 +which is focused on line recognition, but also still 2.9 +supports the legacy Tesseract OCR engine of Tesseract 3 2.10 +which works by recognizing character patterns. 2.11 +Compatibility with Tesseract 3 is enabled by using the 2.12 +Legacy OCR Engine mode (--oem 0). 2.13 +It also needs traineddata files which support the legacy 2.14 +engine, for example those from the tessdata repository. 2.15 + 2.16 +The lead developer is Ray Smith. The maintainer is Zdenko 2.17 +Podobny. 2.18 +For a list of contributors see AUTHORS and GitHub's log 2.19 +of contributors. 2.20 + 2.21 +Tesseract has unicode (UTF-8) support, and can recognize 2.22 +more than 100 languages "out of the box". 2.23 + 2.24 +Tesseract supports various output formats: plain text, 2.25 +hOCR (HTML), PDF, invisible-text-only PDF, TSV. 2.26 +The main branch also has experimental support for ALTO 2.27 +(XML) output. 2.28 + 2.29 +You should note that in many cases, in order to get better 2.30 +OCR results, you'll need to improve the quality of the 2.31 +image you are giving Tesseract.
3.1 --- a/tesseract-ocr/receipt Mon Jul 25 14:25:43 2022 +0100 3.2 +++ b/tesseract-ocr/receipt Tue Jul 26 13:57:27 2022 +0100 3.3 @@ -1,18 +1,19 @@ 3.4 # SliTaz package receipt. 3.5 3.6 PACKAGE="tesseract-ocr" 3.7 -VERSION="3.02.02" 3.8 +VERSION="5.2.0" 3.9 CATEGORY="office" 3.10 SHORT_DESC="The most accurate open source OCR engine available." 3.11 MAINTAINER="pascal.bellard@slitaz.org" 3.12 LICENSE="Apache" 3.13 WEB_SITE="https://github.com/tesseract-ocr/tesseract" 3.14 + 3.15 TARBALL="$PACKAGE-$VERSION.tar.gz" 3.16 -WGET_URL="https://github.com/tesseract-ocr/tesseract/archive/refs/tags/$VERSION.tar.gz" 3.17 +WGET_URL="$WEB_SITE/archive/$VERSION.tar.gz" 3.18 3.19 -DEPENDS="leptonica libpng jpeg tiff giflib" 3.20 -BUILD_DEPENDS="libtool autoconf automake libpng-dev jpeg-dev tiff-dev \ 3.21 -giflib-dev zlib-dev icu-dev pango-dev cairo-dev leptonica-dev" 3.22 +DEPENDS="gcc83-lib-base giflib jpeg leptonica libpng tiff" 3.23 +BUILD_DEPENDS="autoconf automake cairo-dev gcc83 giflib-dev icu-dev jpeg-dev 3.24 + leptonica-dev libpng-dev libtool pango-dev tiff-dev zlib-dev" 3.25 3.26 # What is the latest version available today? 3.27 current_version() 3.28 @@ -24,19 +25,24 @@ 3.29 # Rules to configure and make the package. 3.30 compile_rules() 3.31 { 3.32 - ./autogen.sh 3.33 - ./configure \ 3.34 - --prefix=/usr \ 3.35 + # 5.2.0 avoid undefined symbol: GOMP_parallel at runtime 3.36 + # by disable-openmp 3.37 + 3.38 + ./autogen.sh && 3.39 + ./configure \ 3.40 + CC=gcc-83 \ 3.41 + CXX=g++-83 \ 3.42 + --prefix=/usr \ 3.43 + --disable-openmp \ 3.44 $CONFIGURE_ARGS && 3.45 - make && make install 3.46 + make && 3.47 + make install 3.48 } 3.49 3.50 # Rules to gen a SliTaz package suitable for Tazpkg. 
3.51 genpkg_rules() 3.52 { 3.53 - mkdir -p $fs/usr/lib 3.54 - cp -a $install/usr/bin $fs/usr 3.55 - cp -a $install/usr/share $fs/usr 3.56 - rm -rf $fs/usr/share/man 3.57 - cp -a $install/usr/lib/*.so* $fs/usr/lib 3.58 + cook_copy_folders bin 3.59 + cook_copy_folders tessdata 3.60 + cook_copy_files *.so* 3.61 } 3.62 \ No newline at end of file