Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
#!/usr/bin/env python2.7
# Copyright 2013 Virantha Ekanayake All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License.
"""Wrap ImageMagick calls. Yes, this is ugly."""
# Ugly hack to pass in object method to the multiprocessing library # From http://www.rueckstiess.net/research/snippets/show/ca1d7d90 # Basically gets passed in a pair of (self, arg), and calls the method
"""Class to wrap all the ImageMagick convert calls""" 'CV_FAILED': 'convert execution failed', }
def _warn(self, msg): # pragma: no cover print("WARNING: %s" % msg)
except subprocess.CalledProcessError as e: print e.output self._warn("Could not run command %s" % cmd_list)
#-respect-parenthesis \( -clone 0 -colorspace gray -negate -lat 15x5+5% -contrast-stretch 0 \) -compose copy_opacity -composite -opaque none +matte -modulate 100,50 -adaptive-blur 2.0 -sharpen 0x1 # When using Windows, can't use backslash parenthesis in the shell, so omit the backslash backslash = '' else:
'"%s"' % in_filename, '-respect-parenthesis', #'\\( $setcspace -colorspace gray -type grayscale \\)', backslash+'(', '-clone 0', '-colorspace gray -negate -lat 15x15+5% -contrast-stretch 0', backslash+') -compose copy_opacity -composite -opaque none +matte -modulate 100,100', #'-adaptive-blur 1.0', '-blur 1x1', #'-selective-blur 4x4+5%', '-adaptive-sharpen 0x2', '-negate -define morphology:compose=darken -morphology Thinning Rectangle:1x30+0+0 -negate ', # Removes vertical lines >=60 pixels, reduces width of >30 (otherwise tesseract < 3.03 completely ignores text close to vertical lines in a table) '"%s"' % (out_filename) ] return in_filename else:
except KeyboardInterrupt or Exception: print("Caught keyboard interrupt... terminating") pool.terminate() #sys,exit(-1) raise finally:
|