Commit cb1ffc62 authored by francois's avatar francois

erp5_receipt_recognition Update bt5 following merge request advices

parent e9b63938
...@@ -77,7 +77,7 @@ ...@@ -77,7 +77,7 @@
<dictionary> <dictionary>
<item> <item>
<key> <string>text</string> </key> <key> <string>text</string> </key>
<value> <string>string:${object_url}/ReceiptConversion_convertImage</string> </value> <value> <string>string:${object_url}/ReceiptRecognition_convertImage</string> </value>
</item> </item>
</dictionary> </dictionary>
</pickle> </pickle>
......
...@@ -77,7 +77,7 @@ ...@@ -77,7 +77,7 @@
<dictionary> <dictionary>
<item> <item>
<key> <string>text</string> </key> <key> <string>text</string> </key>
<value> <string>string:${object_url}/Receipt_view</string> </value> <value> <string>string:${object_url}/ReceiptRecognition_view</string> </value>
</item> </item>
</dictionary> </dictionary>
</pickle> </pickle>
......
...@@ -6,6 +6,8 @@ to work inside erp5 and adapt to receipt binaries and with more ...@@ -6,6 +6,8 @@ to work inside erp5 and adapt to receipt binaries and with more
explanation explanation
https://github.com/tmbdev/ocropy https://github.com/tmbdev/ocropy
""" """
# pylint: disable=unpacking-non-sequence
# Pylint is confused by ocropy.
import numpy as np import numpy as np
import scipy.ndimage as ndi import scipy.ndimage as ndi
...@@ -14,9 +16,10 @@ from matplotlib import pylab ...@@ -14,9 +16,10 @@ from matplotlib import pylab
import matplotlib.image as mpimg import matplotlib.image as mpimg
import scipy.stats as stats import scipy.stats as stats
import re import re
import cPickle
import ocrolib import ocrolib
def getReceiptValue(self, image_data): def getReceiptValue(self, image_data, model_name = "en-default.pyrnn"):
""" """
Function called from an erp5 script through externalMethod Function called from an erp5 script through externalMethod
that take an image and its name and save its binarized that take an image and its name and save its binarized
...@@ -27,20 +30,46 @@ def getReceiptValue(self, image_data): ...@@ -27,20 +30,46 @@ def getReceiptValue(self, image_data):
Represent the erp5 object from which externalmethods or module Represent the erp5 object from which externalmethods or module
objects can be called objects can be called
- image_data: - image_data:
base64 representation of the image to analyse Representation of the image to analyse
@return: @return:
- ret: float - anon: float
Represent total value paid on the receipt Represent total value paid on the receipt
---------------------------- ----------------------------
This function return the total value of the receipt in euros. This function look for euros only and return a price with a two digit
precision like "135.79" or "43,89".
""" """
image_as_string = StringIO.StringIO(image_data) image_as_string = StringIO.StringIO(image_data)
image_as_array = mpimg.imread(image_as_string, format = 'JPG') image_as_array = mpimg.imread(image_as_string, format = 'JPG')
line_list, cleared = getLinesFromPicture(image_as_array) line_list, cleared = getLinesFromPicture(image_as_array)
# Start the neural network # Start the neural network
network, lnorm = initRnnModel() network, lnorm = initRnnModel(model_name)
return findReceiptValue(line_list, cleared, network, lnorm)
def findReceiptValue(line_list, cleared, network, lnorm):
"""
Function that runs the neural network through the receipt and extracts
meaningful values
-----------------------------
@args:
- lines: array list
Represent lines of text that will be extracted
from the image
- cleared:2D array
Represent binarized image cropped and cleaned,
from which we will extract text lines
- network: lstm object
Represent the trained neural net
- lnorm: method from lstm object
Represent the size of the lstm object. Is used to scale the objects
to recognize from original size to the average network object.
@return:
- anon: float
Represent total value paid on the receipt
-----------------------------
This function can be modified to add more fields to detect. It might be
possible to run a classification neural net on the result.
"""
value_list = [] value_list = []
tofind = r"(EUR)|€|(TOT)" tofind = r"(EUR)|€|(TOT)"
for _, line in enumerate(line_list): for _, line in enumerate(line_list):
...@@ -48,15 +77,34 @@ def getReceiptValue(self, image_data): ...@@ -48,15 +77,34 @@ def getReceiptValue(self, image_data):
# Corner case: the dewarping function from the normalizer fails # Corner case: the dewarping function from the normalizer fails
# sometimes on empty lines. Can be corrected with better segmentation # sometimes on empty lines. Can be corrected with better segmentation
try: try:
evaluate = getStringFromImage(binline, lnorm, network) evaluate = getStringFromImage(binline, lnorm, network)
if re.search(tofind, evaluate.upper()): if re.search(tofind, evaluate.upper()):
number = re.findall(r"\d+[\.|,]\d\d", evaluate) number = re.findall(r"\d+[\.|,]\d\d", evaluate)
value_list += [float(char.replace(',', '.')) for char in number] value_list += [float(char.replace(',', '.')) for char in number]
except ValueError: except ValueError:
pass pass
return round(max(value_list), 2) return round(max(value_list), 2)
def getRnnModelFromDataStream(self, model_name="en-default.pyrnn"):
    """
    Load a trained OCR neural network from a Data Stream object.
    ----------------------------
    @args:
      - model_name: string, default: en-default.pyrnn
          Id of the object in data_stream_module that contains the
          pickled rnn model
    @return:
      - network: lstm object
          Represent the trained neural net
      - lnorm: method from lstm object, or None when the network does
          not define one
          Represent the size of the lstm object. Is used to scale the
          objects to recognize from original size to the average
          network object.
    ----------------------------
    WARNING: cPickle.loads will execute arbitrary code embedded in the
    pickle, so this function presents a security issue and should NOT
    be called with a user-defined model name.
    """
    pickled_model = self.data_stream_module[model_name].getData()
    network = cPickle.loads(pickled_model)
    return network, getattr(network, "lnorm", None)
def initRnnModel(model_name = "en-default.pyrnn"): def initRnnModel(model_name = "en-default.pyrnn"):
""" """
...@@ -65,7 +113,7 @@ def initRnnModel(model_name = "en-default.pyrnn"): ...@@ -65,7 +113,7 @@ def initRnnModel(model_name = "en-default.pyrnn"):
---------------------------- ----------------------------
@args: @args:
- model_name: string, default: en-default.pyrnn - model_name: string, default: en-default.pyrnn
Id of the object in data_stream_module that contain the rnn model Id of the object in the filesystem that contain the rnn model
@return: @return:
- network: lstm object - network: lstm object
Represent the trained neural net Represent the trained neural net
...@@ -108,8 +156,8 @@ def getLinesFromPicture(image_as_array): ...@@ -108,8 +156,8 @@ def getLinesFromPicture(image_as_array):
independent picture independent picture
""" """
grey_image = convertGreyscale(image_as_array) grey_image = convertGreyscale(image_as_array)
flattened_image = imageTransformation(grey_image) cropped_image = cropImage(grey_image)
binarized_image = imageBinarization(flattened_image) binarized_image = imageBinarization(cropped_image)
binary = 1 - binarized_image binary = 1 - binarized_image
cleaned, scale = removeObjects(binary) cleaned, scale = removeObjects(binary)
angle = getEstimatedSkewAngle(cleaned, np.linspace(-4, 4, 24)) angle = getEstimatedSkewAngle(cleaned, np.linspace(-4, 4, 24))
...@@ -293,26 +341,6 @@ def removeObjects(binarized): ...@@ -293,26 +341,6 @@ def removeObjects(binarized):
binarized = np.minimum(binarized, 1 - (sums > 0) * (sums < scale)) binarized = np.minimum(binarized, 1 - (sums > 0) * (sums < scale))
return binarized, scale return binarized, scale
def getImageWhitelevel(image):
    """
    Estimate the local white level of a greyscale image.

    Two median (50th percentile) passes -- a tall narrow window
    followed by a short wide one -- remove local extremes and leave a
    homogenous background with no details.
    ------------------------------
    @args:
      - image: 2D array
          Represent a greyscale image
    @return:
      - 2D array
          Represent the greyscale image with no local extreme
    ------------------------------
    The result is meant to be subtracted from the original image so
    that only local extremes stand out.
    """
    vertical_pass = ndi.percentile_filter(image, 50, size=(80, 2))
    return ndi.percentile_filter(vertical_pass, 50, size=(2, 80))
def getEstimatedSkewAngle(image, angle_list): def getEstimatedSkewAngle(image, angle_list):
""" """
Function that estimate at which angle the image is the most Function that estimate at which angle the image is the most
...@@ -343,8 +371,37 @@ def getEstimatedSkewAngle(image, angle_list): ...@@ -343,8 +371,37 @@ def getEstimatedSkewAngle(image, angle_list):
_, angle = max(estimates) _, angle = max(estimates)
return angle return angle
def removeBackground(image, percentile=50):
    """
    Function that helps flatten the image by estimating local
    whitelevels. This removes local extremes and gives an image with
    homogenous background and no details
    ------------------------------
    @args:
      - image: 2D array
          Represent a greyscale image
      - percentile: integer between -100 and 100
          A percentile filter with a value of 50 is basically a
          median filter, a value of 0 is a minimum filter and with
          a value of 100 a maximum filter
    @return:
      - 2D array
          Represent a greyscale image with no local extreme,
          every pixel clipped to the [0, 1] range
    ------------------------------
    The filter result will be subtracted from the original image
    so that only local extremes stand out.
    A Kuwahara filter might give better results.
    """
    # Normalize the greyscale image into [0, 1] to reduce extreme
    # differences. Guard against a perfectly flat image, which would
    # otherwise cause a division by zero and fill the result with NaN.
    image = image - np.amin(image)
    maximum = np.amax(image)
    if maximum > 0:
        image = image / maximum
    # Two separable percentile passes (tall-narrow then short-wide)
    # estimate the local white level of the background.
    white_image = ndi.percentile_filter(image, percentile, size=(80, 2))
    white_image = ndi.percentile_filter(white_image, percentile, size=(2, 80))
    # Get the difference between the whiteleveled image and the
    # original one and put it between 0 and 1
    return np.clip(image - white_image + 1, 0, 1)
def imageTransformation(grey): def cropImage(image):
""" """
Function that perform cropping and flattening -- Removing Function that perform cropping and flattening -- Removing
homogenous background and small extremes-- on an image. homogenous background and small extremes-- on an image.
...@@ -360,22 +417,17 @@ def imageTransformation(grey): ...@@ -360,22 +417,17 @@ def imageTransformation(grey):
homogenous background homogenous background
""" """
# Reduce extreme differences in the greyscale image # Reduce extreme differences in the greyscale image
image = grey - pylab.amin(grey) white_image = removeBackground(image)
image /= pylab.amax(image)
white_image = getImageWhitelevel(image)
# Get the difference between the whiteleveled image and the
# original one and put them betewwn 0 an 1
flat = np.clip(image - white_image + 1, 0, 1)
# Calculate coordinate to crop the image, can be done in another # Calculate coordinate to crop the image, can be done in another
# function to improve readability # function to improve readability
mask = ndi.gaussian_filter( mask = ndi.gaussian_filter(
flat, 7.0) < 0.9 * np.amax(flat) white_image, 7.0) < 0.9 * np.amax(white_image)
coords = np.argwhere(mask) coords = np.argwhere(mask)
# Bounding box of kept pixels. # Bounding box of kept pixels.
x_min, y_min = coords.min(axis = 0) x_min, y_min = coords.min(axis = 0)
x_max, y_max = coords.max(axis = 0) x_max, y_max = coords.max(axis = 0)
return flat[x_min - 10 : x_max + 10, y_min - 10 : y_max + 10] return white_image[x_min - 10 : x_max + 10, y_min - 10 : y_max + 10]
def imageBinarization(flattened_image): def imageBinarization(flattened_image):
......
...@@ -45,10 +45,7 @@ ...@@ -45,10 +45,7 @@
<item> <item>
<key> <string>text_content_warning_message</string> </key> <key> <string>text_content_warning_message</string> </key>
<value> <value>
<tuple> <tuple/>
<string>W:243, 2: Attempting to unpack a non-sequence defined at line 181 of scipy.ndimage.measurements (unpacking-non-sequence)</string>
<string>W:272, 2: Attempting to unpack a non-sequence defined at line 181 of scipy.ndimage.measurements (unpacking-non-sequence)</string>
</tuple>
</value> </value>
</item> </item>
<item> <item>
......
<allowed_content_type_list> <allowed_content_type_list>
<portal_type id="Receipt Recognition Module"> <portal_type id="Receipt Recognition Module">
<item>Receipt</item> <item>Receipt Recognition</item>
</portal_type> </portal_type>
</allowed_content_type_list> </allowed_content_type_list>
\ No newline at end of file
<property_sheet_list> <property_sheet_list>
<portal_type id="Receipt"> <portal_type id="Receipt Recognition">
<item>Document</item> <item>Document</item>
</portal_type> </portal_type>
</property_sheet_list> </property_sheet_list>
\ No newline at end of file
...@@ -28,7 +28,7 @@ ...@@ -28,7 +28,7 @@
</item> </item>
<item> <item>
<key> <string>id</string> </key> <key> <string>id</string> </key>
<value> <string>Receipt</string> </value> <value> <string>Receipt Recognition</string> </value>
</item> </item>
<item> <item>
<key> <string>init_script</string> </key> <key> <string>init_script</string> </key>
......
...@@ -2,17 +2,17 @@ image = context.getFollowUpValue() ...@@ -2,17 +2,17 @@ image = context.getFollowUpValue()
if image is not None: if image is not None:
try: try:
total = container.ReceiptRecognition_getReceiptValue(image.getData()) total = container.ReceiptRecognition_getReceiptValue(image.getData())
msg = "Total found" message = "Total found"
context.edit( context.edit(
total = total, total = total,
) )
except ValueError as e: except ValueError as e:
msg = "Could not find value, please submit it manually" message = "Could not find value, please submit it manually"
else: else:
msg = "Cannot find the image" message = "Cannot find the image"
if batch_mode: if batch_mode:
return return
context.Base_redirect( context.Base_redirect(
'view', keep_items = dict(portal_status_message=msg, my_source="test")) 'view', keep_items = dict(portal_status_message=message, my_source="test"))
...@@ -54,7 +54,7 @@ ...@@ -54,7 +54,7 @@
</item> </item>
<item> <item>
<key> <string>id</string> </key> <key> <string>id</string> </key>
<value> <string>ReceiptConversion_convertImage</string> </value> <value> <string>ReceiptRecognition_convertImage</string> </value>
</item> </item>
</dictionary> </dictionary>
</pickle> </pickle>
......
...@@ -89,7 +89,7 @@ ...@@ -89,7 +89,7 @@
</item> </item>
<item> <item>
<key> <string>id</string> </key> <key> <string>id</string> </key>
<value> <string>Receipt_view</string> </value> <value> <string>ReceiptRecognition_view</string> </value>
</item> </item>
<item> <item>
<key> <string>method</string> </key> <key> <string>method</string> </key>
......
Receipt Recognition Module | view Receipt Recognition Module | view
Receipt | receipt_convert Receipt Recognition | receipt_convert
Receipt | view Receipt Recognition | view
\ No newline at end of file \ No newline at end of file
Receipt Recognition Module | Receipt Receipt Recognition Module | Receipt Recognition
\ No newline at end of file \ No newline at end of file
Receipt Receipt Recognition
Receipt Recognition Module Receipt Recognition Module
\ No newline at end of file
Receipt | Document Receipt Recognition | Document
\ No newline at end of file \ No newline at end of file
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment