Commit d8651adb authored by francois's avatar francois

erp5_receipt_recognition Add function to load model from datastream

This commit allows the erp5_receipt_recognition module to unpickle its
model from a Wendelin data stream.

Loading the model from a data stream is a bit slower.
parent e5454815
......@@ -14,9 +14,10 @@ from matplotlib import pylab
import matplotlib.image as mpimg
import scipy.stats as stats
import re
import cPickle
import ocrolib
def getReceiptValue(self, image_data):
def getReceiptValue(self, image_data, model_name = "en-default.pyrnn", from_stream=False):
"""
Function called from an erp5 script through externalMethod
that take an image and its name and save its binarized
......@@ -29,7 +30,7 @@ def getReceiptValue(self, image_data):
- image_data:
base64 representation of the image to analyse
@return:
- ret: float
- anon: float
Represent total value paid on the receipt
----------------------------
This function returns the total value of the receipt in euros.
......@@ -39,8 +40,36 @@ def getReceiptValue(self, image_data):
line_list, cleared = getLinesFromPicture(image_as_array)
# Start the neural network
network, lnorm = initRnnModel()
if not from_stream:
network, lnorm = initRnnModel(model_name)
else:
network, lnorm = getRnnModelFromDataStream(self, model_name)
return findReceiptValue(line_list, cleared, network, lnorm)
def findReceiptValue(line_list, cleared, network, lnorm):
"""
Function that runs the neural network through the receipt and extracts
the meaningful value
-----------------------------
@args:
- line_list: array list
Represent lines of text that will be extracted
from the image
- cleared:2D array
Represent binarized image cropped and cleaned,
from which we will extract text lines
- network: lstm object
Represent the trained neural net
- lnorm: method from lstm object
Represent the size of the lstm object. Is used to scale the objects
to recognize from original size to the average network object.
@return:
- anon: float
Represent total value paid on the receipt
-----------------------------
This function can be modified to add more fields to detect. It might be
possible to run a classification neural net on the result.
"""
value_list = []
tofind = r"(EUR)|€|(TOT)"
for _, line in enumerate(line_list):
......@@ -48,15 +77,32 @@ def getReceiptValue(self, image_data):
# Corner case: the dewarping function from the normalizer sometimes fails
# on empty lines. This can be corrected with better segmentation.
try:
evaluate = getStringFromImage(binline, lnorm, network)
evaluate = getStringFromImage(binline, lnorm, network)
if re.search(tofind, evaluate.upper()):
number = re.findall(r"\d+[\.|,]\d\d", evaluate)
value_list += [float(char.replace(',', '.')) for char in number]
except ValueError:
pass
return round(max(value_list), 2)
def getRnnModelFromDataStream(self, model_name="en-default.pyrnn"):
    """
    Unpickle a neural network stored in the data stream module
    ----------------------------
    @args:
      - model_name: string, default: en-default.pyrnn
          Id of the object in data_stream_module whose data is the
          pickled rnn model
    @return:
      - network: lstm object
          The object obtained by unpickling the data of the stream
      - lnorm: attribute of the lstm object
          Value of the "lnorm" attribute of the unpickled network, or
          None when the network has no such attribute
    ----------------------------
    The model is looked up by id in self.data_stream_module and its
    getData() result is handed to cPickle.
    """
    stream = self.data_stream_module[model_name]
    # NOTE(review): unpickling can execute arbitrary code, so the content of
    # the data stream must come from a trusted source.
    model = cPickle.loads(stream.getData())
    return model, getattr(model, "lnorm", None)
def initRnnModel(model_name = "en-default.pyrnn"):
"""
......
......@@ -46,8 +46,8 @@
<key> <string>text_content_warning_message</string> </key>
<value>
<tuple>
<string>W:243, 2: Attempting to unpack a non-sequence defined at line 181 of scipy.ndimage.measurements (unpacking-non-sequence)</string>
<string>W:272, 2: Attempting to unpack a non-sequence defined at line 181 of scipy.ndimage.measurements (unpacking-non-sequence)</string>
<string>W:289, 2: Attempting to unpack a non-sequence defined at line 181 of scipy.ndimage.measurements (unpacking-non-sequence)</string>
<string>W:318, 2: Attempting to unpack a non-sequence defined at line 181 of scipy.ndimage.measurements (unpacking-non-sequence)</string>
</tuple>
</value>
</item>
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment