Commit 6d662f06 authored by Noah Brackenbury

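Made the testing distribution random instead of fixed: each message is now held out for testing with probability 0.2, replacing the previous rule of holding out every fifth message.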

parent cf8b8a03
...@@ -7,6 +7,7 @@ def WebMessage_testModel(self): ...@@ -7,6 +7,7 @@ def WebMessage_testModel(self):
from Products.ZSQLCatalog.SQLCatalog import NegatedQuery from Products.ZSQLCatalog.SQLCatalog import NegatedQuery
import datetime import datetime
import time import time
import random
# instantiate arrays # instantiate arrays
stopwords_arrays = {} stopwords_arrays = {}
...@@ -27,7 +28,7 @@ def WebMessage_testModel(self): ...@@ -27,7 +28,7 @@ def WebMessage_testModel(self):
query=NegatedQuery(Query(subject=None)), query=NegatedQuery(Query(subject=None)),
) )
for index, message in enumerate(training_messages): for index, message in enumerate(training_messages):
if index%5 == 0: if random.random() <= 0.2:
test_messages.append(message) test_messages.append(message)
else: else:
(language_arrays, tag_arrays) = message.WebMessage_trainOnWebMessage(language_arrays, tag_arrays, stopwords_arrays) (language_arrays, tag_arrays) = message.WebMessage_trainOnWebMessage(language_arrays, tag_arrays, stopwords_arrays)
...@@ -37,10 +38,12 @@ def WebMessage_testModel(self): ...@@ -37,10 +38,12 @@ def WebMessage_testModel(self):
m = {"job", "sponsorship", "academic", "contributor"} m = {"job", "sponsorship", "academic", "contributor"}
correct_tags = 0 correct_tags = 0
excess_tags = 0 excess_tags = 0
language_accuracy = 0
type_accuracy = 0 type_accuracy = 0
for message in test_messages: for message in test_messages:
suggested_subject_list = [] suggested_subject_list = []
# clean up header from contact form, if there is one # clean up header from contact form, if there is one
text = message.getTextContent() text = message.getTextContent()
line_array = [line for line in text.splitlines() if line.strip() != ''] line_array = [line for line in text.splitlines() if line.strip() != '']
...@@ -79,7 +82,7 @@ def WebMessage_testModel(self): ...@@ -79,7 +82,7 @@ def WebMessage_testModel(self):
tag_relevance[tags[t]] = tag_relevance[tags[t]] + word_relevance tag_relevance[tags[t]] = tag_relevance[tags[t]] + word_relevance
# apply tags # apply tags
average_relevance = sum(tag_relevance.values()) / float(len(tag_relevance.values())) average_relevance = sum(tag_relevance.values()) / (len(tag_relevance.values()))
for t in tag_relevance: for t in tag_relevance:
if tag_relevance[t] >= average_relevance*2: if tag_relevance[t] >= average_relevance*2:
suggested_subject_list.append(t) suggested_subject_list.append(t)
...@@ -93,28 +96,29 @@ def WebMessage_testModel(self): ...@@ -93,28 +96,29 @@ def WebMessage_testModel(self):
excess_tags += len(suggested_tags_set.difference(message_tags_set)) / len(suggested_tags_set) excess_tags += len(suggested_tags_set.difference(message_tags_set)) / len(suggested_tags_set)
correct_language = True correct_language = True
for language in language_arrays.keys(): for language in languages:
if language in message_tags_set.symmetric_difference(suggested_tags_set): if language in message_tags_set.symmetric_difference(suggested_tags_set):
correct_language = False correct_language = False
if correct_language == True: if correct_language == True:
type_accuracy += .5 language_accuracy += 1
if message_tags_set.intersection(sr): if message_tags_set.intersection(sr):
if suggested_tags_set.intersection(sr): if suggested_tags_set.intersection(sr):
type_accuracy += .5 type_accuracy += 1
elif message_tags_set.intersection(so): elif message_tags_set.intersection(so):
if suggested_tags_set.intersection(so): if suggested_tags_set.intersection(so):
type_accuracy += .5 type_accuracy += 1
else: else:
if not suggested_tags_set.intersection(sr) and not suggested_tags_set.intersection(so): if not suggested_tags_set.intersection(sr) and not suggested_tags_set.intersection(so):
type_accuracy += .5 type_accuracy += 1
correct_tags /= len(test_messages) correct_tags /= len(test_messages)
excess_tags /= len(test_messages) excess_tags /= len(test_messages)
language_accuracy /= len(test_messages)
type_accuracy /= len(test_messages) type_accuracy /= len(test_messages)
end_time = time.time() end_time = time.time()
uptime = end_time - start_time uptime = end_time - start_time
human_uptime = str(datetime.timedelta(seconds=int(uptime))) human_uptime = str(datetime.timedelta(seconds=int(uptime)))
return "Model tested in " + human_uptime + " showed a ticket_type/language accuracy of " + str(type_accuracy) + \ return "Model tested in " + human_uptime + " showed a language accuracy of " + str(language_accuracy) + \
" and identified " + str(correct_tags) + " of the tags correctly with " + str(excess_tags) + " excess tags." ", and a ticket_type accuracy of " + str(type_accuracy) + ", identifying " + str(correct_tags) + " of the tags correctly with " + str(excess_tags) + " excess tags."
\ No newline at end of file \ No newline at end of file
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment