Commit e9972efc authored by Douwe Maan's avatar Douwe Maan

Extract ReplyParser and AttachmentUploader from Receiver.

parent 3ff9d5c6
...@@ -7,7 +7,7 @@ class EmailReceiverWorker ...@@ -7,7 +7,7 @@ class EmailReceiverWorker
return unless Gitlab::ReplyByEmail.enabled? return unless Gitlab::ReplyByEmail.enabled?
begin begin
Gitlab::EmailReceiver.new(raw).execute Gitlab::Email::Receiver.new(raw).execute
rescue => e rescue => e
handle_failure(raw, e) handle_failure(raw, e)
end end
...@@ -22,20 +22,20 @@ class EmailReceiverWorker ...@@ -22,20 +22,20 @@ class EmailReceiverWorker
reason = nil reason = nil
case e case e
when Gitlab::EmailReceiver::SentNotificationNotFound when Gitlab::Email::Receiver::SentNotificationNotFound
reason = "We couldn't figure out what the email is in reply to. Please create your comment through the web interface." reason = "We couldn't figure out what the email is in reply to. Please create your comment through the web interface."
when Gitlab::EmailReceiver::EmptyEmailError when Gitlab::Email::Receiver::EmptyEmailError
can_retry = true can_retry = true
reason = "It appears that the email is blank. Make sure your reply is at the top of the email, we can't process inline replies." reason = "It appears that the email is blank. Make sure your reply is at the top of the email, we can't process inline replies."
when Gitlab::EmailReceiver::AutoGeneratedEmailError when Gitlab::Email::Receiver::AutoGeneratedEmailError
reason = "The email was marked as 'auto generated', which we can't accept. Please create your comment through the web interface." reason = "The email was marked as 'auto generated', which we can't accept. Please create your comment through the web interface."
when Gitlab::EmailReceiver::UserNotFoundError when Gitlab::Email::Receiver::UserNotFoundError
reason = "We couldn't figure out what user corresponds to the email. Please create your comment through the web interface." reason = "We couldn't figure out what user corresponds to the email. Please create your comment through the web interface."
when Gitlab::EmailReceiver::UserNotAuthorizedError when Gitlab::Email::Receiver::UserNotAuthorizedError
reason = "You are not allowed to respond to the thread you are replying to. If you believe this is in error, contact a staff member." reason = "You are not allowed to respond to the thread you are replying to. If you believe this is in error, contact a staff member."
when Gitlab::EmailReceiver::NoteableNotFoundError when Gitlab::Email::Receiver::NoteableNotFoundError
reason = "The thread you are replying to no longer exists, perhaps it was deleted? If you believe this is in error, contact a staff member." reason = "The thread you are replying to no longer exists, perhaps it was deleted? If you believe this is in error, contact a staff member."
when Gitlab::EmailReceiver::InvalidNote when Gitlab::Email::Receiver::InvalidNote
can_retry = true can_retry = true
reason = e.message reason = e.message
else else
......
module Gitlab
module Email
module AttachmentUploader
attr_accessor :message
def initialize(message)
@message = message
end
def execute(project)
attachments = []
message.attachments.each do |attachment|
tmp = Tempfile.new("gitlab-email-attachment")
begin
File.open(tmp.path, "w+b") { |f| f.write attachment.body.decoded }
file = {
tempfile: tmp,
filename: attachment.filename,
content_type: attachment.content_type
}
link = ::Projects::UploadService.new(project, file).execute
attachments << link if link
ensure
tmp.close!
end
end
attachments
end
end
end
end
# Taken mostly from Discourse's Email::HtmlCleaner
module Gitlab
module Email
# HtmlCleaner cleans up the extremely dirty HTML that many email clients
# generate by stripping out any excess divs or spans, removing styling in
# the process (which also makes the html more suitable to be parsed as
# Markdown).
class HtmlCleaner
# Elements to hoist all children out of
HTML_HOIST_ELEMENTS = %w(div span font table tbody th tr td)
# Node types to always delete
HTML_DELETE_ELEMENT_TYPES = [
Nokogiri::XML::Node::DTD_NODE,
Nokogiri::XML::Node::COMMENT_NODE,
]
# Private variables:
# @doc - nokogiri document
# @out - same as @doc, but only if trimming has occured
def initialize(html)
if html.is_a?(String)
@doc = Nokogiri::HTML(html)
else
@doc = html
end
end
class << self
# HtmlCleaner.trim(inp, opts={})
#
# Arguments:
# inp - Either a HTML string or a Nokogiri document.
# Options:
# :return => :doc, :string
# Specify the desired return type.
# Defaults to the type of the input.
# A value of :string is equivalent to calling get_document_text()
# on the returned document.
def trim(inp, opts={})
cleaner = HtmlCleaner.new(inp)
opts[:return] ||= (inp.is_a?(String) ? :string : :doc)
if opts[:return] == :string
cleaner.output_html
else
cleaner.output_document
end
end
# HtmlCleaner.get_document_text(doc)
#
# Get the body portion of the document, including html, as a string.
def get_document_text(doc)
body = doc.xpath('//body')
if body
body.inner_html
else
doc.inner_html
end
end
end
def output_document
@out ||= begin
doc = @doc
trim_process_node doc
add_newlines doc
doc
end
end
def output_html
HtmlCleaner.get_document_text(output_document)
end
private
def add_newlines(doc)
# Replace <br> tags with a markdown \n
doc.xpath('//br').each do |br|
br.replace(new_linebreak_node doc, 2)
end
# Surround <p> tags with newlines, to help with line-wise postprocessing
# and ensure markdown paragraphs
doc.xpath('//p').each do |p|
p.before(new_linebreak_node doc)
p.after(new_linebreak_node doc, 2)
end
end
def new_linebreak_node(doc, count=1)
Nokogiri::XML::Text.new("\n" * count, doc)
end
def trim_process_node(node)
if should_hoist?(node)
hoisted = trim_hoist_element node
hoisted.each { |child| trim_process_node child }
elsif should_delete?(node)
node.remove
else
if children = node.children
children.each { |child| trim_process_node child }
end
end
node
end
def trim_hoist_element(element)
hoisted = []
element.children.each do |child|
element.before(child)
hoisted << child
end
element.remove
hoisted
end
def should_hoist?(node)
return false unless node.element?
HTML_HOIST_ELEMENTS.include? node.name
end
def should_delete?(node)
return true if HTML_DELETE_ELEMENT_TYPES.include? node.type
return true if node.element? && node.name == 'head'
return true if node.text? && node.text.strip.blank?
false
end
end
end
end
# Inspired in great part by Discourse's Email::Receiver
module Gitlab
module Email
class Receiver
class ProcessingError < StandardError; end
class EmailUnparsableError < ProcessingError; end
class EmptyEmailError < ProcessingError; end
class UserNotFoundError < ProcessingError; end
class UserNotAuthorizedError < ProcessingError; end
class NoteableNotFoundError < ProcessingError; end
class AutoGeneratedEmailError < ProcessingError; end
class SentNotificationNotFound < ProcessingError; end
class InvalidNote < ProcessingError; end
def initialize(raw)
@raw = raw
end
def message
@message ||= Mail::Message.new(@raw)
rescue Encoding::UndefinedConversionError, Encoding::InvalidByteSequenceError => e
raise EmailUnparsableError, e
end
def execute
raise SentNotificationNotFound unless sent_notification
raise EmptyEmailError if @raw.blank?
raise AutoGeneratedEmailError if message.header.to_s =~ /auto-(generated|replied)/
author = sent_notification.recipient
raise UserNotFoundError unless author
project = sent_notification.project
raise UserNotAuthorizedError unless author.can?(:create_note, project)
raise NoteableNotFoundError unless sent_notification.noteable
reply = ReplyParser.new(message).execute.strip
raise EmptyEmailError if reply.blank?
reply = add_attachments(reply)
note = create_note(reply)
unless note.persisted?
message = "The comment could not be created for the following reasons:"
note.errors.full_messages.each do |error|
message << "\n\n- #{error}"
end
raise InvalidNote, message
end
end
private
def reply_key
reply_key = nil
message.to.each do |address|
reply_key = Gitlab::ReplyByEmail.reply_key_from_address(address)
break if reply_key
end
reply_key
end
def sent_notification
return nil unless reply_key
SentNotification.for(reply_key)
end
def add_attachments(reply)
attachments = AttachmentUploader.new(message).execute(project)
attachments.each do |link|
text = "[#{link[:alt]}](#{link[:url]})"
text.prepend("!") if link[:is_image]
reply << "\n\n#{text}"
end
end
def create_note(reply)
Notes::CreateService.new(
sent_notification.project,
sent_notification.recipient,
note: reply,
noteable_type: sent_notification.noteable_type,
noteable_id: sent_notification.noteable_id,
commit_id: sent_notification.commit_id
).execute
end
end
end
end
# Inspired in great part by Discourse's Email::Receiver
module Gitlab
module Email
class ReplyParser
attr_accessor :message
def initialize(message)
@message = message
end
def execute
body = select_body(message)
encoding = body.encoding
body = discourse_email_trimmer(body)
body = EmailReplyParser.parse_reply(body)
body.force_encoding(encoding).encode("UTF-8")
end
private
def select_body(message)
html = nil
text = nil
if message.multipart?
html = fix_charset(message.html_part)
text = fix_charset(message.text_part)
elsif message.content_type =~ /text\/html/
html = fix_charset(message)
end
# prefer plain text
return text if text
if html
body = HtmlCleaner.new(html).output_html
else
body = fix_charset(message)
end
# Certain trigger phrases that means we didn't parse correctly
if body =~ /(Content\-Type\:|multipart\/alternative|text\/plain)/
return ""
end
body
end
# Force encoding to UTF-8 on a Mail::Message or Mail::Part
def fix_charset(object)
return nil if object.nil?
if object.charset
object.body.decoded.force_encoding(object.charset.gsub(/utf8/i, "UTF-8")).encode("UTF-8").to_s
else
object.body.to_s
end
rescue
nil
end
REPLYING_HEADER_LABELS = %w(From Sent To Subject Reply To Cc Bcc Date)
REPLYING_HEADER_REGEX = Regexp.union(REPLYING_HEADER_LABELS.map { |label| "#{label}:" })
def discourse_email_trimmer(body)
lines = body.scrub.lines.to_a
range_end = 0
lines.each_with_index do |l, idx|
# This one might be controversial but so many reply lines have years, times and end with a colon.
# Let's try it and see how well it works.
break if (l =~ /\d{4}/ && l =~ /\d:\d\d/ && l =~ /\:$/) ||
(l =~ /On \w+ \d+,? \d+,?.*wrote:/)
# Headers on subsequent lines
break if (0..2).all? { |off| lines[idx+off] =~ REPLYING_HEADER_REGEX }
# Headers on the same line
break if REPLYING_HEADER_LABELS.count { |label| l.include?(label) } >= 3
range_end = idx
end
lines[0..range_end].join.strip
end
end
end
end
# Taken mostly from Discourse's Email::HtmlCleaner
module Gitlab
# HtmlCleaner cleans up the extremely dirty HTML that many email clients
# generate by stripping out any excess divs or spans, removing styling in
# the process (which also makes the html more suitable to be parsed as
# Markdown).
class EmailHtmlCleaner
# Elements to hoist all children out of
HTML_HOIST_ELEMENTS = %w(div span font table tbody th tr td)
# Node types to always delete
HTML_DELETE_ELEMENT_TYPES = [
Nokogiri::XML::Node::DTD_NODE,
Nokogiri::XML::Node::COMMENT_NODE,
]
# Private variables:
# @doc - nokogiri document
# @out - same as @doc, but only if trimming has occured
def initialize(html)
if html.is_a?(String)
@doc = Nokogiri::HTML(html)
else
@doc = html
end
end
class << self
# EmailHtmlCleaner.trim(inp, opts={})
#
# Arguments:
# inp - Either a HTML string or a Nokogiri document.
# Options:
# :return => :doc, :string
# Specify the desired return type.
# Defaults to the type of the input.
# A value of :string is equivalent to calling get_document_text()
# on the returned document.
def trim(inp, opts={})
cleaner = EmailHtmlCleaner.new(inp)
opts[:return] ||= (inp.is_a?(String) ? :string : :doc)
if opts[:return] == :string
cleaner.output_html
else
cleaner.output_document
end
end
# EmailHtmlCleaner.get_document_text(doc)
#
# Get the body portion of the document, including html, as a string.
def get_document_text(doc)
body = doc.xpath('//body')
if body
body.inner_html
else
doc.inner_html
end
end
end
def output_document
@out ||= begin
doc = @doc
trim_process_node doc
add_newlines doc
doc
end
end
def output_html
EmailHtmlCleaner.get_document_text(output_document)
end
private
def add_newlines(doc)
# Replace <br> tags with a markdown \n
doc.xpath('//br').each do |br|
br.replace(new_linebreak_node doc, 2)
end
# Surround <p> tags with newlines, to help with line-wise postprocessing
# and ensure markdown paragraphs
doc.xpath('//p').each do |p|
p.before(new_linebreak_node doc)
p.after(new_linebreak_node doc, 2)
end
end
def new_linebreak_node(doc, count=1)
Nokogiri::XML::Text.new("\n" * count, doc)
end
def trim_process_node(node)
if should_hoist?(node)
hoisted = trim_hoist_element node
hoisted.each { |child| trim_process_node child }
elsif should_delete?(node)
node.remove
else
if children = node.children
children.each { |child| trim_process_node child }
end
end
node
end
def trim_hoist_element(element)
hoisted = []
element.children.each do |child|
element.before(child)
hoisted << child
end
element.remove
hoisted
end
def should_hoist?(node)
return false unless node.element?
HTML_HOIST_ELEMENTS.include? node.name
end
def should_delete?(node)
return true if HTML_DELETE_ELEMENT_TYPES.include? node.type
return true if node.element? && node.name == 'head'
return true if node.text? && node.text.strip.blank?
false
end
end
end
# Inspired in great part by Discourse's Email::Receiver
module Gitlab
class EmailReceiver
class ProcessingError < StandardError; end
class EmailUnparsableError < ProcessingError; end
class EmptyEmailError < ProcessingError; end
class UserNotFoundError < ProcessingError; end
class UserNotAuthorizedError < ProcessingError; end
class NoteableNotFoundError < ProcessingError; end
class AutoGeneratedEmailError < ProcessingError; end
class SentNotificationNotFound < ProcessingError; end
class InvalidNote < ProcessingError; end
def initialize(raw)
@raw = raw
end
def message
@message ||= Mail::Message.new(@raw)
rescue Encoding::UndefinedConversionError, Encoding::InvalidByteSequenceError => e
raise EmailUnparsableError, e
end
def execute
raise SentNotificationNotFound unless sent_notification
raise EmptyEmailError if @raw.blank?
raise AutoGeneratedEmailError if message.header.to_s =~ /auto-(generated|replied)/
author = sent_notification.recipient
raise UserNotFoundError unless author
project = sent_notification.project
raise UserNotAuthorizedError unless author.can?(:create_note, project)
raise NoteableNotFoundError unless sent_notification.noteable
body = parse_body(message)
upload_attachments.each do |link|
body << "\n\n#{link}"
end
note = Notes::CreateService.new(
project,
author,
note: body,
noteable_type: sent_notification.noteable_type,
noteable_id: sent_notification.noteable_id,
commit_id: sent_notification.commit_id
).execute
unless note.persisted?
message = "The comment could not be created for the following reasons:"
note.errors.full_messages.each do |error|
message << "\n\n- #{error}"
end
raise InvalidNote, message
end
end
def parse_body(message)
body = select_body(message)
encoding = body.encoding
raise EmptyEmailError if body.strip.blank?
body = discourse_email_trimmer(body)
raise EmptyEmailError if body.strip.blank?
body = EmailReplyParser.parse_reply(body)
raise EmptyEmailError if body.strip.blank?
body.force_encoding(encoding).encode("UTF-8")
end
private
def reply_key
reply_key = nil
message.to.each do |address|
reply_key = Gitlab::ReplyByEmail.reply_key_from_address(address)
break if reply_key
end
reply_key
end
def sent_notification
return nil unless reply_key
SentNotification.for(reply_key)
end
def select_body(message)
html = nil
text = nil
if message.multipart?
html = fix_charset(message.html_part)
text = fix_charset(message.text_part)
elsif message.content_type =~ /text\/html/
html = fix_charset(message)
end
# prefer plain text
return text if text
if html
body = EmailHtmlCleaner.new(html).output_html
else
body = fix_charset(message)
end
# Certain trigger phrases that means we didn't parse correctly
if body =~ /(Content\-Type\:|multipart\/alternative|text\/plain)/
raise EmptyEmailError
end
body
end
# Force encoding to UTF-8 on a Mail::Message or Mail::Part
def fix_charset(object)
return nil if object.nil?
if object.charset
object.body.decoded.force_encoding(object.charset.gsub(/utf8/i, "UTF-8")).encode("UTF-8").to_s
else
object.body.to_s
end
rescue
nil
end
REPLYING_HEADER_LABELS = %w(From Sent To Subject Reply To Cc Bcc Date)
REPLYING_HEADER_REGEX = Regexp.union(REPLYING_HEADER_LABELS.map { |label| "#{label}:" })
def discourse_email_trimmer(body)
lines = body.scrub.lines.to_a
range_end = 0
lines.each_with_index do |l, idx|
# This one might be controversial but so many reply lines have years, times and end with a colon.
# Let's try it and see how well it works.
break if (l =~ /\d{4}/ && l =~ /\d:\d\d/ && l =~ /\:$/) ||
(l =~ /On \w+ \d+,? \d+,?.*wrote:/)
# Headers on subsequent lines
break if (0..2).all? { |off| lines[idx+off] =~ REPLYING_HEADER_REGEX }
# Headers on the same line
break if REPLYING_HEADER_LABELS.count { |label| l.include?(label) } >= 3
range_end = idx
end
lines[0..range_end].join.strip
end
def upload_attachments
attachments = []
message.attachments.each do |attachment|
tmp = Tempfile.new("gitlab-email-attachment")
begin
File.open(tmp.path, "w+b") { |f| f.write attachment.body.decoded }
file = {
tempfile: tmp,
filename: attachment.filename,
content_type: attachment.content_type
}
link = ::Projects::UploadService.new(sent_notification.project, file).execute
if link
text = "[#{link[:alt]}](#{link[:url]})"
text.prepend("!") if link[:is_image]
attachments << text
end
ensure
tmp.close!
end
end
attachments
end
end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment