From 752482f5f6d73dad962d8d55d5104e793f841232 Mon Sep 17 00:00:00 2001
From: Nicolas Delaby <nicolas@nexedi.com>
Date: Thu, 15 Sep 2011 07:49:01 +0200
Subject: [PATCH] Add test to check safe_html conversion of flawed attachment

---
 product/ERP5/tests/testCRM.py                 |  36 +++
 product/ERP5/tests/test_data/broken_html.html | 248 ++++++++++++++++++
 2 files changed, 284 insertions(+)
 create mode 100644 product/ERP5/tests/test_data/broken_html.html

diff --git a/product/ERP5/tests/testCRM.py b/product/ERP5/tests/testCRM.py
index eee69c6e7b..5d1ad88e8e 100644
--- a/product/ERP5/tests/testCRM.py
+++ b/product/ERP5/tests/testCRM.py
@@ -39,6 +39,10 @@ from Products.ERP5Type.tests.ERP5TypeTestCase import ERP5TypeTestCase,\
 from Products.ERP5OOo.tests.testIngestion import FILENAME_REGULAR_EXPRESSION
 from Products.ERP5OOo.tests.testIngestion import REFERENCE_REGULAR_EXPRESSION
 from Products.ERP5Type.tests.backportUnittest import expectedFailure
+from email.MIMEMultipart import MIMEMultipart
+from email.MIMEBase import MIMEBase
+from email.MIMEText import MIMEText
+from email import Encoders
 
 def makeFilePath(name):
   return os.path.join(os.path.dirname(__file__), 'test_data', 'crm_emails', name)
@@ -784,6 +788,38 @@ class TestCRMMailIngestion(BaseTestCRM):
                       mixed_document.getTextContent())
     self.assertEquals('text/plain', mixed_document.getContentType())
 
+  def test_flawed_html_attachment(self):
+    """Check that a mail with a flawed HTML attachment is still ingested."""
+    portal_type = 'Mail Message'
+    event = self.portal.getDefaultModule(portal_type).newContent(portal_type=portal_type)
+    # build message content with flawed attachment
+    html_filename = 'broken_html.html'
+    # locate the fixture beside this module, whatever __name__ happens to be
+    file_path = os.path.join(os.path.dirname(__file__), 'test_data',
+                             html_filename)
+    html_file = open(file_path, 'r')
+    try:
+      html_message = html_file.read()
+    finally:
+      html_file.close()
+    message = MIMEMultipart('alternative')
+    message.attach(MIMEText('text plain content', _charset='utf-8'))
+    part = MIMEBase('text', 'html')
+    part.set_payload(html_message)
+    Encoders.encode_base64(part)
+    part.add_header('Content-Disposition', 'attachment',
+                    filename=html_filename)
+    part.add_header('Content-ID', '<%s>' % \
+                    ''.join(['%s' % ord(i) for i in html_filename]))
+    message.attach(part)
+    event.setData(message.as_string())
+    transaction.commit()
+    self.tic()
+    self.assertTrue('html' in event.getTextContent())
+    self.assertEquals(len(event.getAttachmentInformationList()), 2)
+    self.assertTrue(bool(event.getAttachmentData(1)))
+    self.assertTrue(bool(event.getAttachmentData(2)))
+
 
 ## TODO:
 ##
diff --git a/product/ERP5/tests/test_data/broken_html.html b/product/ERP5/tests/test_data/broken_html.html
new file mode 100644
index 0000000000..a75443b9f0
--- /dev/null
+++ b/product/ERP5/tests/test_data/broken_html.html
@@ -0,0 +1,248 @@
+<html xmlns:v="urn:schemas-microsoft-com:vml" xmlns:o="urn:schemas-microsoft-com:office:office" xmlns:w="urn:schemas-microsoft-com:office:word" xmlns:x="urn:schemas-microsoft-com:office:excel" xmlns:p="urn:schemas-microsoft-com:office:powerpoint" xmlns:a="urn:schemas-microsoft-com:office:access" xmlns:dt="uuid:C2F41010-65B3-11d1-A29F-00AA00C14882" xmlns:s="uuid:BDC6E3F0-6DA3-11d1-A2A3-00AA00C14882" xmlns:rs="urn:schemas-microsoft-com:rowset" xmlns:z="#RowsetSchema" xmlns:b="urn:schemas-microsoft-com:office:publisher" xmlns:ss="urn:schemas-microsoft-com:office:spreadsheet" xmlns:c="urn:schemas-microsoft-com:office:component:spreadsheet" xmlns:odc="urn:schemas-microsoft-com:office:odc" xmlns:oa="urn:schemas-microsoft-com:office:activation" xmlns:html="http://www.w3.org/TR/REC-html40" xmlns:q="http://schemas.xmlsoap.org/soap/envelope/" xmlns:rtc="http://microsoft.com/officenet/conferencing" xmlns:D="DAV:" xmlns:Repl="http://schemas.microsoft.com/repl/" xmlns:mt="http://schemas.microsoft.com/sharepoint/soap/meetings/" xmlns:x2="http://schemas.microsoft.com/office/excel/2003/xml" xmlns:ppda="http://www.passport.com/NameSpace.xsd" xmlns:ois="http://schemas.microsoft.com/sharepoint/soap/ois/" xmlns:dir="http://schemas.microsoft.com/sharepoint/soap/directory/" xmlns:ds="http://www.w3.org/2000/09/xmldsig#" xmlns:dsp="http://schemas.microsoft.com/sharepoint/dsp" xmlns:udc="http://schemas.microsoft.com/data/udc" xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns:sub="http://schemas.microsoft.com/sharepoint/soap/2002/1/alerts/" xmlns:ec="http://www.w3.org/2001/04/xmlenc#" xmlns:sp="http://schemas.microsoft.com/sharepoint/" xmlns:sps="http://schemas.microsoft.com/sharepoint/soap/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:udcs="http://schemas.microsoft.com/data/udc/soap" xmlns:udcxf="http://schemas.microsoft.com/data/udc/xmlfile" xmlns:udcp2p="http://schemas.microsoft.com/data/udc/parttopart" xmlns:wf="http://schemas.microsoft.com/sharepoint/soap/workflow/" 
xmlns:dsss="http://schemas.microsoft.com/office/2006/digsig-setup" xmlns:dssi="http://schemas.microsoft.com/office/2006/digsig" xmlns:mdssi="http://schemas.openxmlformats.org/package/2006/digital-signature" xmlns:mver="http://schemas.openxmlformats.org/markup-compatibility/2006" xmlns:m="http://schemas.microsoft.com/office/2004/12/omml" xmlns:mrels="http://schemas.openxmlformats.org/package/2006/relationships" xmlns:spwp="http://microsoft.com/sharepoint/webpartpages" xmlns:ex12t="http://schemas.microsoft.com/exchange/services/2006/types" xmlns:ex12m="http://schemas.microsoft.com/exchange/services/2006/messages" xmlns:pptsl="http://schemas.microsoft.com/sharepoint/soap/SlideLibrary/" xmlns:spsl="http://microsoft.com/webservices/SharePointPortalServer/PublishedLinksService" xmlns:Z="urn:schemas-microsoft-com:" xmlns:st="&#1;" xmlns="http://www.w3.org/TR/REC-html40">
+<head>
+<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
+<meta name="Generator" content="Microsoft Word 12 (filtered medium)">
+<!--[if !mso]><style>v\:* {behavior:url(#default#VML);}
+o\:* {behavior:url(#default#VML);}
+w\:* {behavior:url(#default#VML);}
+.shape {behavior:url(#default#VML);}
+</style><![endif]-->
+<title>One-Time</title>
+<style><!--
+/* Font Definitions */
+@font-face
+	{font-family:"Cambria Math";
+	panose-1:2 4 5 3 5 4 6 3 2 4;}
+@font-face
+	{font-family:Calibri;
+	panose-1:2 15 5 2 2 2 4 3 2 4;}
+@font-face
+	{font-family:Tahoma;
+	panose-1:2 11 6 4 3 5 4 4 2 4;}
+/* Style Definitions */
+p.MsoNormal, li.MsoNormal, div.MsoNormal
+	{margin:0cm;
+	margin-bottom:.0001pt;
+	font-size:12.0pt;
+	font-family:"Times New Roman","serif";}
+a:link, span.MsoHyperlink
+	{mso-style-priority:99;
+	color:blue;
+	text-decoration:underline;}
+a:visited, span.MsoHyperlinkFollowed
+	{mso-style-priority:99;
+	color:purple;
+	text-decoration:underline;}
+p
+	{mso-style-priority:99;
+	mso-margin-top-alt:auto;
+	margin-right:0cm;
+	mso-margin-bottom-alt:auto;
+	margin-left:0cm;
+	font-size:12.0pt;
+	font-family:"Times New Roman","serif";}
+p.MsoAcetate, li.MsoAcetate, div.MsoAcetate
+	{mso-style-priority:99;
+	mso-style-link:"Balloon Text Char";
+	margin:0cm;
+	margin-bottom:.0001pt;
+	font-size:8.0pt;
+	font-family:"Tahoma","sans-serif";}
+p.style1, li.style1, div.style1
+	{mso-style-name:style1;
+	mso-margin-top-alt:auto;
+	margin-right:0cm;
+	mso-margin-bottom-alt:auto;
+	margin-left:0cm;
+	font-size:12.0pt;
+	font-family:"Times New Roman","serif";}
+p.style2, li.style2, div.style2
+	{mso-style-name:style2;
+	mso-margin-top-alt:auto;
+	margin-right:0cm;
+	mso-margin-bottom-alt:auto;
+	margin-left:0cm;
+	font-size:12.0pt;
+	font-family:"Times New Roman","serif";}
+p.style3, li.style3, div.style3
+	{mso-style-name:style3;
+	mso-margin-top-alt:auto;
+	margin-right:0cm;
+	mso-margin-bottom-alt:auto;
+	margin-left:0cm;
+	font-size:12.0pt;
+	font-family:"Times New Roman","serif";}
+p.style4, li.style4, div.style4
+	{mso-style-name:style4;
+	mso-margin-top-alt:auto;
+	margin-right:0cm;
+	mso-margin-bottom-alt:auto;
+	margin-left:0cm;
+	font-size:18.0pt;
+	font-family:"Times New Roman","serif";}
+span.EmailStyle22
+	{mso-style-type:personal-reply;
+	font-family:"Calibri","sans-serif";
+	color:#1F497D;}
+span.BalloonTextChar
+	{mso-style-name:"Balloon Text Char";
+	mso-style-priority:99;
+	mso-style-link:"Balloon Text";
+	font-family:"Tahoma","sans-serif";}
+.MsoChpDefault
+	{mso-style-type:export-only;
+	font-size:10.0pt;}
+@page WordSection1
+	{size:612.0pt 792.0pt;
+	margin:72.0pt 72.0pt 72.0pt 72.0pt;}
+div.WordSection1
+	{page:WordSection1;}
+--></style><!--[if gte mso 9]><xml>
+<o:shapedefaults v:ext="edit" spidmax="2050" />
+</xml><![endif]--><!--[if gte mso 9]><xml>
+<o:shapelayout v:ext="edit">
+<o:idmap v:ext="edit" data="1" />
+</o:shapelayout></xml><![endif]-->
+</head>
+<body lang="EN-IE" link="blue" vlink="purple">
+<div class="WordSection1">
+<p class="MsoNormal"><span style="font-size:11.0pt;font-family:&quot;Calibri&quot;,&quot;sans-serif&quot;;color:#1F497D">Hi,<o:p></o:p></span></p>
+<p class="MsoNormal"><span style="font-size:11.0pt;font-family:&quot;Calibri&quot;,&quot;sans-serif&quot;;color:#1F497D"><o:p>&nbsp;</o:p></span></p>
+<p class="MsoNormal"><span style="font-size:11.0pt;font-family:&quot;Calibri&quot;,&quot;sans-serif&quot;;color:#1F497D">n
+ <sup>th</sup> .<o:p></o:p></span></p>
+<p class="MsoNormal"><span style="font-size:11.0pt;font-family:&quot;Calibri&quot;,&quot;sans-serif&quot;;color:#1F497D"><o:p>&nbsp;</o:p></span></p>
+<p class="MsoNormal"><span style="font-size:11.0pt;font-family:&quot;Calibri&quot;,&quot;sans-serif&quot;;color:#1F497D">p;
+<o:p></o:p></span></p>
+<p class="MsoNormal"><span style="font-size:11.0pt;font-family:&quot;Calibri&quot;,&quot;sans-serif&quot;;color:#1F497D"><o:p>&nbsp;</o:p></span></p>
+<p class="MsoNormal"><span style="font-size:11.0pt;font-family:&quot;Calibri&quot;,&quot;sans-serif&quot;;color:#1F497D">nt.<o:p></o:p></span></p>
+<p class="MsoNormal"><span style="font-size:11.0pt;font-family:&quot;Calibri&quot;,&quot;sans-serif&quot;;color:#1F497D"><o:p>&nbsp;</o:p></span></p>
+<p class="MsoNormal"><span style="font-size:11.0pt;font-family:&quot;Calibri&quot;,&quot;sans-serif&quot;;color:#1F497D">on.<o:p></o:p></span></p>
+<p class="MsoNormal"><span style="font-size:11.0pt;font-family:&quot;Calibri&quot;,&quot;sans-serif&quot;;color:#1F497D"><o:p>&nbsp;</o:p></span></p>
+<p class="MsoNormal"><span style="font-size:11.0pt;font-family:&quot;Calibri&quot;,&quot;sans-serif&quot;;color:#1F497D">Regards,<o:p></o:p></span></p>
+<p class="MsoNormal"><span style="font-size:11.0pt;font-family:&quot;Calibri&quot;,&quot;sans-serif&quot;;color:#1F497D"><o:p>&nbsp;</o:p></span></p>
+<div>
+<p class="MsoNormal" style="margin-bottom:10.0pt;line-height:115%"><span style="font-size:10.0pt;line-height:115%;font-family:&quot;Arial&quot;,&quot;sans-serif&quot;;color:#1F497D"><br>
+</span><span style="font-size:8.0pt;line-height:115%;font-family:&quot;Arial&quot;,&quot;sans-serif&quot;;color:#1F497D">Ator<br>
+<br>
+</span><span style="font-size:10.0pt;line-height:115%;font-family:&quot;Arial&quot;,&quot;sans-serif&quot;;color:#1F497D"><br>
+</span><span style="font-size:8.0pt;line-height:115%;font-family:&quot;Arial&quot;,&quot;sans-serif&quot;;color:red">_______________________________________________</span><span style="font-size:8.0pt;line-height:115%;font-family:&quot;Arial&quot;,&quot;sans-serif&quot;;color:#1F497D"><br>
+Cse<br>
+oad<br>
+<br>
+
+e<br>
+dqwodj;j;jk;lj
+<img width="288" height="41" id="Picture_x0020_1" src="cid:image001.jpg@01CC7129.3570BB40"><br>
+<br>
+<o:p></o:p></span></p>
+</div>
+<p class="MsoNormal"><span style="font-size:11.0pt;font-family:&quot;Calibri&quot;,&quot;sans-serif&quot;;color:#1F497D"><o:p>&nbsp;</o:p></span></p>
+<div>
+<div style="border:none;border-top:solid #B5C4DF 1.0pt;padding:3.0pt 0cm 0cm 0cm">
+<p class="MsoNormal"><b><span lang="EN-US" style="font-size:10.0pt;font-family:&quot;Tahoma&quot;,&quot;sans-serif&quot;">From:</span></b><span lang="EN-US" style="font-size:10.0pt;font-family:&quot;Tahoma&quot;,&quot;sans-serif&quot;"> ni[]
+<br>
+<b>Sent:</b> Th:00<br>
+<b>To:</b> ne<br>
+<b>Subject:</b> O<o:p></o:p></span></p>
+</div>
+</div>
+<p class="MsoNormal"><o:p>&nbsp;</o:p></p>
+<div>
+<p class="MsoNormal">One-Time<o:p></o:p></p>
+</div>
+<p>Thank you<o:p></o:p></p>
+<div id="NewUser">
+<table class="MsoNormalTable" border="0" cellspacing="0" cellpadding="0" width="600" style="width:450.0pt">
+<tbody>
+<tr style="height:13.5pt">
+<td width="212" style="width:159.0pt;padding:0cm 0cm 0cm 0cm;height:13.5pt">
+<p class="MsoNormal"><b>r:<o:p></o:p></b></p>
+</td>
+<td style="padding:0cm 0cm 0cm 0cm;height:13.5pt">
+<p class="MsoNormal">04<o:p></o:p></p>
+</td>
+</tr>
+<tr style="height:13.5pt">
+<td width="212" style="width:159.0pt;padding:0cm 0cm 0cm 0cm;height:13.5pt">
+<p class="MsoNormal"><b>r:<o:p></o:p></b></p>
+</td>
+<td style="padding:0cm 0cm 0cm 0cm;height:13.5pt">
+<p class="MsoNormal">1<o:p></o:p></p>
+</td>
+</tr>
+<tr style="height:13.5pt">
+<td width="212" style="width:159.0pt;padding:0cm 0cm 0cm 0cm;height:13.5pt">
+<p class="MsoNormal"><b>:<o:p></o:p></b></p>
+</td>
+<td style="padding:0cm 0cm 0cm 0cm;height:13.5pt">
+<p class="MsoNormal">7<o:p></o:p></p>
+</td>
+</tr>
+<tr>
+<td width="212" style="width:159.0pt;padding:0cm 0cm 0cm 0cm">
+<p class="MsoNormal"><b>PaTts:<o:p></o:p></b></p>
+</td>
+<td style="padding:0cm 0cm 0cm 0cm">
+<p class="MsoNormal">C0<o:p></o:p></p>
+</td>
+</tr>
+<tr style="height:13.5pt">
+<td width="212" style="width:159.0pt;padding:0cm 0cm 0cm 0cm;height:13.5pt">
+<p class="MsoNormal"><b>td:<o:p></o:p></b></p>
+</td>
+<td style="padding:0cm 0cm 0cm 0cm;height:13.5pt">
+<p class="MsoNormal">€3.<o:p></o:p></p>
+</td>
+</tr>
+<tr style="height:13.5pt">
+<td width="212" style="width:159.0pt;padding:0cm 0cm 0cm 0cm;height:13.5pt">
+<p class="MsoNormal"><b>Pt:<o:p></o:p></b></p>
+</td>
+<td style="padding:0cm 0cm 0cm 0cm;height:13.5pt">
+<p class="MsoNormal">081<o:p></o:p></p>
+</td>
+</tr>
+<tr style="height:10.5pt">
+<td width="212" style="width:159.0pt;padding:0cm 0cm 0cm 0cm;height:10.5pt"></td>
+<td style="padding:0cm 0cm 0cm 0cm;height:10.5pt"></td>
+</tr>
+</tbody>
+</table>
+<p class="MsoNormal"><o:p>&nbsp;</o:p></p>
+<div>
+<p class="MsoNormal"><b>Th0.<o:p></o:p></b></p>
+</div>
+</div>
+<p class="MsoNormal"><o:p>&nbsp;</o:p></p>
+<div>
+<p class="MsoNormal">Sho
+<o:p></o:p></p>
+</div>
+<div>
+<p class="MsoNormal"><a href="here">here</a><o:p></o:p></p>
+</div>
+<p class="MsoNormal" style="margin-bottom:12.0pt"><o:p>&nbsp;</o:p></p>
+</div>
+
+<FONT size=2 face=Arial> 
+  €6,000  
+w €10,9. “Wr’s cl” 
+yofawne rs– l stda ru 
+</FONT><A href="htsoe"><FONT size=2 
+face=Arial>wsr.</FONT></A> 
+
+<P><FONT size=2 face=Arial></FONT>&nbsp;</P>
+<P><FONT size=2 face=Arial>Bu a  <STRONG><A 
+href="htt/w.fces"><FONT 
+color=#000080>eoo</FONT></A></STRONG></FONT> </P>
+
+<HR>
+
+<P>Thsssent</P>
+<HR>
+
+
+<FONT face=Arial color=#000080 size=2>WsrMtr
+24<BR></FONT>
+</body>
+</html>
-- 
2.30.9