From 8c01b07429a0d122ea32e633006819f567d52df4 Mon Sep 17 00:00:00 2001
From: Nicolas Delaby <nicolas@nexedi.com>
Date: Fri, 16 Jan 2009 15:19:47 +0000
Subject: [PATCH] Improve asXML   - Add new type 'None' to handle None values  
 - Use XML Marshaller for list types instead of concatenating them with '@@@'  
 - Use XML API to generate the tree (avoids indentation issues and produces
 lightweight XML)

git-svn-id: https://svn.erp5.org/repos/public/erp5/trunk@25154 20353a03-c40f-0410-a6d1-a30d3c3de9de
---
 product/ERP5Type/Base.py            |   4 +-
 product/ERP5Type/Core/Folder.py     |   4 +-
 product/ERP5Type/XMLExportImport.py | 176 +++++++++++-----------------
 3 files changed, 72 insertions(+), 112 deletions(-)

diff --git a/product/ERP5Type/Base.py b/product/ERP5Type/Base.py
index 5fdbe65ca0..6243344ea6 100644
--- a/product/ERP5Type/Base.py
+++ b/product/ERP5Type/Base.py
@@ -2817,11 +2817,11 @@ class Base( CopyContainer,
     self.reindexObject(*args, **kw)
 
   security.declareProtected( Permissions.AccessContentsInformation, 'asXML' )
-  def asXML(self, ident=0):
+  def asXML(self):
     """
         Generate an xml text corresponding to the content of this object
     """
-    return Base_asXML(self, ident=ident)
+    return Base_asXML(self)
 
   # Optimized Menu System
   security.declarePublic('allowedContentTypes')
diff --git a/product/ERP5Type/Core/Folder.py b/product/ERP5Type/Core/Folder.py
index 9feeb2377b..ad518ec840 100644
--- a/product/ERP5Type/Core/Folder.py
+++ b/product/ERP5Type/Core/Folder.py
@@ -1224,11 +1224,11 @@ class Folder(CopyContainer, CMFBTreeFolder, CMFHBTreeFolder, Base, FolderMixIn,
     return error_list
 
   security.declareProtected(Permissions.AccessContentsInformation, 'asXML')
-  def asXML(self, ident=0):
+  def asXML(self):
     """
         Generate an xml text corresponding to the content of this object
     """
-    return Folder_asXML(self, ident=ident)
+    return Folder_asXML(self)
 
   # Optimized Menu System
   security.declarePublic('getVisibleAllowedContentTypeList')
diff --git a/product/ERP5Type/XMLExportImport.py b/product/ERP5Type/XMLExportImport.py
index 5b555bff24..f17d2c9e8c 100644
--- a/product/ERP5Type/XMLExportImport.py
+++ b/product/ERP5Type/XMLExportImport.py
@@ -33,7 +33,9 @@ from email.MIMEBase import MIMEBase
 from email import Encoders
 from pickle import Pickler, EMPTY_DICT, MARK, DICT, PyStringMap, DictionaryType
 from xml.sax.saxutils import escape, unescape
-
+from lxml import etree
+from lxml.etree import Element, SubElement
+from xml.marshal.generic import dumps as marshaler
 from zLOG import LOG
 
 class OrderedPickler(Pickler):
@@ -62,152 +64,110 @@ def dumps(obj, protocol=None, bin=None):
     OrderedPickler(file, protocol, bin).dump(obj)
     return file.getvalue()
 
-def Base_asXML(object, ident=0):
+def Base_asXML(object, root=None):
   """
       Generate an xml text corresponding to the content of this object
   """
   self = object
-  xml = ''
-  if ident==0:
-    xml += '<erp5>'
+  return_as_object = True
+  if root is None:
+    return_as_object = False
+    root = Element('erp5')
   #LOG('asXML',0,'Working on: %s' % str(self.getPhysicalPath()))
-  ident_string = '' # This is used in order to have the ident incremented
-                    # for every sub-object
-  for i in range(0,ident):
-    ident_string += ' '
-  xml += ident_string + '<object id=\"%s\" portal_type=\"%s\">\n' % \
-                        (self.getId(),self.portal_type)
+
+  object = SubElement(root, 'object',
+                      attrib=dict(id=self.getId(), portal_type=self.getPortalType()))
 
   # We have to find every property
   for prop_id in self.propertyIds():
     # In most case, we should not synchronize acquired properties
-    prop = ''
-    #if not prop.has_key('acquisition_base_category') \
-    #   and prop['id'] != 'categories_list' and prop['id'] != 'uid':
-    if prop_id not in ('uid','workflow_history'):
-      prop_type = self.getPropertyType(prop_id)
-      xml_prop_type = 'type="' + prop_type + '"'
-      #try:
+    if prop_id not in ('uid', 'workflow_history'):
       value = self.getProperty(prop_id)
-      #except AttributeError:
-      #  value=None
-
-      xml += ident_string + '  <%s %s>' %(prop_id,xml_prop_type)
       if value is None:
-        xml+='None'
-#       elif prop_type in ('image','file','document'):
-#         LOG('asXML',0,'value: %s' % str(value))
-#         # This property is binary and should be converted with mime
-#         msg = MIMEBase('application','octet-stream')
-#         msg.set_payload(value.getvalue())
-#         Encoders.encode_base64(msg)
-#         ascii_data = msg.get_payload()
-#         ascii_data = ascii_data.replace('\n','@@@\n')
-#         xml+=ascii_data
-      elif prop_type in ('object',):
+        prop_type = 'None'
+      else:
+        prop_type = self.getPropertyType(prop_id)
+      sub_object = SubElement(object, prop_id, attrib=dict(type=prop_type))
+      if prop_type in ('object',):
         # We may have very long lines, so we should split
         value = aq_base(value)
         value = dumps(value)
-        msg = MIMEBase('application','octet-stream')
+        msg = MIMEBase('application', 'octet-stream')
         msg.set_payload(value)
         Encoders.encode_base64(msg)
         ascii_data = msg.get_payload()
-        ascii_data = ascii_data.replace('\n','@@@\n')
-        xml+=ascii_data
-      elif self.getPropertyType(prop_id) in ['lines','tokens']:
-        i = 1
-        for line in value:
-          xml += '%s' % line
-          if i<len(value):
-            xml+='@@@' # XXX very bad hack, must find something better
-          i += 1
-      elif self.getPropertyType(prop_id) in ('text','string',):
-        #xml += str(value).replace('\n','@@@')
-        value = str(value).replace('\n','@@@')
-        xml += escape(value)
-      else:
-        xml+= str(value)
-      xml += '</%s>\n' % prop_id
+        sub_object.text = ascii_data
+      elif prop_type in ('lines', 'tokens',):
+        value_as_node = etree.XML(marshaler(value))
+        sub_object.append(value_as_node)
+      elif prop_type in ('text', 'string',):
+        sub_object.text = unicode(escape(value), 'utf-8')
+      elif prop_type != 'None':
+        sub_object.text = str(value)
 
   # We have to describe the workflow history
-  if hasattr(self,'workflow_history'):
+  if getattr(self, 'workflow_history', None) is not None:
     workflow_list = self.workflow_history
     workflow_list_keys = workflow_list.keys()
     workflow_list_keys.sort() # Make sure it is sorted
 
     for workflow_id in workflow_list_keys:
-      #xml += ident_string + '    <workflow_history id=\"%s\">\n' % workflow_id
-      for workflow_action in workflow_list[workflow_id]: # It is already sorted
-        xml += ident_string + '  <workflow_action id=\"%s\">\n'  % workflow_id
+      for workflow_action in workflow_list[workflow_id]:
+        workflow_node = SubElement(object, 'workflow_action',
+                                   attrib=dict(id=workflow_id))
         worfklow_variable_list = workflow_action.keys()
         worfklow_variable_list.sort()
-        for workflow_variable in worfklow_variable_list: # Make sure it is sorted
+        for workflow_variable in worfklow_variable_list:
           variable_type = "string" # Somewhat bad, should find a better way
-          if workflow_variable.find('time')>= 0:
+          if workflow_variable.find('time') >= 0:
             variable_type = "date"
-          if workflow_variable.find('language_revs')>= 0: # XXX specific to cps
+          if workflow_variable.find('language_revs') >= 0: # XXX specific to cps
             variable_type = "dict"
-          xml += ident_string + '    <%s type=\"%s\">%s' % (workflow_variable,
-                              variable_type,workflow_action[workflow_variable])
-          xml += '</%s>\n' % workflow_variable
-        xml += ident_string + '  </workflow_action>\n'
-      #xml += ident_string + '    </workflow_history>\n'
-    #xml += ident_string + '  </workflow_history>\n'
+          variable_node = SubElement(workflow_node, workflow_variable,
+                                     attrib=dict(type=variable_type))
+          variable_node_text = str(workflow_action[workflow_variable])
+          variable_node.text = unicode(variable_node_text, 'utf-8')
 
   # We should now describe security settings
   for user_role in self.get_local_roles():
-    xml += ident_string + '  <local_role id="%s" type="tokens">' % user_role[0]
-    xml += '@@@'.join(user_role[1])
-    xml += '</local_role>\n'
-  if hasattr(self,'get_local_permissions'):
+    local_role_node = SubElement(object, 'local_role',
+                                 attrib=dict(id=user_role[0], type='tokens'))
+    role_list_node = etree.XML(marshaler(user_role[1]))
+    local_role_node.append(role_list_node)
+  if getattr(self, 'get_local_permissions', None) is not None:
     for user_permission in self.get_local_permissions():
-      xml += ident_string + '  <local_permission id="%s" type="tokens">' % user_permission[0]
-      xml += '@@@'.join(user_permission[1])
-      xml += '</local_permission>\n'
+      local_permission_node = SubElement(object, 'local_permission',
+                              attrib=dict(id=user_permission[0], type='tokens'))
+      permission_list_node = etree.XML(marshaler(user_permission[1]))
+      local_permission_node.append(permission_list_node)
   # Sometimes theres is roles specified for groups, like with CPS
-  if hasattr(self,'get_local_group_roles'):
+  if getattr(self, 'get_local_group_roles', None) is not None:
     for group_role in self.get_local_group_roles():
-      xml += ident_string + '  <local_group id="%s" type="tokens">' % group_role[0]
-      xml += '@@@'.join(group_role[1])
-      xml += '</local_group>\n'
-
-  # We have finished to generate the xml
-  xml += ident_string + '</object>\n'
-  if ident==0:
-    xml += '</erp5>'
-  # Now convert the string as unicode
-  if type(xml) is type(u"a"):
-    xml_unicode = xml
-  else:
-    try:
-      xml_unicode = unicode(xml,encoding='UTF-8')
-    except UnicodeDecodeError:
-      LOG('Base_asXML, We should have an UTF-8 encoding,',0,'but we have ISO-8859-1')
-      xml_unicode = unicode(xml,encoding='ISO-8859-1')
-  # This following character is quite strange, and parseString
-  # fails, but when printed, it show a '\n' and a space, so I replace
-  xml_unicode = xml_unicode.replace('\x0c','\n ')
-  return xml_unicode.encode('utf-8')
+      local_group_node = SubElement(object, 'local_group',
+                                    attrib=dict(id=group_role[0], type='tokens'))
+      group_role_node = etree.XML(marshaler(group_role[1]))
+      local_group_node.append(group_role_node)
+  if return_as_object:
+    return root
+  return etree.tostring(root, encoding='utf-8',
+                        xml_declaration=True, pretty_print=True)
 
-def Folder_asXML(object, ident=0):
+def Folder_asXML(object):
   """
       Generate an xml text corresponding to the content of this object
   """
+  from Products.ERP5Type.Base import Base
   self = object
-  xml = ''
-  xml += Base_asXML(self, ident=ident)
-  xml = xml[:xml.rfind('</object>')]
+  root = Element('erp5')
+  Base_asXML(self, root=root)
+  root_node = root.xpath('/erp5/object')[0]
   # Make sure the list of sub objects is ordered
-  object_value_list = list(self.objectValues())
-  object_value_list.sort(key=lambda x: x.getId())
+  id_list = sorted(self.objectIds())
   # Append to the xml the xml of subobjects
-  for o in object_value_list:
-    aq_ob = aq_base(o)
-    if hasattr(aq_ob, 'asXML'):
-      o_xml = o.asXML(ident=ident+2)
-      if type(o_xml) is type('a'):
-        xml += o_xml
-  xml += '</object>\n'
-  if ident==0:
-    xml += '</erp5>'
-  return xml
+  for id in id_list:
+    o = self._getOb(id)
+    if issubclass(o.__class__, Base):
+      Base_asXML(o, root=root_node)
+
+  return etree.tostring(root, encoding='utf-8',
+                        xml_declaration=True, pretty_print=True)
-- 
2.30.9