DocumentConversion.py 334 Bytes
Newer Older
1 2 3 4 5 6 7 8 9 10 11
import re

def Base_extractImageUrlList(self, text_content=None):
  """
    Extract the list of image URLs used in a Text document (i.e. Web Page).

    The text is scanned for every quoted ``src=`` attribute value, ignoring
    case. The match is purely textual: any attribute whose name ends with
    ``src`` is reported, whatever tag it belongs to.

    text_content -- text to scan; when None, the value returned by
                    self.getTextContent() is used instead.

    Returns the list of attribute values in order of appearance, with
    duplicates kept. Returns an empty list when there is no text to scan
    or when nothing matches. Empty attributes (src="") are skipped.
  """
  if text_content is None:
    text_content = self.getTextContent()
  if text_content is None:
    return []
  # The captured value must consist only of non-quote characters. Allowing
  # an arbitrary first character would let the match swallow the closing
  # quote of an empty attribute and run into the next attribute
  # (src="" alt="x" would yield '" alt='), and would also require at least
  # two characters, dropping one-character URLs.
  return re.findall(r'src=["\']([^"\']+)["\']', text_content, re.I)