# GetRssDataAsDict.py
import feedparser, md5, urllib2, socket


def getRssDataAsDict(self, url, username=None, password=None):
  """Fetch the feed at `url` and return its content as a plain dictionary.

  HTTP basic authentication is used only when both `username` and
  `password` are given.

  The returned dictionary always has a 'status' key:
     0  feed read successfully
    -1  no URL given
    -2  the URL could not be opened (urllib2.URLError)
    -3  server answered 401
    -4  server answered 404
    -5  any other problem reported by feedparser (bozo flag set)

  When status is 0 the dictionary also contains 'title', 'link',
  optionally 'logo', and 'items': a list of entry dictionaries
  (keys 'title', 'link', 'other_links', 'md5', 'content', 'date', 'img',
  'updated_parsed') ordered by 'updated_parsed', most recent first.
  """
  # no url, no feed to read
  if url in ('', None, 'None',):
    return {'status': -1}

  # use authentication or not?
  handlers = []
  if username is not None and password is not None:
    passman = urllib2.HTTPPasswordMgrWithDefaultRealm()
    passman.add_password(None, url, username, password)
    handlers.append(urllib2.HTTPBasicAuthHandler(passman))

  # Use a shorter timeout while reading. The default timeout is global to
  # the process, so it must be restored even if parsing raises.
  default_timeout = socket.getdefaulttimeout()
  socket.setdefaulttimeout(10.0)
  try:
    d = feedparser.parse(url, handlers=handlers)
  finally:
    socket.setdefaulttimeout(default_timeout)

  if d.bozo and isinstance(d.bozo_exception, urllib2.URLError):
    # we have an URL error
    return {'status': -2}

  # Check the HTTP status before the generic bozo test: an error page
  # that fails to parse as a feed would otherwise be reported as -5.
  # 'status' is looked up with get() because this code cannot rely on
  # it being present in every parse result.
  http_status = d.get('status', None)
  if http_status == 401:
    return {'status': -3}
  if http_status == 404:
    return {'status': -4}

  if d.bozo:
    print("%s %s" % (d.bozo, d.bozo_exception))
    return {'status': -5}

  feed = d.feed
  result = {'items': []}
  # some feeds may not provide logo
  image = feed.get('image', None)
  if image is not None and image.get('href', None) is not None:
    result['logo'] = image['href']
  result['title'] = feed.get('title', '')
  result['link'] = feed.get('link', '')

  for entry in d.entries:
    link = entry.get('link', '')
    # md5 works on byte strings; a unicode link with non-ASCII characters
    # would fail the implicit ASCII conversion, so encode it explicitly.
    if isinstance(link, unicode):
      link_bytes = link.encode('utf-8')
    else:
      link_bytes = link
    result['items'].append({
      'title': entry.get('title', ''),
      'link': link,
      'other_links': [x['href'] for x in entry.get('links', [])],
      'md5': md5.new(link_bytes).hexdigest(),
      'content': entry.get('summary', ''),
      'date': entry.get('updated', None),
      'img': [x['href'] for x in entry.get('enclosures', [])],
      'updated_parsed': entry.get('updated_parsed', None),
    })

  # Sort by date, most recent first. Sort-then-reverse is kept (rather
  # than reverse=True) so that entries with equal dates keep the same
  # relative order as before.
  result['items'].sort(key=lambda item: item['updated_parsed'])
  result['items'].reverse()
  result['status'] = 0
  return result