#! /usr/bin/env python
#
# This script reads the file called "path" and
# then tries to fetch every URL listed inside
# that file.
#
# For it to work, you have to create one user
# per thread, so if you want 3 threads, you have
# to create the following users in Zope with the
# following passwords:
# user: user0    password: user0
# user: user1    password: user1
# user: user2    password: user2

from threading import Thread
from time import sleep
from urllib import addinfourl
from urllib import splithost
from urllib import splituser
from urllib import unquote
from urllib import splittype
import string

from urllib import FancyURLopener
from Cookie import SimpleCookie

def main(path='path', max_thread=7, host='localhost:9673'):
  """Request every URL path listed in a file through a pool of threads.

  Each line of the file is a URL path that gets appended to ``host``.
  Worker slot N always authenticates as user ``userN`` with password
  ``userN`` (both as URL credentials and as ``__ac_name`` /
  ``__ac_password`` query parameters), so one such user must exist per
  slot.

  path       -- name of the file holding one URL path per line
  max_thread -- maximum number of requests running at the same time
  host       -- ``host:port`` of the server to check

  Raises ValueError if max_thread is smaller than 1 (the scheduling
  loop could never make progress in that case).
  """
  if max_thread < 1:
    raise ValueError('max_thread must be at least 1')

  # Strip only the line terminator: slicing off the last character would
  # eat a real character when the final line has no trailing newline.
  with open(path, 'r') as url_file:
    list_url = [line.rstrip('\r\n') for line in url_file]
  # Blank lines carry no URL path to check.
  list_url = [url_path for url_path in list_url if url_path]

  # One Checker per slot; a slot is reused once its thread has finished.
  checker = [Checker() for _ in range(max_thread)]
  threads = []
  request_number = 0
  i = 0
  while i < len(list_url):
    sleep(1)  # pace the requests, and avoid spinning while the pool is full
    if len(threads) < max_thread:
      slot = len(threads)
    else:
      slot = None
      for t in range(max_thread):
        if not threads[t].is_alive():
          slot = t
          break
      if slot is None:
        # Every worker is still busy; look again after the next pause.
        continue

    # We must provide an authentication parameter such as __ac_name
    url = '//user%i:user%i@%s%s?__ac_name=user%s&__ac_password=user%s' % \
              (slot, slot, host, list_url[i], slot, slot)
    worker = Thread(target=checker[slot].CheckUrl, kwargs={'url': url})
    if slot == len(threads):
      threads.append(worker)
    else:
      threads[slot] = worker
    worker.start()
    i += 1
    request_number += 1
    print("thread: %i request: %i url: %s" % (slot, request_number, url))

  # Wait for the requests that are still in flight before returning.
  for worker in threads:
    worker.join()


class URLOpener(FancyURLopener):

    '''Overrides the http implementation so that it sends and receives
    cookie headers.

    Every instance owns its own cookie jar, so cookies received by one
    opener are never sent along with the requests of another one.
    '''

    # Class-level fallback, kept for subclasses that override __init__
    # without calling this one; __init__ below shadows it per instance.
    cookies = SimpleCookie()

    def __init__(self, *args, **kwargs):
        """Initialise the opener and give it a private cookie jar."""
        FancyURLopener.__init__(self, *args, **kwargs)
        self.cookies = SimpleCookie()

    def _cookie_headers(self):
        """Return one 'name=value;' string per cookie in the jar."""
        # Formatting the Morsel itself would render 'Set-Cookie: name=value';
        # coded_value is the value as it has to appear on the wire.
        return ['%s=%s;' % (name, morsel.coded_value)
                for name, morsel in self.cookies.items()]

    def open_http(self, url, data=None):
        """Use HTTP protocol.

        url  -- either the part of the URL after 'http:' (a string such as
                '//user:passwd@host:port/path'), or a (proxy_host, full_url)
                pair when going through a proxy
        data -- optional urlencoded body; when given the request is a POST,
                otherwise a GET

        Stored cookies are sent with the request and any 'set-cookie'
        headers of the reply are loaded into the jar.

        Returns an addinfourl object for a 200 reply, otherwise whatever
        self.http_error() returns.  Raises IOError when no host is given
        or the reply has a bad status line.
        """
        import base64
        import httplib

        user_passwd = None
        if isinstance(url, str):
            host, selector = splithost(url)
            if host:
                user_passwd, host = splituser(host)
                host = unquote(host)
            realhost = host
        else:
            # Proxy form: connect to `host`, ask it for the full URL.
            host, selector = url
            urltype, rest = splittype(selector)
            url = rest
            if (urltype or '').lower() != 'http':
                realhost = None
            else:
                realhost, rest = splithost(rest)
                if realhost:
                    user_passwd, realhost = splituser(realhost)
                if user_passwd:
                    # Do not forward the credentials inside the selector.
                    selector = "%s://%s%s" % (urltype, realhost, rest)
        if not host:
            raise IOError('http error', 'no host given')

        if user_passwd:
            # b64encode never inserts line breaks, unlike encodestring,
            # which would corrupt the header for long credentials.
            auth = base64.b64encode(user_passwd).strip()
        else:
            auth = None

        h = httplib.HTTP(host)
        if data is not None:
            h.putrequest('POST', selector)
            h.putheader('Content-type', 'application/x-www-form-urlencoded')
            h.putheader('Content-length', '%d' % len(data))
        else:
            h.putrequest('GET', selector)
        for cookie_header in self._cookie_headers():
            h.putheader('Cookie', cookie_header)

        if auth:
            h.putheader('Authorization', 'Basic %s' % auth)
        if realhost:
            h.putheader('Host', realhost)
        for args in self.addheaders:
            h.putheader(*args)
        h.endheaders()
        if data is not None:
            # Send exactly the bytes announced in Content-length.
            h.send(data)

        errcode, errmsg, headers = h.getreply()
        if headers and 'set-cookie' in headers:
            for cookie in headers.getallmatchingheaders('set-cookie'):
                self.cookies.load(cookie)

        fp = h.getfile()
        if errcode == -1:
            # The status line could not be parsed; there is no usable reply.
            if fp:
                fp.close()
            raise IOError('http protocol error', 'got a bad status line')
        if errcode == 200:
            return addinfourl(fp, headers, "http:" + url)
        if data is None:
            return self.http_error(url, fp, errcode, errmsg, headers)
        return self.http_error(url, fp, errcode, errmsg, headers, data)


class Checker(URLOpener):
  """URL opener that tries a URL and prints whether it could be reached."""

  # This seems necessary for exceptions
  # NOTE(review): presumably because open_http() is called directly, so
  # nothing else sets this attribute before an error handler reads it
  # -- confirm.
  type = 'http'

  def CheckUrl(self, url=None):
    """Try to open `url` and print the outcome.

    The failure message is printed by SearchUrl(); the success message is
    printed here, and only when the connection really succeeded.
    """
    if self.SearchUrl(url):
      print("Connection to %s went fine" % url)

  def SearchUrl(self, url=None):
    """Open `url` through open_http() and report whether it worked.

    Returns True when open_http() returned without raising IOError and
    False (after printing the error) otherwise.  The reply is not read;
    it is closed right away.
    """
    try:
      conn = self.open_http(url)
    except IOError as err:
      # Not every IOError carries exactly (errno, strerror).
      if len(err.args) == 2:
        (errno, strerror) = err.args
      else:
        errno = getattr(err, 'errno', None)
        strerror = str(err)
      print("Can't connect to %s because of I/O error(%s): %s" % (url, errno, strerror))
      return False
    if conn is not None:
      conn.close()
    return True

  def raise_error(self, error_key, field):
    """Raise an IOError built from `error_key` and `field`."""
    raise IOError(error_key, field)



# Run the checker only when this file is executed as a script.
if __name__ == "__main__":
  main()