Commit f8287237 authored by Romain Courteaud

Check http status

parent 8c8fef11
......@@ -4,7 +4,7 @@ from urlchecker_configuration import createConfiguration, logConfiguration
from urlchecker_platform import logPlatform
from urlchecker_status import logStatus
from urlchecker_dns import getResolverDict, expandDomainList, getServerIpDict
from urlchecker_http import getUrlHostname
from urlchecker_http import getUrlHostname, checkHttpStatus
from urlchecker_network import isTcpPortOpen
......@@ -48,6 +48,7 @@ class WebBot:
print(domain_list)
# Get the list of server to check
# XXX Check DNS expiration
server_ip_dict = getServerIpDict(
self._db, status_id, resolver_dict, domain_list, "A"
)
......@@ -55,14 +56,26 @@ class WebBot:
# Check TCP port for the list of IP found
# XXX For now, check http/https only
server_ip_list = [x for x in server_ip_dict.keys()]
url_dict = {}
for server_ip in server_ip_list:
isTcpPortOpen(self._db, server_ip, 80, status_id)
isTcpPortOpen(self._db, server_ip, 443, status_id)
print(server_ip_dict)
# XXX Check SSL certificate expiration
for port, protocol in [(80, "http"), (443, "https")]:
if isTcpPortOpen(self._db, server_ip, port, status_id):
for hostname in server_ip_dict[server_ip]:
url = "%s://%s" % (protocol, hostname)
if url not in url_dict:
url_dict[url] = []
url_dict[url].append(server_ip)
# XXX If https ok, check SSL certificate
# XXX put back original url list
# Check HTTP Status
for url in url_dict:
for ip in url_dict[url]:
checkHttpStatus(self._db, status_id, url, ip, __version__)
# XXX Check location header and check new url recursively
# XXX Parse HTML, fetch found link, css, js, image
# XXX Check HTTP Cache
def stop(self):
self._running = False
......
......@@ -57,11 +57,18 @@ class LogDB:
rdtype = peewee.TextField()
response = peewee.TextField()
class HttpCodeChange(BaseModel):
    """Record of the HTTP status code observed for a URL on a given IP.

    Rows are written by ``logHttpStatus``, which only creates a new entry
    when no previous entry exists for the (ip, url) pair or when the
    status code differs from the most recent one.
    """

    # Status row the observation is attached to (passed as ``status_id``
    # by the caller).
    status = peewee.ForeignKeyField(Status)
    # IP address the request was sent to; indexed because entries are
    # looked up by (ip, url).
    ip = peewee.TextField(index=True)
    # URL that was requested; indexed for the same lookup.
    url = peewee.TextField(index=True)
    # HTTP status code of the response (``response.status_code``).
    status_code = peewee.IntegerField()
self.Status = Status
self.ConfigurationChange = ConfigurationChange
self.PlatformChange = PlatformChange
self.NetworkChange = NetworkChange
self.DnsChange = DnsChange
self.HttpCodeChange = HttpCodeChange
def createTables(self):
# http://www.sqlite.org/pragma.html#pragma_user_version
......@@ -73,6 +80,7 @@ class LogDB:
[
self.Status,
self.ConfigurationChange,
self.HttpCodeChange,
self.NetworkChange,
self.PlatformChange,
self.DnsChange,
......
from urllib.parse import urlparse, urlunsplit
import requests
import socket
from urllib.parse import urlparse, urlunsplit
import sys
import traceback
import time
from forcediphttpsadapter.adapters import ForcedIPHTTPSAdapter
import dns.resolver
import miniupnpc
import platform
from urlchecker_db import LogDB
import configparser
import os
__version__ = "0.0.3"
PREFERRED_TYPE = "text/html"
TIMEOUT = 2
CONFIG_SECTION = "URLCHECKER"
class BotError(Exception):
pass
class WebBot:
def __init__(self):
self.config = configparser.ConfigParser(empty_lines_in_values=False)
self.config[CONFIG_SECTION] = {"INTERVAL": -1}
def getUrlHostname(url):
    """Return the hostname component of *url*.

    The value comes from ``urllib.parse.urlparse(url).hostname``: it is
    lower-cased, excludes any port or credentials, and is ``None`` when
    the URL has no network location (for example when the scheme and
    ``//`` are missing).
    """
    return urlparse(url).hostname
def initDB(self, sqlite_path):
self._db = LogDB(sqlite_path)
self._db.createTables()
def getUserAgent(self):
def getUserAgent(self, version="0"):
return "%s/%s (+%s)" % (
"URLCHECKER",
__version__,
version,
"https://lab.nexedi.com/romain/url-checker",
)
def request(
self,
def request(
method,
url,
headers=None,
......@@ -49,8 +30,9 @@ class WebBot:
allow_redirects=False,
verify=True,
session=requests,
version=None,
**kwargs,
):
):
if headers is None:
headers = {}
......@@ -58,7 +40,7 @@ class WebBot:
headers["Accept"] = "%s;q=0.9,*/*;q=0.8" % PREFERRED_TYPE
if "User-Agent" not in headers:
# XXX user agent
headers["User-Agent"] = self.getUserAgent()
headers["User-Agent"] = getUserAgent(version)
kwargs["stream"] = stream
kwargs["timeout"] = timeout
......@@ -91,22 +73,34 @@ class WebBot:
return response
def check(self, url):
parsed_url = urlparse(url)
# response = self.request("GET", url)
# print(url, response.status_code)
# Get the list of available IPv4 frontend CDN
hostname = parsed_url.hostname
def logHttpStatus(db, ip, url, code, status_id):
with db._db.atomic():
try:
dns_info_list = socket.getaddrinfo(
hostname, "http", socket.AF_INET
# Check previous parameter value
previous_entry = (
db.HttpCodeChange.select()
.where(
db.HttpCodeChange.ip == ip, db.HttpCodeChange.url == url
)
.order_by(db.HttpCodeChange.status.desc())
.get()
)
except db.HttpCodeChange.DoesNotExist:
previous_entry = None
if (previous_entry is None) or (previous_entry.status_code != code):
previous_entry = db.HttpCodeChange.create(
status=status_id, ip=ip, url=url, status_code=code
)
except socket.gaierror:
dns_info_list = []
ip_list = [x[4][0] for x in dns_info_list]
return previous_entry.id
def checkHttpStatus(db, status_id, url, ip, bot_version):
parsed_url = urlparse(url)
hostname = parsed_url.hostname
for ip in ip_list:
session = requests.Session()
# SNI Support
if parsed_url.scheme == "https":
......@@ -116,80 +110,11 @@ class WebBot:
)
session.mount(base_url, ForcedIPHTTPSAdapter(dest_ip=ip))
response = self.request(
"GET", url, headers={"Host": hostname}, session=session
)
self._db.storeQuery(ip, url, response.status_code)
def iterateLoop(self):
for url in self.config[CONFIG_SECTION]["URL"].split():
self.check(url)
def stop(self):
print("Bye bye")
print(time.strftime("%Y-%m-%d %H:%M:%S"))
self._running = False
if hasattr(self, "_db"):
self._db.close()
def run(self):
print(time.strftime("%Y-%m-%d %H:%M:%S"))
self.initDB(self.config[CONFIG_SECTION]["SQLITE"])
self._db.storeEntry(platform=platform.platform())
print("Platform", platform.platform())
print(
"Python",
platform.python_build(),
platform.python_compiler(),
platform.python_branch(),
platform.python_implementation(),
platform.python_revision(),
platform.python_version(),
response = request(
"GET",
url,
headers={"Host": hostname},
session=session,
version=bot_version,
)
print("Hostname", socket.gethostname())
myresolver = dns.resolver.Resolver()
print("Resolvers", myresolver.nameservers)
u = miniupnpc.UPnP()
u.discoverdelay = 200
u.discover()
try:
u.selectigd()
print("external ip: {}".format(u.externalipaddress()))
except Exception:
pass
self._running = True
try:
while self._running:
self.iterateLoop()
interval = self.config.getint(CONFIG_SECTION, "INTERVAL")
if interval < 0:
self.stop()
else:
time.sleep(interval)
except KeyboardInterrupt:
self.stop()
except:
self.stop()
print("Oups, error")
raise
def create_bot(envvar="URLCHECKER_SETTINGS", cfgfile=None, mapping=None):
bot = WebBot()
if (envvar is not None) and (envvar in os.environ):
bot.config.read([os.environ.get(envvar)])
if cfgfile is not None:
print(cfgfile)
bot.config.read([cfgfile])
if mapping is not None:
bot.config.read_dict({CONFIG_SECTION: mapping})
for parameter in ["URL", "SQLITE"]:
if parameter not in bot.config[CONFIG_SECTION]:
raise AttributeError("Config %s not defined" % parameter)
return bot
logHttpStatus(db, ip, url, response.status_code, status_id)
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment