Commit f8287237 authored by Romain Courteaud

Check http status

parent 8c8fef11
...@@ -4,7 +4,7 @@ from urlchecker_configuration import createConfiguration, logConfiguration ...@@ -4,7 +4,7 @@ from urlchecker_configuration import createConfiguration, logConfiguration
from urlchecker_platform import logPlatform from urlchecker_platform import logPlatform
from urlchecker_status import logStatus from urlchecker_status import logStatus
from urlchecker_dns import getResolverDict, expandDomainList, getServerIpDict from urlchecker_dns import getResolverDict, expandDomainList, getServerIpDict
from urlchecker_http import getUrlHostname from urlchecker_http import getUrlHostname, checkHttpStatus
from urlchecker_network import isTcpPortOpen from urlchecker_network import isTcpPortOpen
...@@ -48,6 +48,7 @@ class WebBot: ...@@ -48,6 +48,7 @@ class WebBot:
print(domain_list) print(domain_list)
# Get the list of server to check # Get the list of server to check
# XXX Check DNS expiration
server_ip_dict = getServerIpDict( server_ip_dict = getServerIpDict(
self._db, status_id, resolver_dict, domain_list, "A" self._db, status_id, resolver_dict, domain_list, "A"
) )
...@@ -55,14 +56,26 @@ class WebBot: ...@@ -55,14 +56,26 @@ class WebBot:
# Check TCP port for the list of IP found # Check TCP port for the list of IP found
# XXX For now, check http/https only # XXX For now, check http/https only
server_ip_list = [x for x in server_ip_dict.keys()] server_ip_list = [x for x in server_ip_dict.keys()]
url_dict = {}
for server_ip in server_ip_list: for server_ip in server_ip_list:
isTcpPortOpen(self._db, server_ip, 80, status_id) # XXX Check SSL certificate expiration
isTcpPortOpen(self._db, server_ip, 443, status_id) for port, protocol in [(80, "http"), (443, "https")]:
print(server_ip_dict) if isTcpPortOpen(self._db, server_ip, port, status_id):
for hostname in server_ip_dict[server_ip]:
url = "%s://%s" % (protocol, hostname)
if url not in url_dict:
url_dict[url] = []
url_dict[url].append(server_ip)
# XXX If https ok, check SSL certificate # XXX put back original url list
# Check HTTP Status # Check HTTP Status
for url in url_dict:
for ip in url_dict[url]:
checkHttpStatus(self._db, status_id, url, ip, __version__)
# XXX Check location header and check new url recursively
# XXX Parse HTML, fetch found link, css, js, image
# XXX Check HTTP Cache
def stop(self): def stop(self):
self._running = False self._running = False
......
...@@ -57,11 +57,18 @@ class LogDB: ...@@ -57,11 +57,18 @@ class LogDB:
rdtype = peewee.TextField() rdtype = peewee.TextField()
response = peewee.TextField() response = peewee.TextField()
class HttpCodeChange(BaseModel):
    """HTTP status code observed for a given url on a given server ip.

    logHttpStatus creates a row only when no earlier row exists for the
    (ip, url) pair or when the latest stored status code differs, so the
    table records changes rather than every check.
    """

    # Status entry the observation is attached to (passed as status_id)
    status = peewee.ForeignKeyField(Status)
    # Server address the request was directed to; indexed for lookups
    ip = peewee.TextField(index=True)
    # Checked url; indexed for lookups
    url = peewee.TextField(index=True)
    # Status code of the HTTP response
    status_code = peewee.IntegerField()
self.Status = Status self.Status = Status
self.ConfigurationChange = ConfigurationChange self.ConfigurationChange = ConfigurationChange
self.PlatformChange = PlatformChange self.PlatformChange = PlatformChange
self.NetworkChange = NetworkChange self.NetworkChange = NetworkChange
self.DnsChange = DnsChange self.DnsChange = DnsChange
self.HttpCodeChange = HttpCodeChange
def createTables(self): def createTables(self):
# http://www.sqlite.org/pragma.html#pragma_user_version # http://www.sqlite.org/pragma.html#pragma_user_version
...@@ -73,6 +80,7 @@ class LogDB: ...@@ -73,6 +80,7 @@ class LogDB:
[ [
self.Status, self.Status,
self.ConfigurationChange, self.ConfigurationChange,
self.HttpCodeChange,
self.NetworkChange, self.NetworkChange,
self.PlatformChange, self.PlatformChange,
self.DnsChange, self.DnsChange,
......
from urllib.parse import urlparse, urlunsplit
import requests import requests
import socket from urllib.parse import urlparse, urlunsplit
import sys import sys
import traceback import traceback
import time
from forcediphttpsadapter.adapters import ForcedIPHTTPSAdapter from forcediphttpsadapter.adapters import ForcedIPHTTPSAdapter
import dns.resolver
import miniupnpc
import platform
from urlchecker_db import LogDB
import configparser
import os
__version__ = "0.0.3"
PREFERRED_TYPE = "text/html" PREFERRED_TYPE = "text/html"
TIMEOUT = 2 TIMEOUT = 2
CONFIG_SECTION = "URLCHECKER"
class BotError(Exception):
    """Exception type defined for the bot."""
class WebBot: def getUrlHostname(url):
def __init__(self): return urlparse(url).hostname
self.config = configparser.ConfigParser(empty_lines_in_values=False)
self.config[CONFIG_SECTION] = {"INTERVAL": -1}
def initDB(self, sqlite_path):
    """Create the LogDB for *sqlite_path* and create its tables.

    The LogDB instance is stored on ``self._db``.
    """
    self._db = LogDB(sqlite_path)
    self._db.createTables()
def getUserAgent(self): def getUserAgent(self, version="0"):
return "%s/%s (+%s)" % ( return "%s/%s (+%s)" % (
"URLCHECKER", "URLCHECKER",
__version__, version,
"https://lab.nexedi.com/romain/url-checker", "https://lab.nexedi.com/romain/url-checker",
) )
def request(
self, def request(
method, method,
url, url,
headers=None, headers=None,
...@@ -49,8 +30,9 @@ class WebBot: ...@@ -49,8 +30,9 @@ class WebBot:
allow_redirects=False, allow_redirects=False,
verify=True, verify=True,
session=requests, session=requests,
version=None,
**kwargs, **kwargs,
): ):
if headers is None: if headers is None:
headers = {} headers = {}
...@@ -58,7 +40,7 @@ class WebBot: ...@@ -58,7 +40,7 @@ class WebBot:
headers["Accept"] = "%s;q=0.9,*/*;q=0.8" % PREFERRED_TYPE headers["Accept"] = "%s;q=0.9,*/*;q=0.8" % PREFERRED_TYPE
if "User-Agent" not in headers: if "User-Agent" not in headers:
# XXX user agent # XXX user agent
headers["User-Agent"] = self.getUserAgent() headers["User-Agent"] = getUserAgent(version)
kwargs["stream"] = stream kwargs["stream"] = stream
kwargs["timeout"] = timeout kwargs["timeout"] = timeout
...@@ -91,22 +73,34 @@ class WebBot: ...@@ -91,22 +73,34 @@ class WebBot:
return response return response
def check(self, url):
parsed_url = urlparse(url)
# response = self.request("GET", url)
# print(url, response.status_code)
# Get the list of available IPv4 frontend CDN def logHttpStatus(db, ip, url, code, status_id):
hostname = parsed_url.hostname
with db._db.atomic():
try: try:
dns_info_list = socket.getaddrinfo( # Check previous parameter value
hostname, "http", socket.AF_INET previous_entry = (
db.HttpCodeChange.select()
.where(
db.HttpCodeChange.ip == ip, db.HttpCodeChange.url == url
)
.order_by(db.HttpCodeChange.status.desc())
.get()
)
except db.HttpCodeChange.DoesNotExist:
previous_entry = None
if (previous_entry is None) or (previous_entry.status_code != code):
previous_entry = db.HttpCodeChange.create(
status=status_id, ip=ip, url=url, status_code=code
) )
except socket.gaierror: return previous_entry.id
dns_info_list = []
ip_list = [x[4][0] for x in dns_info_list]
def checkHttpStatus(db, status_id, url, ip, bot_version):
parsed_url = urlparse(url)
hostname = parsed_url.hostname
for ip in ip_list:
session = requests.Session() session = requests.Session()
# SNI Support # SNI Support
if parsed_url.scheme == "https": if parsed_url.scheme == "https":
...@@ -116,80 +110,11 @@ class WebBot: ...@@ -116,80 +110,11 @@ class WebBot:
) )
session.mount(base_url, ForcedIPHTTPSAdapter(dest_ip=ip)) session.mount(base_url, ForcedIPHTTPSAdapter(dest_ip=ip))
response = self.request( response = request(
"GET", url, headers={"Host": hostname}, session=session "GET",
) url,
self._db.storeQuery(ip, url, response.status_code) headers={"Host": hostname},
session=session,
def iterateLoop(self): version=bot_version,
for url in self.config[CONFIG_SECTION]["URL"].split():
self.check(url)
def stop(self):
    """Print a goodbye message and timestamp, clear the running flag and
    close the database if one was opened.
    """
    print("Bye bye")
    print(time.strftime("%Y-%m-%d %H:%M:%S"))
    self._running = False
    # _db is only set by initDB, so guard against it being absent
    if hasattr(self, "_db"):
        self._db.close()
def run(self):
print(time.strftime("%Y-%m-%d %H:%M:%S"))
self.initDB(self.config[CONFIG_SECTION]["SQLITE"])
self._db.storeEntry(platform=platform.platform())
print("Platform", platform.platform())
print(
"Python",
platform.python_build(),
platform.python_compiler(),
platform.python_branch(),
platform.python_implementation(),
platform.python_revision(),
platform.python_version(),
) )
print("Hostname", socket.gethostname()) logHttpStatus(db, ip, url, response.status_code, status_id)
myresolver = dns.resolver.Resolver()
print("Resolvers", myresolver.nameservers)
u = miniupnpc.UPnP()
u.discoverdelay = 200
u.discover()
try:
u.selectigd()
print("external ip: {}".format(u.externalipaddress()))
except Exception:
pass
self._running = True
try:
while self._running:
self.iterateLoop()
interval = self.config.getint(CONFIG_SECTION, "INTERVAL")
if interval < 0:
self.stop()
else:
time.sleep(interval)
except KeyboardInterrupt:
self.stop()
except:
self.stop()
print("Oups, error")
raise
def create_bot(envvar="URLCHECKER_SETTINGS", cfgfile=None, mapping=None):
    """Build a WebBot and load its configuration.

    Configuration sources are read in this order, each one read on top of
    the previous: the file named by the environment variable *envvar* (when
    set), then *cfgfile*, then *mapping* (read into the CONFIG_SECTION
    section).

    Raises:
        AttributeError: if "URL" or "SQLITE" is missing from CONFIG_SECTION
            once all sources have been read.
    """
    bot = WebBot()
    if (envvar is not None) and (envvar in os.environ):
        bot.config.read([os.environ.get(envvar)])
    if cfgfile is not None:
        print(cfgfile)
        bot.config.read([cfgfile])
    if mapping is not None:
        bot.config.read_dict({CONFIG_SECTION: mapping})
    # Both parameters are mandatory: the url list to check and the log db path
    for parameter in ["URL", "SQLITE"]:
        if parameter not in bot.config[CONFIG_SECTION]:
            raise AttributeError("Config %s not defined" % parameter)
    return bot
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment