diff --git a/stack/monitor/buildout.cfg b/stack/monitor/buildout.cfg index 84c4083f7b9be34bc0abf8cdd216277db81b81d3..35661e3d3e10751e45dd99d2e81e8908b9e05602 100644 --- a/stack/monitor/buildout.cfg +++ b/stack/monitor/buildout.cfg @@ -161,6 +161,38 @@ download-only = true filename = mysql-querydigest.py.in mode = 0644 +[mariadb-performance-cgi] +recipe = hexagonit.recipe.download +url = ${:_profile_base_location_}/webfile-directory/${:filename} +download-only = true +#md5sum = +filename = mariadb-performance.cgi.in +mode = 0644 + +[http-cache-performance-cgi] +recipe = hexagonit.recipe.download +url = ${:_profile_base_location_}/webfile-directory/${:filename} +download-only = true +#md5sum = +filename = http-cache-performance.cgi.in +mode = 0644 + +[zodb-performance-cgi] +recipe = hexagonit.recipe.download +url = ${:_profile_base_location_}/webfile-directory/${:filename} +download-only = true +#md5sum = +filename = zodb-performance.cgi.in +mode = 0644 + +[thread-performance-cgi] +recipe = hexagonit.recipe.download +url = ${:_profile_base_location_}/webfile-directory/${:filename} +download-only = true +#md5sum = +filename = thread-performance.cgi.in +mode = 0644 + [run-apachedex] recipe = hexagonit.recipe.download url = ${:_profile_base_location_}/${:filename} diff --git a/stack/monitor/monitor.cfg.in b/stack/monitor/monitor.cfg.in index bd6018a7159d66c7defa3a107841f9bc76298015..fdc3d072e8f2710b6cf49fbb2e8c32b2aa8a5aca 100644 --- a/stack/monitor/monitor.cfg.in +++ b/stack/monitor/monitor.cfg.in @@ -45,6 +45,7 @@ www = $${:var}/www cgi-bin = $${:var}/cgi-bin monitoring-cgi = $${:cgi-bin}/monitoring +performance-cgi = $${:cgi-bin}/performance knowledge0-cgi = $${:cgi-bin}/zero-knowledge public-cgi = $${:cgi-bin}/monitor-public @@ -208,6 +209,42 @@ context = key root_folder buildout:directory raw config_cfg $${buildout:directory}/$${public:filename} +[deploy-mariadb-performance-cgi] +recipe = slapos.recipe.template:jinja2 +template = 
${mariadb-performance-cgi:location}/${mariadb-performance-cgi:filename} +rendered = $${monitor-directory:performance-cgi}/$${:filename} +filename = mariadb-performance.cgi +mode = $${deploy-settings-cgi:mode} +context = + raw python_executable ${buildout:directory}/bin/${extra-eggs:interpreter} + +[deploy-http-cache-performance-cgi] +recipe = slapos.recipe.template:jinja2 +template = ${http-cache-performance-cgi:location}/${http-cache-performance-cgi:filename} +rendered = $${monitor-directory:performance-cgi}/$${:filename} +filename = http-cache-performance.cgi +mode = $${deploy-settings-cgi:mode} +context = + raw python_executable ${buildout:directory}/bin/${extra-eggs:interpreter} + +[deploy-zodb-performance-cgi] +recipe = slapos.recipe.template:jinja2 +template = ${zodb-performance-cgi:location}/${zodb-performance-cgi:filename} +rendered = $${monitor-directory:performance-cgi}/$${:filename} +filename = zodb-performance.cgi +mode = $${deploy-settings-cgi:mode} +context = + raw python_executable ${buildout:directory}/bin/${extra-eggs:interpreter} + +[deploy-thread-performance-cgi] +recipe = slapos.recipe.template:jinja2 +template = ${thread-performance-cgi:location}/${thread-performance-cgi:filename} +rendered = $${monitor-directory:performance-cgi}/$${:filename} +filename = thread-performance.cgi +mode = $${deploy-settings-cgi:mode} +context = + raw python_executable ${buildout:directory}/bin/${extra-eggs:interpreter} + [make-rss] recipe = slapos.recipe.template:jinja2 template = ${make-rss-script:output} diff --git a/stack/monitor/webfile-directory/http-cache-performance.cgi.in b/stack/monitor/webfile-directory/http-cache-performance.cgi.in new file mode 100644 index 0000000000000000000000000000000000000000..4adb2186cd2970749813a7db2c0c9e198a2c61c6 --- /dev/null +++ b/stack/monitor/webfile-directory/http-cache-performance.cgi.in @@ -0,0 +1,27 @@ +#!{{ python_executable }} + +print """ +<h1>HTTP cache performance</h1> +<p> + Web pages rendered by a CDN speed up a 
web site significantly. It is + required to have a CDN mainly for frequented web sites in order to have a high + availability for internet users. +</p> +<p> + If a web site is not well served, the backend will then take care of the + client request by calculating the response, which takes calculation time + and may slow down all other asynchronous server operations. +</p> +<p> + However, if a web site needs to serve different data to authenticated users, + caching is difficult. As web pages load components that can be cached, the + ones which are also loaded by anonymous user pages can easily be cached for + one hour for stylesheet, javascript and image files. Even better if they are + static files, so their cache duration can be increased to one year. +</p> +<h2>Wrongly cached web pages list</h2> +<p> + This shows some web pages' cache state by checking TrafficServer or Squid HTTP headers. (Last update: XXX.) +</p> +<p>(TODO)</p> +""" diff --git a/stack/monitor/webfile-directory/mariadb-performance.cgi.in b/stack/monitor/webfile-directory/mariadb-performance.cgi.in new file mode 100644 index 0000000000000000000000000000000000000000..287517d1b7694cd7c11ce43a5d6d3810e9b3be01 --- /dev/null +++ b/stack/monitor/webfile-directory/mariadb-performance.cgi.in @@ -0,0 +1,35 @@ +#!{{ python_executable }} + +print """ +<h1>Mariadb performance</h1> +<p> + MariaDB is a single point of congestion in ERP5 architecture. With multiple + catalog or "Spider" sharded mariadb storage, this single point of congestion + can be somehow changed into multiple points of congestion. Yet, each point of + congestion can easily collapse under badly formed or badly optimized queries. +</p> +<p> + It is thus essential to keep track of SQL queries that take a long time to + execute. A SQL query that takes 100 ms to execute on an ERP5 site used by 100 + users means for example response times of 10 s for each page. 
+</p> +<p> + Today slow queries are logged + <a href="/private/log/mariadb_slowquery.log">here</a>. +</p> +<p> + All you need to do is parse the logs and find out which are the 10 slowest + queries by also taking into account their frequency. A slow query that takes + 10 seconds to execute but only once a day is much less harmful than a slow + query that takes 100 ms but is executed every minute by 100 users. +</p> +<h2>10 most time consuming SQL queries</h2> +<p> + The following queries are those which have consumed the highest database + time today. (Last update: XXX.) +</p> +<p>(TODO)</p> +""" + +# ~/srv/runner/instance/slappartX/software_release/parts/site_perl/bin/pt-query-digest mariadb_slowquery.log +# echo 'show full processlist;' | ~/srv/runner/instance/slappartX/bin/mysql -u diff --git a/stack/monitor/webfile-directory/thread-performance.cgi.in b/stack/monitor/webfile-directory/thread-performance.cgi.in new file mode 100644 index 0000000000000000000000000000000000000000..be3ea2ce32b8a91609e645dcd67f9b0396d1c7dc --- /dev/null +++ b/stack/monitor/webfile-directory/thread-performance.cgi.in @@ -0,0 +1,17 @@ +#!{{ python_executable }} + +print """ +<h1>Thread performance</h1> +<p> + All zope instances can handle only one client request per thread to manage + one transaction at a time. Transactions prevent object modification + conflicts made by several users at the same time. If a transaction is in + conflict with another one, the thread will retry it until success. +</p> +<p> + If the processed code during a transaction is too long, then it increases the + risk of getting a conflict and it increases the time to wait for client response. + One way to check for long code is to collect process stack traces with a + short interval and then analyze manually which function occurs several times. 
+</p> +""" diff --git a/stack/monitor/webfile-directory/zodb-performance.cgi.in b/stack/monitor/webfile-directory/zodb-performance.cgi.in new file mode 100644 index 0000000000000000000000000000000000000000..a0c25d9cf46080cc8d08f63465759c39b9bd3230 --- /dev/null +++ b/stack/monitor/webfile-directory/zodb-performance.cgi.in @@ -0,0 +1,29 @@ +#!{{ python_executable }} + +print """ +<h1>ZODB performance</h1> +<p> + A healthy zope system based on ZEO server can easily handle up to XXX requests + per second on an SSD. A healthy zope client can easily handle up to XXX + requests per second. +</p> +<p> + If disk performance is too slow, then the performance on ZEO server side will + decrease (XXX/s) and instead of having a curve with high pikes, the IO curve + will show somehow steady shape with constant access at much lower ratio + (ex. XXX/s). In this situation, disk must probably be analysed and eventually + replaced. +</p> +<p> + If network performance between server and client is poor, then ZEO Client + takes much time to retrieve objects from ZEO server either because of high + latency or because of packet loss. A typical ZEO Client can easily retrieve + XXX objets / second. +</p> +<p> + The ZODB cache defaults to 5000 objects, the threshold is easily reached when + someone loads a web page with a lot of information. Increase this amount may + decrease web page loading time, but be careful of available RAM on your + machines. +</p> +"""