This commit is contained in:
cn 2016-11-23 19:40:22 +01:00
commit d3905a0f62
2 changed files with 193 additions and 0 deletions

41
README.md Normal file
View File

@ -0,0 +1,41 @@
# nginx-metrics-graphite
This is a Lua plugin for the Nginx web server that automatically collects and submits several important Nginx metrics to [Graphite](https://graphiteapp.org/) suitable for visualisation with e.g. [Grafana](http://grafana.org/).
In contrast to commercial and proprietary solutions such as [Luameter](https://luameter.com/) or [NGINX Plus](https://www.nginx.com/products/) with its [ngx_http_status_module](http://nginx.org/en/docs/http/ngx_http_status_module.html), this plugin is open source software while featuring more and additional metrics compared to those available via the open source [ngx_http_stub_status_module](http://nginx.org/en/docs/http/ngx_http_stub_status_module.html).
This plugin takes inspiration from other Nginx metric libraries like [nginx-lua-prometheus](https://github.com/knyar/nginx-lua-prometheus) but differs fundamentally in how metrics are submitted. At regular intervals it automatically pushes metrics to the configured Graphite (i.e. Carbon) host using pure Lua code instead of exposing them via a separate web page for polling.
Metrics are collected on every request for which the user configures a suitable `log_by_lua` directive and are aggregated into server-wide global counters (finer granularity might be added later). The counters are realized using a single shared dictionary across all Nginx worker processes which has constant memory usage (128 KiB currently, may be reduced further).
Collected metrics in this prototype implementation:
* number of requests
* average request duration
* accumulated request sizes over all requests
* accumulated response sizes over all requests
* HTTP status code classes (1xx, 2xx, 3xx, 4xx, 5xx)
* HTTP methods (GET, HEAD, PUT, POST, DELETE, OPTIONS, others)
## Caveats
A short metric submission interval might cause blocking on the Nginx worker threads since the shared dictionary storing all counters has to be locked.
Intermittent network errors while communicating with Graphite might lead to permanent loss of metric information.
## Install
* Install `nginx-extra` (includes Lua support) on Debian Jessie
* Clone the nginx-metrics-graphite repository to */opt/nginx-metrics-graphite*
* Add the following config to top-level `http` block:
```nginx
resolver x.y.z.w; # DNS resolver IP address needed
lua_shared_dict metrics_graphite 128k;
lua_package_path ";;/opt/nginx-metrics-graphite/?.lua";
init_by_lua 'metrics_graphite = require("metrics_graphite").init("graphite.example.net", 300, "my.node.nginx_metrics.prefix")';
init_worker_by_lua 'metrics_graphite:worker()';
```
* Instrument the `http` block or any server or location beneath it using `log_by_lua 'metrics_graphite:log()';`

152
metrics_graphite.lua Normal file
View File

@ -0,0 +1,152 @@
-- Module table; it is also used as the metatable of every instance created by init().
local MetricsGraphite = {}
-- Field/method lookups that miss on an instance fall back to this table.
MetricsGraphite.__index = MetricsGraphite
--- Create a metrics collector and reset all shared counters.
-- Stores the submission settings on the new instance and zeroes every counter
-- in the `metrics_graphite` shared dictionary, including the
-- `main_loop_worker` election counter used by worker().
-- @param carbon_host host name or address of the Carbon (Graphite) server
-- @param interval    delay in seconds handed to ngx.timer.at between submissions
-- @param mbase       prefix prepended to every metric name
-- @return the new MetricsGraphite instance
-- Raises an error when the `metrics_graphite` shared dictionary is not configured.
function MetricsGraphite.init(carbon_host, interval, mbase)
  -- Fail early with an actionable message instead of an opaque
  -- "attempt to index a nil value" on the first counter reset below.
  local stats = ngx.shared.metrics_graphite
  if not stats then
    error('nginx-metrics-graphite: shared dictionary missing, add '
      .. '"lua_shared_dict metrics_graphite 128k;" to the http block', 2)
  end

  ngx.log(ngx.INFO, "nginx-metrics-graphite initializing on nginx version " .. ngx.config.nginx_version .. " with ngx_lua version " .. ngx.config.ngx_lua_version)

  local self = setmetatable({}, MetricsGraphite)
  self.carbon_host = carbon_host
  self.interval = interval
  self.mbase = mbase

  -- metadata tables for more flexible metric creation:
  -- counter name -> lower bound of the HTTP status class
  self.query_status = {
    status_5xx = 500,
    status_4xx = 400,
    status_3xx = 300,
    status_2xx = 200,
    status_1xx = 100
  }
  -- counter name -> HTTP method; "" never equals a real method, so
  -- method_other only serves as the counter name for everything else
  self.query_method = {
    method_get = "GET",
    method_head = "HEAD",
    method_put = "PUT",
    method_post = "POST",
    method_delete = "DELETE",
    method_options = "OPTIONS",
    method_other = ""
  }

  -- initialize/reset counters; per the lua-nginx-module documentation the
  -- ngx.shared.DICT methods are atomic across nginx worker processes
  self.stats = stats
  stats:set("main_loop_worker", 0)
  stats:set("requests", 0)
  stats:set("request_length", 0)
  stats:set("bytes_sent", 0)
  stats:set("request_time_sum", 0)
  stats:set("request_time_num", 0)
  for name in pairs(self.query_status) do
    stats:set(name, 0)
  end
  for name in pairs(self.query_method) do
    stats:set(name, 0)
  end

  return self
end
--- Start the periodic Graphite submission loop in exactly one nginx worker.
-- Every worker calls this; the first one to increment `main_loop_worker`
-- to 1 becomes the submitter and all others return immediately. The
-- submitter re-arms an ngx.timer.at timer every `self.interval` seconds and,
-- on each tick, sends all counters to `self.carbon_host` on TCP port 2003 as
-- newline-terminated "<name> <value> <timestamp>" lines.
-- Failures are logged at ngx.ERR level; nothing is returned.
function MetricsGraphite:worker()
  -- determine which worker should handle the main loop; this relies on
  -- ngx.shared.DICT:incr being atomic so that only one caller ever sees 1
  if self.stats:incr("main_loop_worker", 1) ~= 1 then
    return
  end
  ngx.log(ngx.INFO, "nginx-metrics-graphite main loop worker PID is " .. ngx.worker.pid())

  local this = self
  local stats = this.stats
  local carbon_port = 2003

  local callback
  callback = function (premature)
    -- first create the new timer to keep our intervals as good as possible
    -- (not when called premature since nginx is going to shut down soon)
    if not premature then
      local armed, timer_err = ngx.timer.at(this.interval, callback)
      if not armed then
        ngx.log(ngx.ERR, "nginx-metrics-graphite callback failed to create interval timer: ", timer_err)
        return
      end
    end

    -- then do the work which might incur delays
    local sock, sock_err = ngx.socket.tcp()
    if not sock then
      ngx.log(ngx.ERR, "nginx-metrics-graphite callback failed to create carbon socket: ", sock_err)
      return
    end

    -- connect to carbon host with submission port via TCP
    local connected, conn_err = sock:connect(this.carbon_host, carbon_port)
    if not connected then
      ngx.log(ngx.ERR, "nginx-metrics-graphite callback failed to connect carbon socket: ", conn_err)
      return
    end

    -- one timestamp and one payload for the whole batch
    local now = ngx.time()
    local prefix = this.mbase .. ".nginx_test."
    local lines = {}
    local function add(name, value)
      lines[#lines + 1] = prefix .. name .. " " .. value .. " " .. now .. "\n"
    end

    local time_sum = stats:get("request_time_sum") or 0
    local time_num = stats:get("request_time_num") or 0

    add("test" .. ngx.worker.pid(), 1)
    add("num_requests", stats:get("requests") or 0)
    add("acc_request_length", stats:get("request_length") or 0)
    add("acc_bytes_sent", stats:get("bytes_sent") or 0)
    -- without any request in this interval the average would be 0/0 (NaN),
    -- so the data point is left out instead
    if time_num > 0 then
      add("avg_request_time", time_sum / time_num)
    end
    for name in pairs(this.query_status) do
      add("num_" .. name, stats:get(name) or 0)
    end
    for name in pairs(this.query_method) do
      add("num_" .. name, stats:get(name) or 0)
    end

    -- submit metrics
    local sent, send_err = sock:send(table.concat(lines))
    if sent then
      -- subtract exactly what was reported rather than resetting to 0, so
      -- increments made by other workers in the meantime are kept; after a
      -- failed send the values stay in place for the next interval
      stats:incr("request_time_sum", -time_sum)
      stats:incr("request_time_num", -time_num)
    else
      ngx.log(ngx.ERR, "nginx-metrics-graphite callback failed to send metrics to carbon: ", send_err)
    end
    sock:close()
  end

  -- start first timer
  local ok, err = ngx.timer.at(this.interval, callback)
  if not ok then
    ngx.log(ngx.ERR, "nginx-metrics-graphite callback failed to create interval timer: ", err)
    return
  end
end
--- Record the metrics of the current request in the shared counters.
-- Meant to run from `log_by_lua` on every request, so it only performs cheap
-- counter increments: total requests, status class, HTTP method, request and
-- response byte counts, and the request duration sum/count that worker()
-- turns into an average.
function MetricsGraphite:log()
  local stats = self.stats
  stats:incr("requests", 1)

  -- status class: the first class whose [lower, lower + 100) range matches
  local status = ngx.status
  for name, lower in pairs(self.query_status) do
    if status >= lower and status < lower + 100 then
      stats:incr(name, 1)
      break
    end
  end

  -- HTTP method: fetch it once, fall back to method_other when unknown
  local method = ngx.req.get_method()
  local method_key = "method_other"
  for name, known in pairs(self.query_method) do
    if method == known then
      method_key = name
      break
    end
  end
  stats:incr(method_key, 1)

  -- ngx.var values are strings (or nil); convert explicitly so a missing
  -- variable cannot abort the log handler
  stats:incr("request_length", tonumber(ngx.var.request_length) or 0) -- in bytes
  stats:incr("bytes_sent", tonumber(ngx.var.bytes_sent) or 0) -- in bytes

  local request_time = ngx.now() - ngx.req.start_time() -- in seconds
  stats:incr("request_time_sum", request_time)
  stats:incr("request_time_num", 1)
end
-- Export the module table as the result of require("metrics_graphite").
return MetricsGraphite