tengine health check
An analysis of Tengine's health check module.

The basic idea is the same as in most health-check implementations: periodically open test connections to the backend servers and update their status accordingly; when a backend is later selected for a request, that recorded status is taken into account.

What distinguishes this module: every backend peer carries its own self-check event, which probes the backend at the configured interval and updates the peer's status, and at any given moment only one process performs the check for a given peer. The lock granularity is small: it is scoped to each peer's own data unit.
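To make the code below easier to follow, here is a rough sketch of the per-peer shared-memory record that the locking and status logic operates on. The field names mirror the ones referenced later (owner, access_time, rise_count, fall_count, down, mutex), but this is a simplified approximation for illustration, not Tengine's exact ngx_http_upstream_check_peer_shm_t definition.

// Simplified per-peer shared-memory record (illustrative only).
typedef struct {
    ngx_shmtx_t  mutex;        // per-peer lock, so contention stays per peer
    ngx_pid_t    owner;        // pid of the worker currently probing this peer
    ngx_msec_t   access_time;  // last time a probe touched this peer
    ngx_uint_t   fall_count;   // consecutive failed probes
    ngx_uint_t   rise_count;   // consecutive successful probes
    unsigned     down:1;       // 1 = the peer is currently considered unhealthy
} peer_shm_sketch_t;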
http {
    upstream bar {
        server 127.0.0.1:81;
        server 127.0.0.1:82;

        check interval=3000 rise=2 fall=5 timeout=1000 type=http;
        check_keepalive_requests 100;
        check_http_send "HEAD / HTTP/1.1\r\nConnection: keep-alive\r\n\r\n";
        check_http_expect_alive http_2xx http_3xx;
    }

    server {
        listen 80;

        location /t {
            proxy_pass http://bar;
        }

        location /status {
            check_status;

            access_log off;
            allow 127.0.0.1;
            deny all;
        }
    }
}
Note that the time values are in milliseconds: interval=3000 means each backend is probed every 3 seconds, and timeout=1000 gives each probe a 1-second deadline.
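With this configuration, check_http_send is the probe request, and check_http_expect_alive http_2xx http_3xx means a probe only counts as successful when the response status code is in the 2xx or 3xx class. A minimal sketch of that classification (the helper name is made up for illustration, not the module's actual parser):

// Hypothetical helper illustrating "check_http_expect_alive http_2xx http_3xx":
// a probe response is treated as healthy only for 2xx and 3xx status codes.
static int
probe_status_is_alive(int status)
{
    return (status >= 200 && status < 300)      /* http_2xx */
           || (status >= 300 && status < 400);  /* http_3xx */
}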
During the init_process phase the module registers a check timer event for every peer; since this hook runs in every worker process, each worker arms its own copy of the timers.
static ngx_int_t
ngx_http_upstream_check_add_timers(ngx_cycle_t *cycle)
{
    ngx_uint_t                           i;
    ngx_msec_t                           t, delay;
    ngx_check_conf_t                    *cf;
    ngx_http_upstream_check_peer_t      *peer;
    ngx_http_upstream_check_peers_t     *peers;
    ngx_http_upstream_check_srv_conf_t  *ucscf;
    ngx_http_upstream_check_peer_shm_t  *peer_shm;
    ngx_http_upstream_check_peers_shm_t *peers_shm;

    peers = check_peers_ctx;
    if (peers == NULL) {
        return NGX_OK;
    }

    peers_shm = peers->peers_shm;
    if (peers_shm == NULL) {
        return NGX_OK;
    }

    ngx_log_debug2(NGX_LOG_DEBUG_HTTP, cycle->log, 0,
                   "http check upstream init_process, shm_name: %V, "
                   "peer number: %ud",
                   &peers->check_shm_name,
                   peers->peers.nelts);

    srandom(ngx_pid);

    peer = peers->peers.elts;
    peer_shm = peers_shm->peers;

    // Set up a check timer for every backend peer.
    for (i = 0; i < peers->peers.nelts; i++) {
        peer[i].shm = &peer_shm[i];

        peer[i].check_ev.handler = ngx_http_upstream_check_begin_handler;
        peer[i].check_ev.log = cycle->log;
        peer[i].check_ev.data = &peer[i];
        peer[i].check_ev.timer_set = 0;

        peer[i].check_timeout_ev.handler =
            ngx_http_upstream_check_timeout_handler;
        peer[i].check_timeout_ev.log = cycle->log;
        peer[i].check_timeout_ev.data = &peer[i];
        peer[i].check_timeout_ev.timer_set = 0;

        ucscf = peer[i].conf;
        cf = ucscf->check_type_conf;

        if (cf->need_pool) {
            peer[i].pool = ngx_create_pool(ngx_pagesize, cycle->log);
            if (peer[i].pool == NULL) {
                return NGX_ERROR;
            }
        }

        peer[i].send_handler = cf->send_handler;
        peer[i].recv_handler = cf->recv_handler;

        peer[i].init = cf->init;
        peer[i].parse = cf->parse;
        peer[i].reinit = cf->reinit;

        /*
         * We add a random start time here, since we don't want to trigger
         * the check events too close to each other at the beginning.
         */
        delay = ucscf->check_interval > 1000 ? ucscf->check_interval : 1000;
        t = ngx_random() % delay;

        ngx_add_timer(&peer[i].check_ev, t);
    }

    return NGX_OK;
}
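The random start time simply spreads the first probe of each peer over at most one check interval (never less than one second), so a worker responsible for many peers does not fire all of its first probes at the same moment. A standalone restatement of that computation, using the interval from the sample configuration:

#include <stdio.h>
#include <stdlib.h>

int main(void)
{
    long check_interval = 3000;                                   /* ms, as in the sample config */
    long delay = check_interval > 1000 ? check_interval : 1000;   /* spread over at least 1000 ms */
    long first = random() % delay;                                /* first probe lands in [0, delay) */

    printf("first probe fires after %ld ms (spread window %ld ms)\n", first, delay);
    return 0;
}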
The self-check event handler armed for each peer:
static void
ngx_http_upstream_check_begin_handler(ngx_event_t *event)
{
    ngx_msec_t                           interval;
    ngx_http_upstream_check_peer_t      *peer;
    ngx_http_upstream_check_peers_t     *peers;
    ngx_http_upstream_check_srv_conf_t  *ucscf;
    ngx_http_upstream_check_peers_shm_t *peers_shm;

    if (ngx_http_upstream_check_need_exit()) {
        return;
    }

    peers = check_peers_ctx;
    if (peers == NULL) {
        return;
    }

    peers_shm = peers->peers_shm;
    if (peers_shm == NULL) {
        return;
    }

    peer = event->data;
    ucscf = peer->conf;

    // A timer is removed from the timer tree once it fires, so re-arm it
    // here to keep the periodic check going.
    ngx_add_timer(event, ucscf->check_interval / 2);

    /* This process is processing this peer now. */
    if (peer->shm->owner == ngx_pid ||
        peer->check_timeout_ev.timer_set) {
        return;
    }

    interval = ngx_current_msec - peer->shm->access_time;
    ngx_log_debug5(NGX_LOG_DEBUG_HTTP, event->log, 0,
                   "http check begin handler index: %ui, owner: %P, "
                   "ngx_pid: %P, interval: %M, check_interval: %M",
                   peer->index, peer->shm->owner,
                   ngx_pid, interval,
                   ucscf->check_interval);

    // Contend for the per-peer lock (a spinlock-style shared-memory mutex).
    ngx_shmtx_lock(&peer->shm->mutex);

    if (peers_shm->generation != ngx_http_upstream_check_shm_generation) {
        ngx_shmtx_unlock(&peer->shm->mutex);
        return;
    }

    // Check whether enough time has passed since the last probe.
    if ((interval >= ucscf->check_interval)
        && (peer->shm->owner == NGX_INVALID_PID))
    {
        peer->shm->owner = ngx_pid;

    } else if (interval >= (ucscf->check_interval << 4)) {

        /*
         * If the check peer has been untouched for 2^4 times of
         * the check interval, activate the current timer.
         * Sometimes, the checking process may disappear
         * in some circumstances, and the clean event will never
         * be triggered.
         */
        peer->shm->owner = ngx_pid;
        peer->shm->access_time = ngx_current_msec;
    }

    ngx_shmtx_unlock(&peer->shm->mutex);

    // Only the process that won ownership actually probes the backend.
    if (peer->shm->owner == ngx_pid) {
        ngx_http_upstream_check_connect_handler(event);
    }
}
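To put concrete numbers on this handler: with interval=3000 each worker re-arms its timer every 1500 ms, but a worker only claims a peer when at least 3000 ms have passed since shm->access_time and shm->owner is NGX_INVALID_PID, so across all workers at most one probe per peer runs per interval. The (check_interval << 4) branch is the recovery path: if a peer still has an owner recorded but has gone untouched for 16 intervals (48 seconds here), that owner is presumed gone and the current worker takes the peer over.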
Establishing the probe connection to the backend:
static void
ngx_http_upstream_check_connect_handler(ngx_event_t *event)
{
    ngx_int_t                            rc;
    ngx_connection_t                    *c;
    ngx_http_upstream_check_peer_t      *peer;
    ngx_http_upstream_check_srv_conf_t  *ucscf;

    // Bail out if the process is shutting down.
    if (ngx_http_upstream_check_need_exit()) {
        return;
    }

    peer = event->data;
    ucscf = peer->conf;

    // If a probe connection already exists, test whether it can be reused.
    if (peer->pc.connection != NULL) {
        c = peer->pc.connection;

        if ((rc = ngx_http_upstream_check_peek_one_byte(c)) == NGX_OK) {
            // Still usable: skip straight to sending the probe.
            goto upstream_check_connect_done;
        } else {
            // The connection has gone away: close it and reconnect below.
            ngx_close_connection(c);
            peer->pc.connection = NULL;
        }
    }

    ngx_memzero(&peer->pc, sizeof(ngx_peer_connection_t));

    peer->pc.sockaddr = peer->check_peer_addr->sockaddr;
    peer->pc.socklen = peer->check_peer_addr->socklen;
    peer->pc.name = &peer->check_peer_addr->name;
    peer->pc.get = ngx_event_get_peer;
    peer->pc.log = event->log;
    peer->pc.log_error = NGX_ERROR_ERR;
    peer->pc.cached = 0;
    peer->pc.connection = NULL;

    // Establish a fresh connection to the backend.
    rc = ngx_event_connect_peer(&peer->pc);

    // On failure, record the result as a failed check.
    if (rc == NGX_ERROR || rc == NGX_DECLINED) {
        ngx_http_upstream_check_status_update(peer, 0);
        return;
    }

    /* NGX_OK or NGX_AGAIN */
    c = peer->pc.connection;
    c->data = peer;
    c->log = peer->pc.log;
    c->sendfile = 0;
    c->read->log = c->log;
    c->write->log = c->log;
    c->pool = peer->pool;

upstream_check_connect_done:
    peer->state = NGX_HTTP_CHECK_CONNECT_DONE;

    c->write->handler = peer->send_handler;
    c->read->handler = peer->recv_handler;

    ngx_add_timer(&peer->check_timeout_ev, ucscf->check_timeout);

    /* The kqueue's loop interface needs it. */
    if (rc == NGX_OK) {
        c->write->handler(c->write);
    }
}
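ngx_http_upstream_check_peek_one_byte, used above to decide whether a kept-alive probe connection can be reused, is not shown in this excerpt. Conceptually it peeks at the socket without consuming any data and treats "data pending or simply idle" as reusable. A rough sketch of that idea on a raw fd (simplified, not the module's exact code):

#include <errno.h>
#include <sys/socket.h>
#include <sys/types.h>

// Simplified version of the one-byte peek used to test a cached connection.
static int
connection_still_usable(int fd)
{
    char    buf;
    ssize_t n = recv(fd, &buf, 1, MSG_PEEK);

    if (n == 1 || (n == -1 && (errno == EAGAIN || errno == EWOULDBLOCK))) {
        return 1;   /* data waiting, or nothing to read yet: keep the connection */
    }

    return 0;       /* n == 0 means the peer closed; other errors mean it is broken */
}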
Updating the backend peer's status accordingly:
static void
ngx_http_upstream_check_status_update(ngx_http_upstream_check_peer_t *peer,
    ngx_int_t result)
{
    ngx_http_upstream_check_srv_conf_t  *ucscf;

    ucscf = peer->conf;

    // The probe of this backend peer succeeded.
    if (result) {
        peer->shm->rise_count++;
        peer->shm->fall_count = 0;

        // Count consecutive successes; once they reach the configured
        // "rise" threshold, bring the peer back up.
        if (peer->shm->down && peer->shm->rise_count >= ucscf->rise_count) {
            peer->shm->down = 0;
            ngx_log_error(NGX_LOG_ERR, ngx_cycle->log, 0,
                          "enable check peer: %V ",
                          &peer->check_peer_addr->name);
        }

    } else {
        // The mirror image: count consecutive failures and, once they reach
        // the configured "fall" threshold, mark the peer down.
        peer->shm->rise_count = 0;
        peer->shm->fall_count++;

        if (!peer->shm->down && peer->shm->fall_count >= ucscf->fall_count) {
            peer->shm->down = 1;
            ngx_log_error(NGX_LOG_ERR, ngx_cycle->log, 0,
                          "disable check peer: %V ",
                          &peer->check_peer_addr->name);
        }
    }

    peer->shm->access_time = ngx_current_msec;
}
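The down flag written here is what the selection side reads back: when a request picks a backend from the upstream, peers whose shared state is marked down are skipped. A minimal sketch of that consumer side, reusing the simplified record from the beginning of this article (the function is invented for illustration, not Tengine's actual picker):

// Hypothetical selection-side sketch: skip peers whose shared-memory
// health state says they are down.
static ngx_int_t
pick_first_usable_peer(peer_shm_sketch_t *peers, ngx_uint_t n)
{
    ngx_uint_t i;

    for (i = 0; i < n; i++) {
        if (!peers[i].down) {
            return (ngx_int_t) i;   /* index of a healthy peer */
        }
    }

    return NGX_ERROR;               /* every peer is currently marked down */
}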
If you run into any problems while using this, feel free to give me feedback; you can reach me through the contact information below.