diff -urN heartbeat-2.1.4-1/configure.in heartbeat-2.1.4-1.mod/configure.in --- heartbeat-2.1.4-1/configure.in 2008-08-19 12:35:05.000000000 +0900 +++ heartbeat-2.1.4-1.mod/configure.in 2010-05-11 11:49:28.000000000 +0900 @@ -3017,6 +3017,9 @@ tools/haresources2cib.py \ tools/hb_report \ tools/ocf-tester \ + tools/hb_actmonitor/Makefile \ + tools/hb_actmonitor/hb_actmonitor.spec \ + tools/hb_actmonitor/actcheck \ resources/Makefile \ resources/OCF/Makefile \ resources/OCF/.ocf-binaries \ diff -urN heartbeat-2.1.4-1/tools/Makefile.am heartbeat-2.1.4-1.mod/tools/Makefile.am --- heartbeat-2.1.4-1/tools/Makefile.am 2008-08-19 12:35:06.000000000 +0900 +++ heartbeat-2.1.4-1.mod/tools/Makefile.am 2010-05-11 11:49:28.000000000 +0900 @@ -23,6 +23,9 @@ -I$(top_builddir)/linux-ha -I$(top_srcdir)/linux-ha \ -I$(top_builddir)/libltdl -I$(top_srcdir)/libltdl +SUBDIRS = hb_actmonitor +DIST_SUBDIRS = hb_actmonitor + EXTRA_DIST = ccdv.c attrd.h $(hanoarch_DATA) $(sbin_SCRIPTS) apigid = @HA_APIGID@ diff -urN heartbeat-2.1.4-1/tools/hb_actmonitor/Makefile.am heartbeat-2.1.4-1.mod/tools/hb_actmonitor/Makefile.am --- heartbeat-2.1.4-1/tools/hb_actmonitor/Makefile.am 1970-01-01 09:00:00.000000000 +0900 +++ heartbeat-2.1.4-1.mod/tools/hb_actmonitor/Makefile.am 2010-05-11 11:49:28.000000000 +0900 @@ -0,0 +1,53 @@ +# +# hb_actmonitor: Heartbeat Active-Nodes Monitor. +# +# Copyright (C) 2009 NIPPON TELEGRAPH AND TELEPHONE CORPORATION +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +# +MAINTAINERCLEANFILES = Makefile.in + +INCLUDES = -I$(top_builddir)/include -I$(top_srcdir)/include \ + -I$(top_builddir)/tools -I$(top_srcdir)/tools \ + -I$(top_builddir)/tools/hb_actmonitor -I$(top_srcdir)/tools/hb_actmonitor + +apigid = @HA_APIGID@ +hb_actmonitordir = $(libdir)/@HB_PKG@/hb-actmonitor +hanoarchdir = @HA_NOARCHDATAHBDIR@ +gliblib = @GLIBLIB@ + +hb_actmonitor_PROGRAMS = hb_actmonitor hb_actmon_client +hb_actmonitor_DATA = hb_actmonitor.conf.sample hb_actmon_client.conf.sample + +## SOURCES +hb_actmonitor_SOURCES = hb_actmonitor.c hb_actmon_comm.c +hb_actmonitor_LDADD = $(top_builddir)/lib/clplumbing/libplumb.la \ + $(top_builddir)/lib/crm/common/libcrmcommon.la \ + $(GLIBLIB) + +hb_actmon_client_SOURCES = hb_actmon_client.c hb_actmon_comm.c +hb_actmon_client_LDADD = $(top_builddir)/lib/clplumbing/libplumb.la \ + $(top_builddir)/lib/crm/common/libcrmcommon.la \ + $(top_builddir)/lib/crm/cib/libcib.la \ + $(top_builddir)/lib/crm/pengine/libpe_status.la \ + $(GLIBLIB) \ + $(CURSESLIBS) + +EXTRA_DIST = $(ocf_SCRIPTS) +ocfdir = @OCF_RA_DIR@/heartbeat +ocf_SCRIPTS = actcheck + +uninstall-local: + rm -fr $(DESTDIR)$(hb_actmonitordir) diff -urN heartbeat-2.1.4-1/tools/hb_actmonitor/actcheck.in heartbeat-2.1.4-1.mod/tools/hb_actmonitor/actcheck.in --- heartbeat-2.1.4-1/tools/hb_actmonitor/actcheck.in 1970-01-01 09:00:00.000000000 +0900 +++ heartbeat-2.1.4-1.mod/tools/hb_actmonitor/actcheck.in 2010-05-11 11:49:28.000000000 +0900 @@ -0,0 +1,163 @@ +#!/bin/sh +# +# ActCheck OCF RA. Does nothing but call hb_actmon_client +# at start operation, with resource id as argument. +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA +# 02110-1301, USA. +# +# Copyright (c) 2009 NIPPON TELEGRAPH AND TELEPHONE CORPORATION +# +# OCF parameters: +# OCF_RESKEY_cmd_opt - Additional options for hb_actmon_client (-C, -V). +# +####################################################################### +# Initialization: + +. ${OCF_ROOT}/resource.d/heartbeat/.ocf-shellfuncs + +unset LC_ALL; export LC_ALL +unset LANGUAGE; export LANGUAGE + +ACTCHECK=@libdir@/@PACKAGE@/hb-actmonitor/hb_actmon_client + +usage() { + cat < + + +1.0 + + +This is a resource script for ActCheck. It does absolutely nothing except +keep track of whether its running or not, but it called hb_actmon_client +at start operation. + +actcheck resource agent + + + + +Additional hb_actmon_client options (-C, -V). Default is "". + +cmd_opt + + + + + + + + + + + +END +} + + +# +# START: called hb_actmon_client. +# +actcheck_start() { + ocf_log info "actcheck_start: started..." + + actcheck_monitor + if [ $? = $OCF_SUCCESS ]; then + return $OCF_SUCCESS + fi + + touch ${OCF_RESKEY_state} + + if [ -z "$OCF_RESKEY_CRM_meta_clone" ]; then + $ACTCHECK ${OCF_RESKEY_cmd_opt} -R ${OCF_RESOURCE_INSTANCE} + else + $ACTCHECK ${OCF_RESKEY_cmd_opt} -R ${OCF_RESOURCE_INSTANCE%:${OCF_RESKEY_CRM_meta_clone}} + fi + + if [ $? -ne 0 ]; then + return $OCF_ERR_GENERIC + fi + + ocf_log info "actcheck_start: complete." + return $OCF_SUCCESS +} + +# +# STOP +# +actcheck_stop() { + ocf_log info "actcheck_stop: started..." + + actcheck_monitor + if [ $? = $OCF_SUCCESS ]; then + rm ${OCF_RESKEY_state} + fi + ocf_log info "actcheck_stop: complete." + return $OCF_SUCCESS +} + +# +# MONITOR +# +actcheck_monitor() { + ocf_log debug "actcheck_monitor: started..." + + RCODE=$OCF_NOT_RUNNING + + if [ -f ${OCF_RESKEY_state} ]; then + RCODE=$OCF_SUCCESS + fi + ocf_log debug "actcheck_monitor: complete." + return $RCODE +} + +# +# main process +# + +if [ $# -ne 1 ]; then + usage + exit $OCF_ERR_ARGS +fi + +: ${OCF_RESKEY_state=${HA_RSCTMP}/actcheck-${OCF_RESOURCE_INSTANCE}.state} + +OP=$1 + +# These operations do not require instance parameters +case $OP in +meta-data) meta_data + exit $OCF_SUCCESS + ;; +usage) usage + exit $OCF_SUCCESS + ;; +start) actcheck_start;; +stop) actcheck_stop;; +monitor) actcheck_monitor;; +*) usage + exit $OCF_ERR_UNIMPLEMENTED + ;; +esac +rc=$? +ocf_log debug "${OCF_RESOURCE_INSTANCE} $OP : $rc" +exit $rc diff -urN heartbeat-2.1.4-1/tools/hb_actmonitor/hb_actmon_client.c heartbeat-2.1.4-1.mod/tools/hb_actmonitor/hb_actmon_client.c --- heartbeat-2.1.4-1/tools/hb_actmonitor/hb_actmon_client.c 1970-01-01 09:00:00.000000000 +0900 +++ heartbeat-2.1.4-1.mod/tools/hb_actmonitor/hb_actmon_client.c 2010-05-11 11:49:28.000000000 +0900 @@ -0,0 +1,1195 @@ +/* + * hb_actmon_client: Heartbeat Active-Nodes Monitor Client. + * + * Copyright (C) 2009 NIPPON TELEGRAPH AND TELEPHONE CORPORATION + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This software is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +#include +#include + +#include +#include +#include +#include + +#define OPTARGS "Vp:C:DP:beR:?" +#define DEFAULT_INTERVAL 60 +#define MIN_INTERVAL 1 +#define DEFAULT_RETRY FALSE +#define RETRY_DELAY 10 +#define DEFAULT_CONFFILE "/etc/hb_actmon_client.conf" +#define DEFAULT_PIDFILE "/var/run/hb_actmon_client.pid" +#define DEFAULT_CONN_ATTEMPTS 0 +#define DEFAULT_ON_CONNERR FALSE +#define MAX_HBCONN_ATTEMPTS 20 +#define HBCONN_ATTEMPT_INTERVAL 1 + +extern gboolean block_unwanted_log; +extern unsigned int loglevel_before_block; +extern char *connected_from; +extern GHashTable *cib_op_callback_table; /* for disconnect from cib */ + +GMainLoop *mainloop = NULL; +guint timer_id = 0; +struct utsname u; +char our_pid[11]; +hb_msg_t rep_actcount; +int rc_update_attribute_foreach; +IPC_Channel *attrd_ch = NULL; +IPC_Channel *crmd_ch = NULL; +cib_t *cib_conn = NULL; + +typedef struct server_info_s +{ + char *node; + char *port; + +} server_info_t; + +typedef struct client_info_s +{ + server_info_t *server; + GHashTable *rsc; + int interval; + gboolean retry; + char *rscid; + int retries; + gboolean set_attr; + +} client_info_t; + +client_info_t c_info; + +/* + * prototype declaration + */ +gboolean do_reconfig(int nsig, gpointer user_data); +gboolean do_shutdown(int nsig, gpointer unused); +void usage(const char *cmd, int exit_status); +int parse_config(const char *file); +gboolean do_reachlimit(gpointer unused); +int do_getoktoact(void); +int handle_msg(hb_msg_t *rcv, int unused); +int handle_req_actcount(hb_msg_t *rcv); +int handle_rep_reachlimit(hb_msg_t *rcv); +int handle_rep_getoktoact(hb_msg_t *rcv); +int update_attribute(const char *name, const char *value, gboolean disconnect); +void update_attribute_foreach(gpointer key, gpointer value, gpointer user_data); +int request_to_attrd(const char *attr_name, const char *attr_value); +int query_to_crmd(void); +char *recv_crmd_state(IPC_Channel *server); +int count_started_resource(void); +gboolean attrd_msg_dispatch(IPC_Channel *server, gpointer user_data); +gboolean crmd_msg_dispatch(IPC_Channel *server, gpointer user_data); +void crmd_channel_destroy(gpointer user_data); +void cib_connection_destroy(gpointer user_data); +int connect_attrd(void); +int connect_crmd(void); +int connect_cib(void); +void disconnect_attrd(void); +void disconnect_crmd(void); +void disconnect_cib(gboolean unset_dnotify); + + +gboolean +do_reconfig(int nsig, gpointer user_data) +{ + crm_debug_2("called..."); + crm_info("received signal %d, re-read config %s", nsig, (char*)user_data); + + if (c_info.server != NULL) { + g_free(c_info.server->node); + g_free(c_info.server->port); + g_free(c_info.server); + c_info.server = NULL; + } + g_hash_table_destroy(c_info.rsc); + c_info.rsc = NULL; + + if (parse_config(user_data) < 0) { + do_shutdown(0, NULL); + crm_info("exiting %s", crm_system_name); + exit(1); + } + do_reachlimit(NULL); + return TRUE; +} + +gboolean +do_shutdown(int nsig, gpointer unused) +{ + crm_debug_2("called..."); + crm_info("received signal %d, do shutdown...", nsig); + + close_listen_sock(); + + if (nsig) { + disconnect_attrd(); + disconnect_crmd(); + disconnect_cib(TRUE); + } + + if (mainloop != NULL && g_main_loop_is_running(mainloop)) { + g_main_loop_quit(mainloop); + } else { + crm_info("Exiting %s", crm_system_name); + exit(0); + } + return FALSE; +} + +void +usage(const char *cmd, int exit_status) +{ + FILE *stream; + + stream = exit_status ? stderr : stdout; + + fprintf(stream, "usage: %s [-%s]\n", cmd, OPTARGS); + fprintf(stream, "\t--%s (-%c)\t\t\t: This text\n", "help", '?'); + fprintf(stream, "\t--%s (-%c)\t\t\t: Increase the debug output\n", + "verbose", 'V'); + fprintf(stream, "\t--%s (-%c) \t: Port number (1-65535) of %s\n" + "\t\t\t\t\t * Required option\n", "client-port", 'p', cmd); + fprintf(stream, "\t--%s (-%c) \t: Configuration file location\n" + "\t\t\t\t\t * Default=%s\n", "config-file", 'C', DEFAULT_CONFFILE); + fprintf(stream, "\t--%s (-%c)\t\t: Run in the background as a daemon\n", + "daemonize", 'D'); + fprintf(stream, "\t--%s (-%c) \t: Daemon pid file location\n", + "pid-file", 'P'); + fprintf(stream, + "\t--%s (-%c)\t\t: Log level of HB's function is set as LOG_ERR\n", + "block-log", 'b'); + fprintf(stream, "\t--%s (-%c)\t\t: Enable to output log to stderr\n", + "enable-stderr", 'e'); + fprintf(stream, "\n Options for call from RA\n"); + fprintf(stream, "\t--%s (-%c) \t: Resource ID without sub-id\n", + "resource", 'R'); + + fflush(stream); + exit(exit_status); +} + +int +parse_config(const char *file) +{ + GKeyFile *kf = g_key_file_new(); + gchar **grp = NULL; + gsize length; + gchar *node, *port, *attr, *val; + const char *key; + gboolean has_client_section = FALSE; + int lpc; + + crm_debug_2("called..."); + + c_info.rsc = g_hash_table_new_full(g_str_hash, g_str_equal, g_free, g_free); + c_info.interval = DEFAULT_INTERVAL; + c_info.retry = DEFAULT_RETRY; + c_info.retries = DEFAULT_CONN_ATTEMPTS; + c_info.set_attr = DEFAULT_ON_CONNERR; + + if ((g_key_file_load_from_file(kf, file, G_KEY_FILE_NONE, NULL)) == FALSE) { + if (g_path_is_absolute(file) == TRUE) { + crm_err("read config-file failed. [%s]", file); + } else { + gchar *curr_dir = g_get_current_dir(); + crm_err("read config-file failed. [%s/%s]", curr_dir, file); + g_free(curr_dir); + } + return -1; + } + grp = g_key_file_get_groups(kf, &length); + + for (lpc = 0; lpc < length; lpc++) { + if (safe_str_eq(grp[lpc], "Client")) { + if (c_info.rscid == NULL) { + key = "interval"; + if ((val = key_file_get_string(kf, grp[lpc], key, FALSE))) { + if (!str_isdigit(val) && + crm_parse_int(val, "-1") >= MIN_INTERVAL && + crm_parse_int(val, "-1") <= INT_MAX) { + c_info.interval = crm_parse_int(val, "-1"); + } else { + crm_warn("invalid %s [%s] specified", key, val); + } + g_free(val); + } + crm_info("[%s]: %s [%d]", grp[lpc], key, c_info.interval); + + key = "retry"; + if ((val = key_file_get_string(kf, grp[lpc], key, FALSE))) { + c_info.retry = crm_is_true(val); + g_free(val); + } + crm_info("[%s]: %s [%s]", + grp[lpc], key, (c_info.retry) ? "yes" : "no"); + } else { + /* items for -R option */ + key = "connerr_retrynum"; + if ((val = key_file_get_string(kf, grp[lpc], key, FALSE))) { + if (!str_isdigit(val) && + crm_parse_int(val, "-1") >= 0 && + crm_parse_int(val, "-1") <= INT_MAX) { + c_info.retries = crm_parse_int(val, "-1"); + } else { + crm_warn("invalid %s [%s] specified", key, val); + } + g_free(val); + } + crm_info("[%s]: %s [%d]", grp[lpc], key, c_info.retries); + + key = "connerr_setattr"; + if ((val = key_file_get_string(kf, grp[lpc], key, FALSE))) { + c_info.set_attr = crm_is_true(val); + g_free(val); + } + crm_info("[%s]: %s [%s]", + grp[lpc], key, (c_info.set_attr) ? "yes" : "no"); + } + has_client_section = TRUE; + } else if (safe_str_eq(grp[lpc], "Server")) { + if (!(node = key_file_get_string(kf, grp[lpc], "node", TRUE))) { + continue; + } + if (!(port = key_file_get_string(kf, grp[lpc], "server_port", TRUE))) { + continue; + } else if (str_isport(port)) { + crm_warn("invalid server_port [%s] specified." + " section [%s] is invalid", port, grp[lpc]); + g_free(port); port = NULL; + continue; + } + c_info.server = g_malloc0(sizeof(server_info_t)); + c_info.server->node = node; + c_info.server->port = port; + crm_info("[%s]: node[%s], server_port[%s]", grp[lpc], node, port); + } else { + key = "attr_name"; + if (!(attr = key_file_get_string(kf, grp[lpc], key, TRUE))) { + continue; + } + /* value of grp (group name) is Resource ID without sub-id */ + g_hash_table_insert(c_info.rsc, g_strdup(grp[lpc]), attr); + crm_info("[%s]: %s[%s]", grp[lpc], key, attr); + } + } + g_strfreev(grp); + g_key_file_free(kf); + + if (has_client_section == FALSE) { + if (c_info.rscid == NULL) { + crm_info("config-file has no [Client] section. interval[%d], retry[%s]", + c_info.interval, (c_info.retry) ? "yes" : "no"); + } else { + crm_info("config-file has no [Client] section." + " connerr_retrynum[%d], connerr_setattr[%s]", + c_info.retries, (c_info.set_attr) ? "yes" : "no"); + } + } + if (c_info.server == NULL || g_hash_table_size(c_info.rsc) == 0) { + if (c_info.server == NULL) { + crm_warn("config-file has no valid server's setting"); + } + if (g_hash_table_size(c_info.rsc) == 0) { + crm_warn("config-file has no valid 'attr_name' setting"); + } + return 1; + } + return 0; +} + +/* + * send request(T_REQ_REACHLIMIT) to server and receive reply(T_REP_REACHLIMIT). + */ +gboolean +do_reachlimit(gpointer unused) +{ + int sockfd; + int interval = calc_interval(c_info.interval, RETRY_DELAY, c_info.retry); + hb_msg_t msg; + + crm_debug_2("called..."); + crm_info("request status (ACTs reached the limit?)"); + + if (timer_id) { + Gmain_timeout_remove(timer_id); + } + if (c_info.server == NULL || g_hash_table_size(c_info.rsc) == 0) { + if (c_info.server == NULL) { + crm_warn("server's setting for request" + " is not specified as config-file"); + } + if (g_hash_table_size(c_info.rsc) == 0) { + crm_warn("'attr_name' setting" + " is not specified as config-file"); + } + return TRUE; + } + + if ((sockfd = connect_to(c_info.server->node, c_info.server->port)) < 0) { + if (sockfd == -3) { + crm_debug("failed to connect to %s", c_info.server->node); + } else { + crm_warn("failed to connect to %s", c_info.server->node); + } + goto timeout_add; + } + memset(&msg, 0, sizeof(hb_msg_t)); + msg.hdr.type = T_REQ_REACHLIMIT; + msg.hdr.req_id = g_str_hash(u.nodename); + msg.hdr.req_no = get_next_req_no(); + crm_debug("send(): hdr[%d:%u:%u]", + msg.hdr.type, msg.hdr.req_id, msg.hdr.req_no); + + if (send(sockfd, &msg, sizeof(hb_msg_t), MSG_DONTWAIT | MSG_NOSIGNAL) < 0) { + cl_perror("send(2) call failed"); + } else { + struct timeval timeout; + timeout.tv_sec = SELECT_TIMEOUT_SEC; + timeout.tv_usec = 0; + if (receive_msg(sockfd, &timeout) == 0) { + interval = c_info.interval; + } + } + close(sockfd); + +timeout_add: + crm_debug("timeout_add [do_reachlimit] %d sec", interval); + timer_id = Gmain_timeout_add(interval*1000, do_reachlimit, NULL); + return TRUE; +} + +/* + * send request(T_REQ_GETOKTOACT) to server and receive reply(T_REP_GETOKTOACT). + * (this function for -R option.) + */ +int +do_getoktoact(void) +{ + int sockfd; + hb_msg_t msg; + + crm_debug_2("called..."); + + if (c_info.server == NULL) { + goto retry; + } + + if ((sockfd = connect_to(c_info.server->node, c_info.server->port)) < 0) { + if (sockfd == -3) { + crm_debug("failed to connect to %s", c_info.server->node); + return -1; + } + crm_warn("failed to connect to %s", c_info.server->node); + goto retry; + } + memset(&msg, 0, sizeof(hb_msg_t)); + msg.hdr.type = T_REQ_GETOKTOACT; + msg.hdr.req_id = g_str_hash(u.nodename); + msg.hdr.req_no = getpid(); + crm_debug("send(): hdr[%d:%u:%u]", + msg.hdr.type, msg.hdr.req_id, msg.hdr.req_no); + + if (send(sockfd, &msg, sizeof(hb_msg_t), MSG_DONTWAIT | MSG_NOSIGNAL) < 0) { + cl_perror("send(2) call failed"); + close(sockfd); + goto retry; + } else { + int rc; + struct timeval timeout; + timeout.tv_sec = SELECT_TIMEOUT_SEC; + timeout.tv_usec = 0; + if ((rc = receive_msg(sockfd, &timeout)) < 0) { + close(sockfd); + if (rc == -3) { + return -1; + } + goto retry; + } + } + close(sockfd); + return 0; + +retry: + crm_debug_2("retry.., c_info.retries[%d]", c_info.retries); + + if (c_info.server != NULL && c_info.retries-- > 0) { + return do_getoktoact(); + } + + if (c_info.set_attr == FALSE) { + return 0; + } + return update_attribute( + g_hash_table_lookup(c_info.rsc, c_info.rscid), "TRUE", TRUE); +} + +int +handle_msg(hb_msg_t *rcv, int unused) +{ + crm_debug_2("called..."); + + switch (rcv->hdr.type) { + case T_REQ_ACTCOUNT: + return handle_req_actcount(rcv); + case T_REP_REACHLIMIT: + return handle_rep_reachlimit(rcv); + case T_REP_GETOKTOACT: + return handle_rep_getoktoact(rcv); + default: + crm_warn("the message NOT intended was received from %s", + connected_from); + crm_info("NOT intended message: msgtype[%d], req_id[%u], req_no[%u]", + rcv->hdr.type, rcv->hdr.req_id, rcv->hdr.req_no); + break; + } + return -2; +} + +/* + * called when request(T_REQ_ACTCOUNT) from server is received. + * only query status of crmd to crmd. + * - in crmd_msg_dispatch(), receive result of query and send + * reply(T_REP_ACTCOUNT) to server. + */ +int +handle_req_actcount(hb_msg_t *rcv) +{ + crm_debug_2("called..."); + crm_debug("hdr[%d:%u:%u]", rcv->hdr.type, rcv->hdr.req_id, rcv->hdr.req_no); + + if (c_info.server == NULL) { + crm_err("cannot connect to server," + " since the config-file is invalid"); + return -1; + } + memset(&rep_actcount, 0, sizeof(hb_msg_t)); + rep_actcount.hdr.type = T_REP_ACTCOUNT; + rep_actcount.hdr.req_id = rcv->hdr.req_id; + rep_actcount.hdr.req_no = rcv->hdr.req_no; + return query_to_crmd(); +} + +/* + * called when reply(T_REP_REACHLIMIT) from server is received. + * update attribute's value to "false", when result is FALSE. + */ +int +handle_rep_reachlimit(hb_msg_t *rcv) +{ + crm_debug_2("called..."); + crm_debug("hdr[%d:%u:%u], result[%d]", + rcv->hdr.type, rcv->hdr.req_id, rcv->hdr.req_no, rcv->result); + + /* check of req_id and req_no is unnecessary */ + + if (rcv->result == TRUE) { + return 0; + } + + if (connect_attrd() < 0) { + return -1; + } + rc_update_attribute_foreach = 0; + { + char value[] = "false"; + g_hash_table_foreach(c_info.rsc, update_attribute_foreach, value); + } + return rc_update_attribute_foreach; +} + +/* + * called when reply(T_REP_GETOKTOACT) from server is received. + * update attribute's value to "TRUE", when result is FALSE. + * (this function for -R option.) + */ +int +handle_rep_getoktoact(hb_msg_t *rcv) +{ + crm_debug_2("called..."); + crm_debug("hdr[%d:%u:%u], result[%d]", + rcv->hdr.type, rcv->hdr.req_id, rcv->hdr.req_no, rcv->result); + + /* check of req_id and req_no is unnecessary */ + + if (rcv->result == TRUE) { + return 0; + } + return update_attribute( + g_hash_table_lookup(c_info.rsc, c_info.rscid), "TRUE", TRUE); +} + +int +update_attribute(const char *name, const char *value, gboolean disconnect) +{ + int ret; + + crm_debug_2("called..."); + + if (connect_attrd() < 0) { + return -1; + } + ret = request_to_attrd(name, value); + if (disconnect == TRUE) { + disconnect_attrd(); + } + return ret; +} + +void +update_attribute_foreach(gpointer key, gpointer value, gpointer user_data) +{ + int rc; + + crm_debug_2("called..."); + + if ((rc = request_to_attrd(value, user_data)) < 0) { + rc_update_attribute_foreach = rc; + } + return; +} + +/* ref. main() in tools/attrd_updater.c */ +int +request_to_attrd(const char *attr_name, const char *attr_value) +{ + HA_Message *msg; + gboolean rc; + + crm_debug_2("called..."); + + if ((msg = ha_msg_new(5)) == NULL) { + crm_err("creating new message failed"); + crm_warn("could not update %s[%s], %s[%s]", + F_ATTRD_ATTRIBUTE, attr_name, F_ATTRD_VALUE, attr_value); + return -1; + } + ha_msg_add(msg, F_TYPE, T_ATTRD); + ha_msg_add(msg, F_ORIG, crm_system_name); + ha_msg_add(msg, F_ATTRD_TASK, "update"); + ha_msg_add(msg, F_ATTRD_ATTRIBUTE, attr_name); + ha_msg_add(msg, F_ATTRD_VALUE, attr_value); + + rc = send_ipc_message(attrd_ch, msg); + ha_msg_del(msg); + if (rc == FALSE) { + crm_err("failed to send update to attrd"); + crm_warn("could not update %s[%s], %s[%s]", + F_ATTRD_ATTRIBUTE, attr_name, F_ATTRD_VALUE, attr_value); + return -1; + } + crm_notice("update %s[%s], %s[%s]", + F_ATTRD_ATTRIBUTE, attr_name, F_ATTRD_VALUE, attr_value); + return 0; +} + +/* ref. do_work() in crm/admin/crmadmin.c */ +int +query_to_crmd(void) +{ + HA_Message *cmd; + gboolean rc; + + crm_debug_2("called..."); + + if (connect_crmd() < 0) { + return -1; + } + cmd = create_request(CRM_OP_PING, NULL, NULL, + CRM_SYSTEM_DC, crm_system_name, our_pid); + rc = send_ipc_message(crmd_ch, cmd); + ha_msg_del(cmd); + if (rc == FALSE) { + crm_err("failed to send query to crmd"); + return -1; + } + return 0; +} + +/* ref. admin_msg_callback() in crm/admin/crmadmin.c */ +char * +recv_crmd_state(IPC_Channel *server) +{ + IPC_Message *msg = NULL; + ha_msg_input_t *input = NULL; + char *state = NULL; + int rc; + HA_Message *action; + + crm_debug_2("called..."); + + while (server->ch_status != IPC_DISCONNECT && + server->ops->is_message_pending(server) == TRUE) { + if ((rc = server->ops->recv(server, &msg)) != IPC_OK) { + cl_perror("receive failure (%d)", rc); + return NULL; + } + if (msg == NULL) { + continue; + } + + input = new_ipc_msg_input(msg); + msg->msg_done(msg); + if (input->xml == NULL) { + crm_debug("XML in IPC message was not valid.. discarding"); + goto cleanup; + } + + set_loglevel_err(); + action = validate_crm_message(input->msg, + crm_system_name, our_pid, XML_ATTR_RESPONSE); + set_loglevel_orig(); + if (action == NULL) { + crm_debug_2("message was not a crmd response. discarding"); + goto cleanup; + } + + /* + * ref. enum crmd_fsa_state in crm/crmd/fsa_defines.h + * fsa_state2string() in crm/crmd/utils.c + */ + state = g_strdup(crm_element_value(input->xml, "crmd_state")); + crm_debug("status of %s@%s: %s (%s)", + crm_element_value(input->xml,XML_PING_ATTR_SYSFROM), + cl_get_string(input->msg, F_CRM_HOST_FROM), state, + crm_element_value(input->xml,XML_PING_ATTR_STATUS)); + delete_ha_msg_input(input); + break; + + cleanup: + delete_ha_msg_input(input); + input = NULL; + } + return state; +} + +/* + * count up the number of ACTs from CIB - count the target resource which has + * started. + * ref. main() in crm/admin/crm_resource.c + */ +int +count_started_resource(void) +{ + crm_data_t *cib; + pe_working_set_t data_set; + int act_num = 0; + + crm_debug_2("called..."); + + if (connect_cib() < 0) { + return -1; + } + cib = get_cib_copy(cib_conn); + set_working_set_defaults(&data_set); + data_set.input = cib; + + set_loglevel_err(); + cluster_status(&data_set); + set_loglevel_orig(); + + slist_iter(node, node_t, data_set.nodes, lpc2, + if (!safe_str_eq(node->details->uname, u.nodename)) { + continue; + } + crm_debug("Node: %s", node->details->uname); + slist_iter(rsc, resource_t, node->details->running_rsc, lpc2, + char *rscid; resource_t *rp; + for (rp = rsc; rp->parent; rp = rp->parent); + if (rp->variant == pe_clone || rp->variant == pe_master) { + char *p; + rscid = g_strdup(rsc->id); + if ((p = strrchr(rscid, ':')) != NULL) { + *p = '\0'; + } + } else { + rscid = g_strdup(rsc->id); + } + crm_debug(" Started: [%s]", rscid); + if (g_hash_table_lookup(c_info.rsc, rscid) != NULL) { + act_num++; + } + g_free(rscid); + ); + ); + data_set.input = NULL; + cleanup_calculations(&data_set); + free_xml(cib); + return act_num; +} + +/* + * called when there is message to receive from attrd. + * ref. subsystem_msg_dispatch() in lib/crm/common/ipc.c + */ +gboolean +attrd_msg_dispatch(IPC_Channel *server, gpointer user_data) +{ + crm_debug_2("called..."); + + if (server->ch_status != IPC_CONNECT) { + /* cuts off the connection with attrd from attrd */ + crm_warn("connection to attrd was terminated"); + do_shutdown(0, NULL); + return FALSE; + } + return TRUE; +} + +/* + * called when there is message(result of query which is status of crmd) to + * receive from crmd. + * receive status and check it, count up the number of ACTs, and send + * reply(T_REP_ACTCOUNT) with the number of ACTs to server. + */ +gboolean +crmd_msg_dispatch(IPC_Channel *server, gpointer user_data) +{ + char *state; + int sockfd; + + crm_debug_2("called..."); + + if ((state = recv_crmd_state(server)) == NULL) { + crm_debug("state is NULL..."); + return TRUE; + } + + /* ref. fsa_state2string() in crm/crmd/utils.c */ + if (safe_str_eq(state, "S_IDLE")) { + int rc; + if ((rc = count_started_resource()) >= 0) { + rep_actcount.result = rc; + crm_info("requested count up the ACTs. ACTs is %d", rc); + } else { + rep_actcount.result = S_FAILED_TO_COUNT; + crm_info("requested count up the ACTs, but it failed"); + } + } else { + rep_actcount.result = S_NOT_IDLE; + crm_info("requested count up the ACTs, but status is %s." + " not S_IDLE", state); + } + g_free(state); + + if ((sockfd = connect_to(c_info.server->node, c_info.server->port)) < 0) { + if (sockfd == -3) { + crm_debug("failed to connect to %s", c_info.server->node); + } else { + crm_warn("failed to connect to %s", c_info.server->node); + } + return FALSE; + } + crm_debug("send(): hdr[%d:%u:%u], result[%d]", + rep_actcount.hdr.type, rep_actcount.hdr.req_id, + rep_actcount.hdr.req_no, rep_actcount.result); + if (send(sockfd, &rep_actcount, + sizeof(hb_msg_t), MSG_DONTWAIT | MSG_NOSIGNAL) < 0) { + cl_perror("send(2) call failed"); + } + close(sockfd); + return FALSE; +} + +/* + * called when cuts off the connection with crmd from crmd. + */ +void +crmd_channel_destroy(gpointer user_data) +{ + crm_debug_2("called..."); + crm_debug("connection to crmd was terminated"); + + crmd_ch = NULL; + return; +} + +/* + * called when cuts off the connection with cib from cib. + */ +void +cib_connection_destroy(gpointer user_data) +{ + crm_debug_2("called..."); + crm_warn("connection to cib was terminated"); + + do_shutdown(0, NULL); + return; +} + +/* ref. main() in tools/attrd_updater.c */ +int +connect_attrd(void) +{ + GCHSource *src; + int attempts; + + crm_debug_2("called..."); + + if (attrd_ch != NULL) { + return 0; + } + + for (attempts = 1; ; attempts++) { + if (attempts > 1) { + sleep(HBCONN_ATTEMPT_INTERVAL); + } + crm_debug("connect to attrd attempt: %d", attempts); + src = init_client_ipc_comms(T_ATTRD, + attrd_msg_dispatch, NULL, &attrd_ch); + if (src != NULL && + attrd_ch != NULL && attrd_ch->ch_status == IPC_CONNECT) { + break; + } + if (attempts >= MAX_HBCONN_ATTEMPTS) { + crm_err("failed to connect to attrd"); + if (attrd_ch == NULL) { + crm_info("connection to callback channel failed"); + } else if (attrd_ch->ch_status != IPC_CONNECT) { + crm_info("connection may have succeeded, but" + " authentication to callback channel failed"); + disconnect_attrd(); + } else if (src == NULL) { + crm_info("callback source not recorded"); + disconnect_attrd(); + } + return -1; + } + if (attrd_ch != NULL) { + disconnect_attrd(); + } + } + crm_debug("succeed at connect to attrd"); + return 0; +} + +/* ref. do_init() in crm/admin/crmadmin.c */ +int +connect_crmd(void) +{ + GCHSource *src; + int attempts; + + crm_debug_2("called..."); + + if (crmd_ch != NULL) { + return 0; + } + + for (attempts = 1; ; attempts++) { + if (attempts > 1) { + sleep(HBCONN_ATTEMPT_INTERVAL); + } + crm_debug("connect to crmd attempt: %d", attempts); + src = init_client_ipc_comms(CRM_SYSTEM_CRMD, + crmd_msg_dispatch, NULL, &crmd_ch); + if (src != NULL && + crmd_ch != NULL && crmd_ch->ch_status == IPC_CONNECT) { + break; + } + if (attempts >= MAX_HBCONN_ATTEMPTS) { + crm_err("failed to connect to crmd"); + if (crmd_ch == NULL) { + crm_info("connection to callback channel failed"); + } else if (crmd_ch->ch_status != IPC_CONNECT) { + crm_info("connection may have succeeded, but" + " authentication to callback channel failed"); + disconnect_crmd(); + } else if (src == NULL) { + crm_info("callback source not recorded"); + disconnect_crmd(); + } + return -1; + } + if (crmd_ch != NULL) { + disconnect_crmd(); + } + } + send_hello_message(crmd_ch, our_pid, crm_system_name, "0", "1"); + set_IPC_Channel_dnotify(src, crmd_channel_destroy); + crm_debug("succeed at connect to crmd"); + return 0; +} + +/* ref. main() in crm/admin/crm_resource.c */ +int +connect_cib(void) +{ + enum cib_errors rc = cib_ok; + int attempts; + + crm_debug_2("called..."); + + if (cib_conn != NULL) { + return 0; + } + + if ((cib_conn = cib_new()) == NULL) { + crm_err("cib connection initialization failed"); + return -1; + } + for (attempts = 0; attempts < MAX_HBCONN_ATTEMPTS; attempts++) { + if (attempts) { + sleep(HBCONN_ATTEMPT_INTERVAL); + } + crm_debug("connect to cib attempt: %d", attempts+1); + if ((rc = cib_conn->cmds->signon(cib_conn, + crm_system_name, cib_query)) == cib_ok) { + break; + } + } + if (rc != cib_ok) { + crm_err("failed to signon to cib: %s", cib_error2string(rc)); + disconnect_cib(FALSE); + return -1; + } + + /* ref. main() in tools/attrd.c */ + if (cib_conn->cmds->set_connection_dnotify( + cib_conn, cib_connection_destroy) != cib_ok) { + crm_err("failed to set dnotify callback"); + disconnect_cib(FALSE); + return -1; + } + crm_debug("succeed at connect to cib"); + return 0; +} + +void +disconnect_attrd(void) +{ + crm_debug_2("called..."); + + if (attrd_ch == NULL) { + return; + } + attrd_ch->ops->destroy(attrd_ch); + attrd_ch = NULL; + return; +} + +void +disconnect_crmd(void) +{ + crm_debug_2("called..."); + + if (crmd_ch == NULL) { + return; + } + crmd_ch->ops->destroy(crmd_ch); + crmd_ch = NULL; + return; +} + +void +disconnect_cib(gboolean unset_dnotify) +{ + crm_debug_2("called..."); + + if (cib_conn == NULL) { + return; + } + if (unset_dnotify == TRUE) { + set_loglevel_err(); + cib_conn->cmds->set_connection_dnotify(cib_conn, NULL); + set_loglevel_orig(); + } + cib_conn->cmds->signoff(cib_conn); + + cib_delete(cib_conn); + /* + * as for disconnect, it is insufficient only to call cib_delete(). since + * the following error will occurs, set NULL to `cib_op_callback_table', + * 'GLib-CRITICAL **: + * g_hash_table_destroy: assertion `hash_table->ref_count > 0' failed'. + */ + cib_op_callback_table = NULL; + + cib_conn = NULL; + return; +} + +int +main(int argc, char **argv) +{ + int argerr = 0, flag; + char *port = NULL; + char *config_file = g_strdup(DEFAULT_CONFFILE); + gboolean daemonize = FALSE; + char *pid_file = g_strdup(DEFAULT_PIDFILE); +#ifdef HAVE_GETOPT_H + int opt_idx = 0; + static struct option long_opts[] = { + {"verbose", 0, 0, 'V'}, + {"client-port", 1, 0, 'p'}, + {"config-file", 1, 0, 'C'}, + {"daemonize", 0, 0, 'D'}, + {"pid-file", 1, 0, 'P'}, + {"block-log", 0, 0, 'b'}, + {"enable-stderr", 0, 0, 'e'}, + {"resource", 1, 0, 'R'}, + {"help", 0, 0, '?'}, + {0, 0, 0, 0} + }; +#endif + + /* ref. cl_inherit_logging_environment() in lib/clplumbing/cl_log.c */ + setenv(ENV_LOGDAEMO, "true", 0); + crm_log_init(basename(argv[0]), loglevel_before_block, TRUE, FALSE, argc, argv); + + G_main_add_SignalHandler(G_PRIORITY_HIGH, SIGHUP, do_reconfig, config_file, NULL); + G_main_add_SignalHandler(G_PRIORITY_HIGH, SIGINT, do_shutdown, NULL, NULL); + G_main_add_SignalHandler(G_PRIORITY_HIGH, SIGTERM, do_shutdown, NULL, NULL); + + if (uname(&u) < 0) { + cl_perror("uname(2) call failed"); + return -1; + } + snprintf(our_pid, 10, "%d", getpid()); + our_pid[10] = '\0'; + + memset(&c_info, 0, sizeof(client_info_t)); + + while (1) { +#ifdef HAVE_GETOPT_H + flag = getopt_long(argc, argv, OPTARGS, long_opts, &opt_idx); +#else + flag = getopt(argc, argv, OPTARGS); +#endif + if (flag == -1) { + break; + } + + switch (flag) { + case 'V': + alter_debug(DEBUG_INC); + break; + case 'p': + if (str_isport(optarg)) { + crm_err("invalid client-port [%s] specified", optarg); + argerr++; + } else { + g_free(port); + port = g_strdup(optarg); + } + break; + case 'C': + if (safe_str_eq(optarg, "")) { + crm_err("invalid config-file [%s] specified", optarg); + argerr++; + } else { + g_free(config_file); + config_file = g_strdup(optarg); + } + break; + case 'D': + daemonize = TRUE; + break; + case 'P': + if (safe_str_eq(optarg, "")) { + crm_err("invalid pid-file [%s] specified", optarg); + argerr++; + } else { + g_free(pid_file); + pid_file = g_strdup(optarg); + } + break; + case 'b': + block_unwanted_log = TRUE; + break; + case 'e': + cl_log_enable_stderr(TRUE); + break; + case 'R': + if (safe_str_eq(optarg, "")) { + crm_err("invalid resource [%s] specified", optarg); + argerr++; + } else { + g_free(c_info.rscid); + c_info.rscid = g_strdup(optarg); + } + break; + case '?': + usage(crm_system_name, 0); + break; + default: + crm_err("Argument code 0%o (%c) is not (?yet?) supported", + flag, flag); + argerr++; + break; + } + } + + if (optind < argc) { + crm_err("non-option ARGV-elements:"); + while (optind < argc) { + crm_err("%s", argv[optind++]); + } + argerr++; + } + if (argerr) { + usage(crm_system_name, 1); + } + + if (c_info.rscid == NULL) { + if (port == NULL) { + usage(crm_system_name, 1); + } + crm_info("config: client-port [%s]", port); + crm_info("config: config-file [%s]", config_file); + crm_info("config: daemonize [%s]", (daemonize) ? "yes" : "no"); + crm_info("config: pid-file [%s]", pid_file); + crm_info("config: block-log [%s]", (block_unwanted_log) ? "yes" : "no"); + + crm_make_daemon(crm_system_name, daemonize, pid_file); + + if (parse_config(config_file) < 0) { + crm_info("exiting %s", crm_system_name); + return 1; + } + + /* create socket for waiting message from server's processes */ + if (listen_to(port) < 0) { + crm_info("exiting %s", crm_system_name); + return 1; + } + crm_info("Starting %s", crm_system_name); + do_reachlimit(NULL); + + mainloop = g_main_loop_new(NULL, FALSE); + g_main_loop_run(mainloop); + } else { + crm_info("config: resource [%s]", c_info.rscid); + crm_info("config: config-file [%s]", config_file); + + if (parse_config(config_file) < 0) { + crm_info("exiting %s", crm_system_name); + return 1; + } + if (g_hash_table_lookup(c_info.rsc, c_info.rscid) == NULL) { + crm_err("config-file has not [%s] section", c_info.rscid); + crm_info("exiting %s", crm_system_name); + return 1; + } + if (do_getoktoact() < 0) { + crm_info("exiting %s", crm_system_name); + return 1; + } + } + crm_info("Exiting %s", crm_system_name); + return 0; +} diff -urN heartbeat-2.1.4-1/tools/hb_actmonitor/hb_actmon_client.conf.sample heartbeat-2.1.4-1.mod/tools/hb_actmonitor/hb_actmon_client.conf.sample --- heartbeat-2.1.4-1/tools/hb_actmonitor/hb_actmon_client.conf.sample 1970-01-01 09:00:00.000000000 +0900 +++ heartbeat-2.1.4-1.mod/tools/hb_actmonitor/hb_actmon_client.conf.sample 2010-05-11 11:49:28.000000000 +0900 @@ -0,0 +1,32 @@ +# +# hb_actmon_client.conf : config file of hb-actmonitor +# + +### +# For hb_actmon_client. +### +[Client] +#interval = 60 +#retry = FALSE +#connerr_retrynum = 0 +#connerr_setattr = FALSE + +### +# For connection with hb_actmonitor (server process). +# - It's required section. +# - `node' and `server_port' are required items. +# - `node' specifies either a numerical network address or a network hostname. +### +[Server] +node = +server_port = + +### +# For hb_actmon_client and RA. +# - section name should specify Resouce-ID. +# - `attr_name' is required item. +### +# ex.) +# [prmActCheck] +# attr_name = inhibit_act:grpPostgreSQLDB +# diff -urN heartbeat-2.1.4-1/tools/hb_actmonitor/hb_actmon_comm.c heartbeat-2.1.4-1.mod/tools/hb_actmonitor/hb_actmon_comm.c --- heartbeat-2.1.4-1/tools/hb_actmonitor/hb_actmon_comm.c 1970-01-01 09:00:00.000000000 +0900 +++ heartbeat-2.1.4-1.mod/tools/hb_actmonitor/hb_actmon_comm.c 2010-05-11 11:49:28.000000000 +0900 @@ -0,0 +1,429 @@ +/* + * hb_actmonitor: Heartbeat Active-Nodes Monitor. + * + * Copyright (C) 2009 NIPPON TELEGRAPH AND TELEPHONE CORPORATION + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This software is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +#include + +static GArray *sock_arr = NULL; +gboolean block_unwanted_log = FALSE; +unsigned int loglevel_before_block = LOG_INFO; +char *connected_from = NULL; + +extern int handle_msg(hb_msg_t *rcv, int sockfd); + + +/* + * return: + * >0: file descriptor which succeeded in connect + * -1: if system call error occurred and could not connect + * -2: connection failed (no route, connection refused, timed out, ...) + * -3: if signal was caught in select() + */ +int +connect_to(const char *node, const char *service) +{ + struct addrinfo hints; + struct addrinfo *result = NULL, *rp; + int sockfd, lock; + fd_set writefds; + struct timeval timeout; + int optval; + socklen_t optlen; + int rc; + int ret = -1; + + crm_debug_2("called..."); + + memset(&hints, 0, sizeof(struct addrinfo)); + hints.ai_family = AF_UNSPEC; + hints.ai_socktype = SOCK_STREAM; + + if ((rc = getaddrinfo(node, service, &hints, &result)) != 0) { + crm_err("getaddrinfo(3) call failed. [%s:%s]: %s", + node, service, gai_strerror(rc)); + freeaddrinfo(result); + return -1; + } + + for (rp = result; rp; rp = rp->ai_next) { + if ((sockfd = socket(rp->ai_family, + rp->ai_socktype, rp->ai_protocol)) < 0) { + cl_perror("socket(2) call failed"); + continue; + } + + /* set non-blocking mode */ + if ((lock = fcntl(sockfd, F_GETFL, NULL)) < 0) { + cl_perror("fcntl(F_GETFL) call failed"); + goto cleanup_close; + } + if (fcntl(sockfd, F_SETFL, lock | O_NONBLOCK) < 0) { + cl_perror("fcntl(F_SETFL) call failed"); + goto cleanup_close; + } + + /* trying to connect with timeout */ + if (connect(sockfd, result->ai_addr, result->ai_addrlen) < 0) { + if (errno != EINPROGRESS) { + cl_perror("connect(2) call failed"); + goto cleanup_close; + } + crm_debug_2("EINPROGRESS in connect(). selecting..."); + + timeout.tv_sec = SELECT_TIMEOUT_SEC; + timeout.tv_usec = 0; + FD_ZERO(&writefds); + FD_SET(sockfd, &writefds); + + rc = select(sockfd+1, NULL, &writefds, NULL, &timeout); + if (rc < 0) { + if (errno == EINTR) { + crm_info("select(2) call failed: %s", + g_strerror(errno)); + close(sockfd); + freeaddrinfo(result); + return -3; + } + cl_perror("select(2) call failed"); + goto cleanup_close; + } else if (rc == 0) { + crm_info("failed to connect (timed out)"); + ret = -2; + goto cleanup_close; + } + + /* socket selected for write */ + optlen = sizeof(int); + if (getsockopt(sockfd, SOL_SOCKET, + SO_ERROR, &optval, &optlen) < 0) { + cl_perror("getsockopt(2) call failed"); + goto cleanup_close; + } + if (optval) { + crm_info("failed to connect to %s (port:%s): %s", + node, service, g_strerror(optval)); + ret = -2; + goto cleanup_close; + } + } + + /* set blocking mode */ + if (fcntl(sockfd, F_SETFL, lock) < 0) { + cl_perror("fcntl(F_SETFL) call failed"); + goto cleanup_close; + } + crm_debug("succeed at connect [%s:%s]", node, service); + freeaddrinfo(result); + return sockfd; + + cleanup_close: + close(sockfd); + } + crm_notice("failed to connect to %s (port:%s)", node, service); + freeaddrinfo(result); + return ret; +} + +/* + * create socket and listen for waiting message. + */ +int +listen_to(const char *service) +{ + struct addrinfo hints; + struct addrinfo *result = NULL, *rp; + int sockfd; + int optval; + int rc; + + crm_debug_2("called..."); + + memset(&hints, 0, sizeof(struct addrinfo)); + hints.ai_flags = AI_PASSIVE; + hints.ai_family = AF_UNSPEC; + hints.ai_socktype = SOCK_STREAM; + + if ((rc = getaddrinfo(NULL, service, &hints, &result)) != 0) { + crm_err("getaddrinfo(3) call failed: %s", gai_strerror(rc)); + freeaddrinfo(result); + return -1; + } + sock_arr = g_array_new(FALSE, TRUE, sizeof(int)); + + for (rp = result; rp; rp = rp->ai_next) { + if ((sockfd = socket(rp->ai_family, + rp->ai_socktype, rp->ai_protocol)) < 0) { + cl_perror("socket(2) call failed"); + goto cleanup_free; + } + + if (rp->ai_family == AF_INET6) { + optval = 1; + if (setsockopt(sockfd, IPPROTO_IPV6, + IPV6_V6ONLY, &optval, sizeof(optval)) < 0) { + cl_perror("setsockopt(2) call failed"); + goto cleanup_close; + } + } + if (rp->ai_family == AF_INET || rp->ai_family == AF_INET6) { + optval = 1; + if (setsockopt(sockfd, SOL_SOCKET, + SO_REUSEADDR, &optval, sizeof(optval)) < 0) { + cl_perror("setsockopt(2) call failed"); + goto cleanup_close; + } + } + + if (bind(sockfd, rp->ai_addr, rp->ai_addrlen) < 0) { + cl_perror("bind(2) call failed"); + goto cleanup_close; + } + if (listen(sockfd, SOMAXCONN) < 0) { + cl_perror("listen(2) call failed"); + goto cleanup_close; + } + + /* create source for socket and add to the mainloop */ + g_io_add_watch_full(g_io_channel_unix_new(sockfd), + G_PRIORITY_DEFAULT, G_IO_IN, on_listen, NULL, NULL); + g_array_append_val(sock_arr, sockfd); + continue; + + cleanup_close: + close(sockfd); + cleanup_free: + freeaddrinfo(result); + close_listen_sock(); + return -1; + } + freeaddrinfo(result); + return 0; +} + +/* + * called when there is message to receive. + */ +gboolean +on_listen(GIOChannel *channel, GIOCondition condition, gpointer unused) +{ + int sockfd; + struct sockaddr_in addr; + socklen_t addrlen = sizeof(addr); + struct timeval timeout; + + crm_debug_2("called..."); + + if (condition & G_IO_IN) { + /* accept the connection */ + if ((sockfd = accept(g_io_channel_unix_get_fd(channel), + (struct sockaddr*)&addr, &addrlen)) < 0) { + cl_perror("accept(2) call failed"); + return TRUE; + } + connected_from = inet_ntoa(addr.sin_addr); + crm_debug("connected from %s", connected_from); + timeout.tv_sec = SELECT_TIMEOUT_SEC; + timeout.tv_usec = 0; + receive_msg(sockfd, &timeout); + close(sockfd); + } + return TRUE; +} + +void +close_listen_sock(void) +{ + int lpc; + + crm_debug_2("called..."); + + if (sock_arr == NULL) { + return; + } + for (lpc = 0; lpc < sock_arr->len; lpc++) { + close(g_array_index(sock_arr, int, lpc)); + } + return; +} + +/* + * return: + * 0: success + * -1: if system call error occurred + * -2: if message NOT intended was received + * -3: if signal was caught in select() + */ +int +receive_msg(int sockfd, struct timeval *timeout) +{ + fd_set readfds; + hb_msg_t msg; + int rc; + + crm_debug_2("called..."); + + FD_ZERO(&readfds); + FD_SET(sockfd, &readfds); + + rc = select(sockfd+1, &readfds, NULL, NULL, timeout); + if (rc < 0) { + if (errno == EINTR) { + crm_info("select(2) call failed: %s", + g_strerror(errno)); + return -3; + } + cl_perror("select(2) call failed"); + return -1; + } else if (rc == 0) { + crm_warn("failed to receive message (timed out)"); + return -1; + } + + memset(&msg, 0, sizeof(hb_msg_t)); + if (recv(sockfd, &msg, sizeof(hb_msg_t), 0) < 0) { + cl_perror("recv(2) call failed"); + return -1; + } + crm_debug("recv(): hdr[%d:%u:%u]", + msg.hdr.type, msg.hdr.req_id, msg.hdr.req_no); + return handle_msg(&msg, sockfd); +} + +unsigned int +get_next_req_no(void) +{ + static unsigned int no = 0; + + crm_debug_2("called..."); + + if (no >= UINT_MAX) { + no = 0; + } + return ++no; +} + +gchar * +key_file_get_string(GKeyFile *key_file, + const gchar *group_name, const gchar *key, gboolean output_invalid) +{ + gchar *str; + + if (g_key_file_has_key(key_file, group_name, key, NULL) == FALSE) { + if (output_invalid == TRUE) { + crm_warn("config: section [%s] has no key [%s]." + " this section is invalid", group_name, key); + } else { + crm_debug("config: section [%s] has no key [%s]", + group_name, key); + } + return NULL; + } + + str = g_key_file_get_string(key_file, group_name, key, NULL); + if (safe_str_eq(str, "")) { + if (output_invalid == TRUE) { + crm_warn("config: value of [%s] in section [%s] is empty." + " this section is invalid", key, group_name); + } else { + crm_warn("config: value of [%s] in section [%s] is empty", + key, group_name); + } + g_free(str); + return NULL; + } + return str; +} + +int +str_isdigit(const char *str) +{ + int lpc; + + if (str == NULL) { + return -1; + } + if (str[0] == 0) { + return 2; + } + for (lpc = 0; lpc < strlen(str); lpc++) { + if (!isdigit(str[lpc])) { + return 1; + } + } + return 0; +} + +int +str_isport(const char *str) +{ + if (str == NULL) { + return -1; + } + if (str[0] == 0) { + return 2; + } + if (str_isdigit(str) || crm_parse_int(str, "-1") < 1 || + crm_parse_int(str, "-1") > 65535) { + return 1; + } + return 0; +} + +int +calc_interval(int interval, int delay, gboolean retry) +{ + if (retry == TRUE) { + return delay; + } + return (interval > delay) ? interval : delay; +} + +/* + * set_loglevel_err() and set_loglevel_orig() are for -b option. + * use in order of set_loglevel_err() and set_loglevel_orig(). + */ +void +set_loglevel_err(void) +{ + crm_debug_2("called..."); + + if (block_unwanted_log == FALSE) { + return; + } + if ((loglevel_before_block = get_crm_log_level()) <= LOG_ERR) { + return; + } + set_crm_log_level(LOG_ERR); + return; +} + +void +set_loglevel_orig(void) +{ + crm_debug_2("called..."); + + if (block_unwanted_log == FALSE) { + return; + } + if (loglevel_before_block == get_crm_log_level()) { + return; + } + /* revert to original log level */ + set_crm_log_level(loglevel_before_block); + return; +} diff -urN heartbeat-2.1.4-1/tools/hb_actmonitor/hb_actmon_comm.h heartbeat-2.1.4-1.mod/tools/hb_actmonitor/hb_actmon_comm.h --- heartbeat-2.1.4-1/tools/hb_actmonitor/hb_actmon_comm.h 1970-01-01 09:00:00.000000000 +0900 +++ heartbeat-2.1.4-1.mod/tools/hb_actmonitor/hb_actmon_comm.h 2010-05-11 11:49:28.000000000 +0900 @@ -0,0 +1,99 @@ +/* + * hb_actmonitor: Heartbeat Active-Nodes Monitor. + * + * Copyright (C) 2009 NIPPON TELEGRAPH AND TELEPHONE CORPORATION + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This software is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +#ifndef HB_ACTMON_COMM_H +#define HB_ACTMON_COMM_H + +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#ifdef HAVE_GETOPT_H +# include +#endif + +#define SELECT_TIMEOUT_SEC 10 + +#define ENV_LOGDAEMO "HA_use_logd" /* ref. lib/clplumbing/cl_log.c */ + +typedef enum hb_msgtype { + T_REQ_ACTCOUNT = 1, /* request count up the ACTs from server */ + T_REP_ACTCOUNT, /* reply the number of ACTs */ + T_REQ_REACHLIMIT, /* request status(ACTs reached the limit?) from client */ + T_REP_REACHLIMIT, /* reply status */ + T_REQ_GETOKTOACT, /* request whether being possible to become ACT from client */ + T_REP_GETOKTOACT /* reply about result of "May I become ACT?" */ + +} hb_msgtype; + +typedef enum actcount_status { + /* status of server side */ + S_NOT_REQUESTING = -1, + S_CONNECT_ERROR = -2, + S_CONNECT_FAILURE = -3, + S_SEND_ERROR = -4, + S_REQUEST_SENT = -5, /* not receiving reply yet */ + /* status of client side */ + S_NOT_IDLE = -6, + S_FAILED_TO_COUNT = -7 + +} actcount_status; + +typedef struct hb_msghdr_s +{ + hb_msgtype type; + guint req_id; + unsigned int req_no; + +} hb_msghdr_t; + +typedef struct hb_msg_s +{ + hb_msghdr_t hdr; + int result; + +} hb_msg_t; + +/* + * prototype declaration. + */ +int connect_to(const char *node, const char *service); +int listen_to(const char *service); +gboolean on_listen(GIOChannel *channel, GIOCondition condition, gpointer unused); +void close_listen_sock(void); +int receive_msg(int sockfd, struct timeval *timeout); +unsigned int get_next_req_no(void); +gchar *key_file_get_string(GKeyFile *key_file, + const gchar *group_name, const gchar *key, gboolean output_invalid); +int str_isdigit(const char *str); +int str_isport(const char *str); +int calc_interval(int interval, int delay, gboolean retry); +void set_loglevel_err(void); +void set_loglevel_orig(void); +#endif /* HB_ACTMON_COMM_H */ diff -urN heartbeat-2.1.4-1/tools/hb_actmonitor/hb_actmonitor.c heartbeat-2.1.4-1.mod/tools/hb_actmonitor/hb_actmonitor.c --- heartbeat-2.1.4-1/tools/hb_actmonitor/hb_actmonitor.c 1970-01-01 09:00:00.000000000 +0900 +++ heartbeat-2.1.4-1.mod/tools/hb_actmonitor/hb_actmonitor.c 2010-05-11 11:49:28.000000000 +0900 @@ -0,0 +1,681 @@ +/* + * hb_actmonitor: Heartbeat Active-Nodes Monitor. + * + * Copyright (C) 2009 NIPPON TELEGRAPH AND TELEPHONE CORPORATION + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This software is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +#include + +#define OPTARGS "Vp:l:C:DP:e?" +#define DEFAULT_ACT_LIMIT INT_MAX +#define DEFAULT_INTERVAL 60 +#define MIN_INTERVAL 1 +#define DEFAULT_RETRY FALSE +#define RETRY_REQ_DELAY 10 +#define RETRY_REP_DELAY 10 +#define REPLY_TIMEOUT 30 +#define DEFAULT_CONFFILE "/etc/hb_actmonitor.conf" +#define DEFAULT_PIDFILE "/var/run/hb_actmonitor.pid" + +extern unsigned int loglevel_before_block; +extern char *connected_from; + +GMainLoop *mainloop = NULL; +GList *client_info = NULL; +guint timer_id = 0; + +typedef struct client_info_s +{ + char *node; + char *port; + unsigned int id; + actcount_status status; + int actcount; + +} client_info_t; + +typedef struct monitor_status_s +{ + int act_limit; + int act_num; + int interval; + gboolean retry; + unsigned int req_no; + gboolean count_up; + +} monitor_status_t; + +monitor_status_t mon_stat; + +/* + * prototype declaration + */ +gboolean do_reconfig(int nsig, gpointer user_data); +gboolean do_shutdown(int nsig, gpointer unused); +void usage(const char *cmd, int exit_status); +int parse_config(const char *file); +void free_client_info(gpointer data, gpointer unused); +int send_req_actcount(client_info_t *client); +gboolean do_actcount(gpointer unused); +gboolean timeout_rep_actcount(gpointer unused); +int handle_msg(hb_msg_t *rcv, int sockfd); +int handle_rep_actcount(hb_msg_t *rcv); +int handle_req_reachlimit(hb_msg_t *rcv, int sockfd); +int handle_req_getoktoact(hb_msg_t *rcv, int sockfd); + + +gboolean +do_reconfig(int nsig, gpointer user_data) +{ + crm_debug_2("called..."); + crm_info("received signal %d, re-read config %s", nsig, (char*)user_data); + + if (client_info != NULL) { + g_list_foreach(client_info, free_client_info, NULL); + g_list_free(client_info); + client_info = NULL; + } + + if (parse_config(user_data) < 0) { + do_shutdown(0, NULL); + crm_info("exiting %s", crm_system_name); + exit(1); + } + do_actcount(NULL); + return TRUE; +} + +gboolean +do_shutdown(int nsig, gpointer unused) +{ + crm_debug_2("called..."); + crm_info("received signal %d, do shutdown...", nsig); + + close_listen_sock(); + + if (mainloop != NULL && g_main_loop_is_running(mainloop)) { + g_main_loop_quit(mainloop); + } else { + crm_info("Exiting %s", crm_system_name); + exit(0); + } + return FALSE; +} + +void +usage(const char *cmd, int exit_status) +{ + FILE *stream; + + stream = exit_status ? stderr : stdout; + + fprintf(stream, "usage: %s [-%s]\n", cmd, OPTARGS); + fprintf(stream, "\t--%s (-%c)\t\t\t: This text\n", "help", '?'); + fprintf(stream, "\t--%s (-%c)\t\t\t: Increase the debug output\n", + "verbose", 'V'); + fprintf(stream, "\t--%s (-%c) \t: Port number (1-65535) of %s\n" + "\t\t\t\t\t * Required option\n", "server-port", 'p', cmd); + fprintf(stream, "\t--%s (-%c) \t\t: Upper limit the ACTs\n" + "\t\t\t\t\t * Default=%d\n", "limit", 'l', DEFAULT_ACT_LIMIT); + fprintf(stream, "\t--%s (-%c) \t: Configuration file location\n" + "\t\t\t\t\t * Default=%s\n", "config-file", 'C', DEFAULT_CONFFILE); + fprintf(stream, "\t--%s (-%c)\t\t: Run in the background as a daemon\n", + "daemonize", 'D'); + fprintf(stream, "\t--%s (-%c) \t: Daemon pid file location\n", + "pid-file", 'P'); + fprintf(stream, "\t--%s (-%c)\t\t: Enable to output log to stderr\n", + "enable-stderr", 'e'); + + fflush(stream); + exit(exit_status); +} + +int +parse_config(const char *file) +{ + GKeyFile *kf = g_key_file_new(); + gchar **grp = NULL; + gsize length; + client_info_t *client; + gchar *node, *port, *val; + const char *key; + gboolean has_server_section = FALSE; + unsigned int client_no = 0; + int lpc; + + crm_debug_2("called..."); + + mon_stat.act_num = mon_stat.act_limit; + mon_stat.interval = DEFAULT_INTERVAL; + mon_stat.retry = DEFAULT_RETRY; + mon_stat.count_up = FALSE; + + if ((g_key_file_load_from_file(kf, file, G_KEY_FILE_NONE, NULL)) == FALSE) { + if (g_path_is_absolute(file) == TRUE) { + crm_err("read config-file failed. [%s]", file); + } else { + gchar *curr_dir = g_get_current_dir(); + crm_err("read config-file failed. [%s/%s]", curr_dir, file); + g_free(curr_dir); + } + return -1; + } + grp = g_key_file_get_groups(kf, &length); + + for (lpc = 0; lpc < length; lpc++) { + if (safe_str_eq(grp[lpc], "Server")) { + key = "interval"; + if ((val = key_file_get_string(kf, grp[lpc], key, FALSE))) { + if (!str_isdigit(val) && + crm_parse_int(val, "-1") >= MIN_INTERVAL && + crm_parse_int(val, "-1") <= INT_MAX) { + mon_stat.interval = crm_parse_int(val, "-1"); + } else { + crm_warn("invalid %s [%s] specified", key, val); + } + g_free(val); + } + crm_info("[%s]: %s [%d]", grp[lpc], key, mon_stat.interval); + + key = "retry"; + if ((val = key_file_get_string(kf, grp[lpc], key, FALSE))) { + mon_stat.retry = crm_is_true(val); + g_free(val); + } + crm_info("[%s]: %s [%s]", + grp[lpc], key, (mon_stat.retry) ? "yes" : "no"); + has_server_section = TRUE; + } else { + if (!(node = key_file_get_string(kf, grp[lpc], "node", TRUE))) { + continue; + } + if (!(port = key_file_get_string(kf, grp[lpc], "client_port", TRUE))) { + continue; + } else if (str_isport(port)) { + crm_warn("invalid client_port [%s] specified." + " section [%s] is invalid", port, grp[lpc]); + g_free(port); port = NULL; + continue; + } + client = g_malloc0(sizeof(client_info_t)); + client->node = node; + client->port = port; + client->id = ++client_no; + client_info = g_list_append(client_info, client); + crm_info("[%s]: node[%s], client_port[%s]", grp[lpc], node, port); + } + } + g_strfreev(grp); + g_key_file_free(kf); + + if (has_server_section == FALSE) { + crm_info("config-file has no [Server] section. interval[%d], retry[%s]", + mon_stat.interval, (mon_stat.retry) ? "yes" : "no"); + } + if (g_list_length(client_info) == 0) { + crm_warn("config-file has no valid client's setting"); + mon_stat.act_num = 0; + return 1; + } + return 0; +} + +void +free_client_info(gpointer data, gpointer unused) +{ + crm_debug_2("called..."); + + if (data != NULL) { + client_info_t *client = (client_info_t*)data; + g_free(client->node); + g_free(client->port); + g_free(client); + } + return; +} + +/* + * send request(T_REQ_ACTCOUNT) to client. + */ +int +send_req_actcount(client_info_t *client) +{ + hb_msg_t msg; + int sockfd; + + crm_debug_2("called..."); + crm_debug("client [%s:%s]", client->node, client->port); + + if ((sockfd = connect_to(client->node, client->port)) < 0) { + switch (sockfd) { + case -2: + crm_notice("failed to connect to %s", client->node); + client->status = S_CONNECT_FAILURE; + break; + case -3: + crm_info("failed to connect to %s", client->node); + client->status = S_CONNECT_ERROR; + break; + default: + crm_warn("failed to connect to %s", client->node); + client->status = S_CONNECT_ERROR; + break; + } + return -1; + } + memset(&msg, 0, sizeof(hb_msg_t)); + msg.hdr.type = T_REQ_ACTCOUNT; + msg.hdr.req_id = client->id; + msg.hdr.req_no = mon_stat.req_no; + crm_debug("send(): hdr[%d:%u:%u]", + msg.hdr.type, msg.hdr.req_id, msg.hdr.req_no); + if (send(sockfd, &msg, sizeof(hb_msg_t), MSG_DONTWAIT | MSG_NOSIGNAL) < 0) { + cl_perror("send(2) call failed"); + client->status = S_SEND_ERROR; + close(sockfd); + return -1; + } + client->status = S_REQUEST_SENT; + close(sockfd); + return 0; +} + +/* + * send request(T_REQ_ACTCOUNT) to all clients. + * (call send_req_actcount() to all clients.) + */ +gboolean +do_actcount(gpointer unused) +{ + client_info_t *client = NULL; + GList *l; + + crm_debug_2("called..."); + crm_debug("mon_stat.act_num [%d]", mon_stat.act_num); + crm_info("request to count up the ACTs"); + + if (timer_id) { + Gmain_timeout_remove(timer_id); + } + if (g_list_length(client_info) == 0) { + crm_warn("client's setting for monitor is not specified as config-file"); + return TRUE; + } + + for (l = g_list_first(client_info); l; l = g_list_next(l)) { + client = l->data; + client->status = S_NOT_REQUESTING; + } + mon_stat.req_no = get_next_req_no(); + + for (l = g_list_first(client_info); l; l = g_list_next(l)) { + client = l->data; + if (send_req_actcount(client) < 0) { + if (client->status == S_CONNECT_FAILURE) { + /* consider that HB service has stopped. */ + client->actcount = 0; + } else { + mon_stat.count_up = FALSE; + break; + } + } else { + mon_stat.count_up = TRUE; + } + } + if (mon_stat.count_up == FALSE) { + int interval = calc_interval(mon_stat.interval, + RETRY_REQ_DELAY, mon_stat.retry); + crm_debug("timeout_add [do_actcount] %d sec", interval); + timer_id = Gmain_timeout_add(interval*1000, do_actcount, NULL); + + if (client->status == S_CONNECT_FAILURE && mon_stat.act_num > 0) { + crm_info("consider that all HB services have stopped," + " ACTs is set to 0"); + mon_stat.act_num = 0; + } + return TRUE; + } + crm_debug("timeout_add [timeout_rep_actcount] %d sec", REPLY_TIMEOUT); + timer_id = Gmain_timeout_add(REPLY_TIMEOUT*1000, timeout_rep_actcount, NULL); + return TRUE; +} + +gboolean +timeout_rep_actcount(gpointer unused) +{ + int interval = (mon_stat.retry == TRUE) ? 0 : mon_stat.interval; + + crm_debug_2("called..."); + + crm_warn("failed to receive reply message (timed out)"); + mon_stat.count_up = FALSE; + if (timer_id) { + Gmain_timeout_remove(timer_id); + } + crm_debug("timeout_add [do_actcount] %d sec", interval); + timer_id = Gmain_timeout_add(interval*1000, do_actcount, NULL); + return TRUE; +} + +int +handle_msg(hb_msg_t *rcv, int sockfd) +{ + crm_debug_2("called..."); + + switch (rcv->hdr.type) { + case T_REP_ACTCOUNT: + return handle_rep_actcount(rcv); + case T_REQ_REACHLIMIT: + return handle_req_reachlimit(rcv, sockfd); + case T_REQ_GETOKTOACT: + return handle_req_getoktoact(rcv, sockfd); + default: + crm_warn("the message NOT intended was received from %s", + connected_from); + crm_info("NOT intended message: msgtype[%d], req_id[%u], req_no[%u]", + rcv->hdr.type, rcv->hdr.req_id, rcv->hdr.req_no); + break; + } + return -2; +} + +/* + * called when reply(T_REP_ACTCOUNT) from client is received. + * count up the number of ACTs of all clients. + */ +int +handle_rep_actcount(hb_msg_t *rcv) +{ + int interval = calc_interval(mon_stat.interval, + RETRY_REP_DELAY, mon_stat.retry); + client_info_t *client; + GList *l; + + crm_debug_2("called..."); + crm_debug("hdr[%d:%u:%u], result[%d]. count_up[%d]", + rcv->hdr.type, rcv->hdr.req_id, rcv->hdr.req_no, + rcv->result, mon_stat.count_up); + + if (mon_stat.count_up == FALSE) { + return 0; + } + + for (l = g_list_first(client_info); l; l = g_list_next(l)) { + client = l->data; + if (client->id != rcv->hdr.req_id || + mon_stat.req_no != rcv->hdr.req_no) { + continue; + } + client->status = rcv->result; + if (rcv->result >= 0) { + crm_info("ACTs at %s is %d", client->node, rcv->result); + client->actcount = rcv->result; + goto replied_check; + } + if (rcv->result == S_NOT_IDLE) { + crm_info("status of [%s] is not S_IDLE", client->node); + } else { + crm_info("failed to count up the ACTs at %s", client->node); + } + goto timeout_add; + } + return 0; + +replied_check: + for (l = g_list_first(client_info); l; l = g_list_next(l)) { + client = l->data; + if (client->status < 0 && client->status != S_CONNECT_FAILURE) { + return 0; + } + } + + mon_stat.act_num = 0; + for (l = g_list_first(client_info); l; l = g_list_next(l)) { + client = l->data; + mon_stat.act_num += client->actcount; + } + crm_debug("ACTs: %d (req_no: %u)", mon_stat.act_num, mon_stat.req_no); + crm_info("count up the ACTs was completed. ACTs is %d", mon_stat.act_num); + + if (mon_stat.act_limit < mon_stat.act_num) { + crm_warn("ACTs (%d) has exceeded the upper limit", mon_stat.act_num); + } + interval = mon_stat.interval; + +timeout_add: + mon_stat.count_up = FALSE; + if (timer_id) { + Gmain_timeout_remove(timer_id); + } + crm_debug("timeout_add [do_actcount] %d sec", interval); + timer_id = Gmain_timeout_add(interval*1000, do_actcount, NULL); + return 0; +} + +/* + * called when request(T_REQ_REACHLIMIT) from client is received. + * send reply(T_REP_REACHLIMIT) with + * TRUE (ACTs has reached the limit) or FALSE (ACTs has NOT reached the limit). + */ +int +handle_req_reachlimit(hb_msg_t *rcv, int sockfd) +{ + hb_msg_t msg; + + crm_debug_2("called..."); + crm_debug("hdr[%d:%u:%u]", rcv->hdr.type, rcv->hdr.req_id, rcv->hdr.req_no); + + memset(&msg, 0, sizeof(hb_msg_t)); + msg.hdr.type = T_REP_REACHLIMIT; + msg.hdr.req_id = rcv->hdr.req_id; + msg.hdr.req_no = rcv->hdr.req_no; + msg.result = (mon_stat.act_limit <= mon_stat.act_num) ? TRUE : FALSE; + crm_debug("send(): hdr[%d:%u:%u], result[%d]", + msg.hdr.type, msg.hdr.req_id, msg.hdr.req_no, msg.result); + if (send(sockfd, &msg, sizeof(hb_msg_t), MSG_DONTWAIT | MSG_NOSIGNAL) < 0) { + cl_perror("send(2) call failed"); + return -1; + } + return 0; +} + +/* + * called when request(T_REQ_GETOKTOACT) from client is received. + * send reply(T_REP_GETOKTOACT) with + * TRUE (ACTs has NOT reached the limit) or FALSE (ACTs has reached the limit). + * when ACTs has NOT reached the limit, ACTs is incremented by 1. + */ +int +handle_req_getoktoact(hb_msg_t *rcv, int sockfd) +{ + hb_msg_t msg; + + crm_debug_2("called..."); + crm_debug("hdr[%d:%u:%u]", rcv->hdr.type, rcv->hdr.req_id, rcv->hdr.req_no); + + memset(&msg, 0, sizeof(hb_msg_t)); + msg.hdr.type = T_REP_GETOKTOACT; + msg.hdr.req_id = rcv->hdr.req_id; + msg.hdr.req_no = rcv->hdr.req_no; + msg.result = (mon_stat.act_limit > mon_stat.act_num) ? TRUE : FALSE; + crm_debug("send(): hdr[%d:%u:%u], result[%d]", + msg.hdr.type, msg.hdr.req_id, msg.hdr.req_no, msg.result); + if (send(sockfd, &msg, sizeof(hb_msg_t), MSG_DONTWAIT | MSG_NOSIGNAL) < 0) { + cl_perror("send(2) call failed"); + return -1; + } + + if (mon_stat.act_limit > mon_stat.act_num) { + mon_stat.act_num++; + crm_info("requested increase the ACTs. replied OK, ACTs is %d", + mon_stat.act_num); + if (mon_stat.count_up == TRUE) { + int interval = calc_interval(mon_stat.interval, + RETRY_REP_DELAY, mon_stat.retry); + if (timer_id) { + Gmain_timeout_remove(timer_id); + } + crm_debug("timeout_add [do_actcount] %d sec", interval); + timer_id = Gmain_timeout_add(interval*1000, do_actcount, NULL); + mon_stat.count_up = FALSE; + } + } else { + crm_info("requested increase the ACTs. but ACTs (%d) has" + " reached upper limit, replied NG", mon_stat.act_num); + } + return 0; +} + +int +main(int argc, char **argv) +{ + int argerr = 0, flag; + char *port = NULL; + char *config_file = g_strdup(DEFAULT_CONFFILE); + gboolean daemonize = FALSE; + char *pid_file = g_strdup(DEFAULT_PIDFILE); + int val; +#ifdef HAVE_GETOPT_H + int opt_idx = 0; + static struct option long_opts[] = { + {"verbose", 0, 0, 'V'}, + {"server-port", 1, 0, 'p'}, + {"limit", 1, 0, 'l'}, + {"config-file", 1, 0, 'C'}, + {"daemonize", 0, 0, 'D'}, + {"pid-file", 1, 0, 'P'}, + {"enable-stderr", 0, 0, 'e'}, + {"help", 0, 0, '?'}, + {0, 0, 0, 0} + }; +#endif + + /* ref. cl_inherit_logging_environment() in lib/clplumbing/cl_log.c */ + setenv(ENV_LOGDAEMO, "true", 0); + crm_log_init(basename(argv[0]), loglevel_before_block, TRUE, FALSE, argc, argv); + + G_main_add_SignalHandler(G_PRIORITY_HIGH, SIGINT, do_shutdown, NULL, NULL); + G_main_add_SignalHandler(G_PRIORITY_HIGH, SIGTERM, do_shutdown, NULL, NULL); + + memset(&mon_stat, 0, sizeof(monitor_status_t)); + mon_stat.act_limit = DEFAULT_ACT_LIMIT; + + while (1) { +#ifdef HAVE_GETOPT_H + flag = getopt_long(argc, argv, OPTARGS, long_opts, &opt_idx); +#else + flag = getopt(argc, argv, OPTARGS); +#endif + if (flag == -1) { + break; + } + + switch (flag) { + case 'V': + alter_debug(DEBUG_INC); + break; + case 'p': + if (str_isport(optarg)) { + crm_err("invalid server-port [%s] specified", optarg); + argerr++; + } else { + g_free(port); + port = g_strdup(optarg); + } + break; + case 'l': + val = crm_parse_int(optarg, "-1"); + if (str_isdigit(optarg) || val < 0 || val > INT_MAX) { + crm_err("invalid limit [%s] specified", optarg); + argerr++; + } else { + mon_stat.act_limit = val; + } + break; + case 'C': + if (safe_str_eq(optarg, "")) { + crm_err("invalid config-file [%s] specified", optarg); + argerr++; + } else { + g_free(config_file); + config_file = g_strdup(optarg); + } + break; + case 'D': + daemonize = TRUE; + break; + case 'P': + if (safe_str_eq(optarg, "")) { + crm_err("invalid pid-file [%s] specified", optarg); + argerr++; + } else { + g_free(pid_file); + pid_file = g_strdup(optarg); + } + break; + case 'e': + cl_log_enable_stderr(TRUE); + break; + case '?': + usage(crm_system_name, 0); + break; + default: + crm_err("Argument code 0%o (%c) is not (?yet?) supported", + flag, flag); + argerr++; + break; + } + } + + if (optind < argc) { + crm_err("non-option ARGV-elements:"); + while (optind < argc) { + crm_err("%s", argv[optind++]); + } + argerr++; + } + if (argerr || port == NULL) { + usage(crm_system_name, 1); + } + crm_info("config: server-port [%s]", port); + crm_info("config: limit [%d]", mon_stat.act_limit); + crm_info("config: config-file [%s]", config_file); + crm_info("config: daemonize [%s]", (daemonize) ? "yes" : "no"); + crm_info("config: pid-file [%s]", pid_file); + + crm_make_daemon(crm_system_name, daemonize, pid_file); + + if (parse_config(config_file) < 0) { + crm_info("exiting %s", crm_system_name); + return 1; + } + + /* create socket for waiting message from client's processes */ + if (listen_to(port) < 0) { + crm_info("exiting %s", crm_system_name); + return 1; + } + G_main_add_SignalHandler(G_PRIORITY_HIGH, SIGHUP, do_reconfig, config_file, NULL); + + crm_info("Starting %s", crm_system_name); + do_actcount(NULL); + + mainloop = g_main_loop_new(NULL, FALSE); + g_main_loop_run(mainloop); + + crm_info("Exiting %s", crm_system_name); + return 0; +} diff -urN heartbeat-2.1.4-1/tools/hb_actmonitor/hb_actmonitor.conf.sample heartbeat-2.1.4-1.mod/tools/hb_actmonitor/hb_actmonitor.conf.sample --- heartbeat-2.1.4-1/tools/hb_actmonitor/hb_actmonitor.conf.sample 1970-01-01 09:00:00.000000000 +0900 +++ heartbeat-2.1.4-1.mod/tools/hb_actmonitor/hb_actmonitor.conf.sample 2010-05-11 11:49:28.000000000 +0900 @@ -0,0 +1,22 @@ +# +# hb_actmonitor.conf : config file of hb-actmonitor +# + +### +# For hb_actmonitor. +### +[Server] +#interval = 60 +#retry = FALSE + +### +# For connection with hb_actmon_client (client process). +# - section name should be unique. +# - `node' and `client_port' are required items. +# - `node' specifies either a numerical network address or a network hostname. +### +# ex.) +# [domU-a1] +# node = 192.168.201.181 +# client_port = 1234 +# diff -urN heartbeat-2.1.4-1/tools/hb_actmonitor/hb_actmonitor.spec.in heartbeat-2.1.4-1.mod/tools/hb_actmonitor/hb_actmonitor.spec.in --- heartbeat-2.1.4-1/tools/hb_actmonitor/hb_actmonitor.spec.in 1970-01-01 09:00:00.000000000 +0900 +++ heartbeat-2.1.4-1.mod/tools/hb_actmonitor/hb_actmonitor.spec.in 2010-05-11 11:49:28.000000000 +0900 @@ -0,0 +1,94 @@ +######################################## +# Derived definitions +######################################## +%define name hb-actmonitor +%define version 1.00 +%define release 1.el5 +%define prefix @libdir@/@PACKAGE@/ +%define ORGARCH heartbeat-2.1.4-1 +# +# +# +Summary: Heartbeat Active-Node Monitor. +Name: %{name} +Version: %{version} +Release: %{release} +Group: Applications +Source: %{ORGARCH}.tar.gz +License: GPL/LGPL +Vendor: NIPPON TELEGRAPH AND TELEPHONE CORPORATION +BuildRoot: %{_tmppath}/%{name}-%{version} +BuildRequires: autoconf, automake libtool +Requires: heartbeat = 2.1.4-1 + +######################################## +%description +######################################## +Monitor the number of nodes which the specific resource has started, +update attribute-value of CIB so that it may not exceed upper limit. + +######################################## +%prep +######################################## +rm -rf $RPM_BUILD_ROOT +%setup -q -n %{ORGARCH} +pushd $RPM_BUILD_DIR/%{ORGARCH} +./ConfigureMe bootstrap +popd + +######################################## +%build +######################################## +pushd $RPM_BUILD_DIR/%{ORGARCH}/replace +make DESTDIR=$RPM_BUILD_ROOT +popd +pushd $RPM_BUILD_DIR/%{ORGARCH}/libltdl +make DESTDIR=$RPM_BUILD_ROOT +popd +pushd $RPM_BUILD_DIR/%{ORGARCH}/lib +make DESTDIR=$RPM_BUILD_ROOT +popd +pushd $RPM_BUILD_DIR/%{ORGARCH}/tools/hb_actmonitor +make DESTDIR=$RPM_BUILD_ROOT +popd + +######################################## +%install +######################################## +pushd $RPM_BUILD_DIR/%{ORGARCH}/tools/hb_actmonitor +make DESTDIR=$RPM_BUILD_ROOT install +popd + +######################################## +%clean +######################################## +if + [ -n "${RPM_BUILD_ROOT}" -a "${RPM_BUILD_ROOT}" != "/" ] +then + rm -rf $RPM_BUILD_ROOT +fi +rm -rf $RPM_BUILD_DIR/%{ORGARCH} + +######################################## +%post +######################################## +true +######################################## +%preun +######################################## +true +######################################## +%postun +######################################## +true +######################################## +%files +######################################## +%defattr(-,root,root) +%dir %{prefix}/%{name} +%{prefix}/%{name}/hb_actmonitor +%{prefix}/%{name}/hb_actmon_client +%{prefix}/%{name}/hb_actmonitor.conf.sample +%{prefix}/%{name}/hb_actmon_client.conf.sample +%defattr(755,root,root) +@OCF_RA_DIR@/heartbeat/actcheck