#!/bin/sh
#
# Description:  Manages a PostgreSQL Server as an OCF High-Availability
#               resource
#
# Authors:      Serge Dubrouski (sergeyfd@gmail.com) -- original RA
#               Florian Haas (florian@linbit.com) -- makeover
#               Takatoshi MATSUO (matsuo.tak@gmail.com) -- support replication
#
# Copyright:    2006-2010 Serge Dubrouski
#               and other Linux-HA contributors
# License:      GNU General Public License (GPL)
#
###############################################################################
# Initialization:

: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
. "${OCF_FUNCTIONS_DIR}/ocf-shellfuncs"

#
# Get PostgreSQL Configuration parameter
#
# Arguments: $1 - name of the parameter to look up
# Globals:   OCF_RESKEY_config (read) - explicit configuration file, if set
#            OCF_RESKEY_pgdata (read) - used to locate postgresql.conf otherwise
# Outputs:   the value of every matching line on stdout, with any trailing
#            "# comment" removed and surrounding single quotes stripped
# Returns:   1 when check_config rejects the configuration file,
#            otherwise the exit status of perl
#
get_pgsql_param() {
    local config
    local param_name
    local perl_code

    param_name=$1

    # Pick the configuration file: an explicit one wins over the default
    # location inside the data directory.
    if [ -n "$OCF_RESKEY_config" ]; then
        config=$OCF_RESKEY_config
    else
        config=$OCF_RESKEY_pgdata/postgresql.conf
    fi

    check_config "$config" || return 1

    # The parameter name is interpolated by the shell; the remaining '$'
    # are escaped so that perl sees its own variables.
    perl_code="if (/^\s*$param_name[\s=]+\s*(.*)$/) {
       \$dir=\$1;
       \$dir =~ s/\s*\#.*//;
       \$dir =~ s/^'(\S*)'/\$1/;
       print \$dir;}"

    # Quote the path: an unquoted redirection target containing whitespace
    # makes the redirection fail and the lookup silently return nothing.
    perl -ne "$perl_code" < "$config"
}

# Defaults
OCF_RESKEY_pgctl_default=/usr/bin/pg_ctl
OCF_RESKEY_psql_default=/usr/bin/psql
OCF_RESKEY_pgdata_default=/var/lib/pgsql/data
OCF_RESKEY_pgdba_default=postgres
OCF_RESKEY_pghost_default=""
OCF_RESKEY_pgport_default=5432
OCF_RESKEY_config_default=""
OCF_RESKEY_start_opt_default=""
OCF_RESKEY_pgdb_default=template1
OCF_RESKEY_logfile_default=/dev/null
OCF_RESKEY_stop_escalate_default=30
OCF_RESKEY_monitor_user_default=""
OCF_RESKEY_monitor_password_default=""
OCF_RESKEY_monitor_sql_default="select now();"

# Defaults for replication
OCF_RESKEY_rep_mode_default=none
OCF_RESKEY_node_list_default=""
OCF_RESKEY_restore_command_default=""
OCF_RESKEY_master_ip_default=""
OCF_RESKEY_repuser_default="postgres"
OCF_RESKEY_stop_on_demote_default="no"
OCF_RESKEY_primary_conninfo_opt_default=""
OCF_RESKEY_tmpdir_default="/var/lib/pgsql"
OCF_RESKEY_pgctldata_default=/usr/bin/pg_controldata
OCF_RESKEY_xlog_check_count_default="3"
OCF_RESKEY_crm_attr_timeout_default="5" : ${OCF_RESKEY_pgctl=${OCF_RESKEY_pgctl_default}} : ${OCF_RESKEY_psql=${OCF_RESKEY_psql_default}} : ${OCF_RESKEY_pgdata=${OCF_RESKEY_pgdata_default}} : ${OCF_RESKEY_pgdba=${OCF_RESKEY_pgdba_default}} : ${OCF_RESKEY_pghost=${OCF_RESKEY_pghost_default}} : ${OCF_RESKEY_pgport=${OCF_RESKEY_pgport_default}} : ${OCF_RESKEY_config=${OCF_RESKEY_config_default}} : ${OCF_RESKEY_start_opt=${OCF_RESKEY_start_opt_default}} : ${OCF_RESKEY_pgdb=${OCF_RESKEY_pgdb_default}} : ${OCF_RESKEY_logfile=${OCF_RESKEY_logfile_default}} : ${OCF_RESKEY_stop_escalate=${OCF_RESKEY_stop_escalate_default}} : ${OCF_RESKEY_monitor_user=${OCF_RESKEY_monitor_user_default}} : ${OCF_RESKEY_monitor_password=${OCF_RESKEY_monitor_password_default}} : ${OCF_RESKEY_monitor_sql=${OCF_RESKEY_monitor_sql_default}} # for replication : ${OCF_RESKEY_rep_mode=${OCF_RESKEY_rep_mode_default}} : ${OCF_RESKEY_node_list=${OCF_RESKEY_node_list_default}} : ${OCF_RESKEY_restore_command=${OCF_RESKEY_restore_command_default}} : ${OCF_RESKEY_master_ip=${OCF_RESKEY_master_ip_default}} : ${OCF_RESKEY_repuser=${OCF_RESKEY_repuser_default}} : ${OCF_RESKEY_stop_on_demote=${OCF_RESKEY_stop_on_demote_default}} : ${OCF_RESKEY_primary_conninfo_opt=${OCF_RESKEY_primary_conninfo_opt_default}} : ${OCF_RESKEY_tmpdir=${OCF_RESKEY_tmpdir_default}} : ${OCF_RESKEY_pgctldata=${OCF_RESKEY_pgctldata_default}} : ${OCF_RESKEY_xlog_check_count=${OCF_RESKEY_xlog_check_count_default}} : ${OCF_RESKEY_crm_attr_timeout=${OCF_RESKEY_crm_attr_timeout_default}} usage() { cat < 1.0 Resource script for PostgreSQL. It manages a PostgreSQL as an HA resource. Manages a PostgreSQL database instance Path to pg_ctl command. pgctl Start options (-o start_opt in pg_ctl). "-i -p 5432" for example. start_opt Additional pg_ctl options (-w, -W etc..). ctl_opt Path to psql command. psql Path to PostgreSQL data directory. pgdata User that owns PostgreSQL. 
pgdba Hostname/IP address where PostgreSQL is listening pghost Port where PostgreSQL is listening pgport PostgreSQL user that pgsql RA will user for monitor operations. If it's not set pgdba user will be used. monitor_user Password for monitor user. monitor_password SQL script that will be used for monitor operations. monitor_sql Path to the PostgreSQL configuration file for the instance Configuration file Database that will be used for monitoring. pgdb Path to PostgreSQL server log output file. logfile Unix socket directory for PostgeSQL socketdir Number of shutdown retries (using -m fast) before resorting to -m immediate stop escalation Replication mode(none(default)/async/sync). "async" and "sync" require PostgreSQL 9.1 or later. If you use async or sync, it requires node_list, master_ip, restore_command parameters, and needs setting postgresql.conf, pg_hba.conf up for replication. rep_mode All node names. Please separate each node name with a space. This is required for replication. node list restore_command for recovery.conf. This is required for replication. restore_command Master's floating IP address to be connected from hot standby. This parameter is used for "primary_conninfo" in recovery.conf. This is required for replication. master ip User used to connect to the master server. This parameter is used for "primary_conninfo" in recovery.conf. This is required for replication. repuser Whether or not to stop PostgreSQL with instead of restarting it on demote, to speed up failover (yes/no(default)). If this parameter is yes, monitor fails after demote. This is optional for replication. stop_on_demote primary_conninfo options of recovery.conf except host, port, user and application_name. This is optional for replication. primary_conninfo_opt Path to temporary directory. This is optional for replication. tmpdir Path to pg_controldata command. This is optional for replication. pgctldata Number of checking xlog on monitor before promote. 
This is optional for replication. xlog check count The timeout of crm_attribute forever update command. Default value is 5 seconds. This is optional for replication. The timeout of crm_attribute forever update command. EOF } # # Run the given command in the Resource owner environment... # runasowner() { local quietrun="" local loglevel="-err" local var for var in 1 2 do case "$1" in "-q") quietrun="-q" shift 1;; "warn"|"err") loglevel="-$1" shift 1;; *) ;; esac done ocf_run $quietrun $loglevel su $OCF_RESKEY_pgdba -c "cd $OCF_RESKEY_pgdata; $*" } # # Shell escape # escape_string() { echo "$*" | sed -e "s/'/'\\\\''/g" } # # methods: What methods/operations do we support? # pgsql_methods() { cat </dev/null 2>&1" return $? fi # No PID file false } # # pgsql_real_monitor # pgsql_real_monitor() { local loglevel local rc local output # Set the log level of the error message loglevel=${1:-err} if ! pgsql_status then ocf_log info "PostgreSQL is down" return $OCF_NOT_RUNNING fi if ! is_replication; then OCF_RESKEY_monitor_sql=`escape_string "$OCF_RESKEY_monitor_sql"` runasowner -q $loglevel "$OCF_RESKEY_psql $psql_options -c '$OCF_RESKEY_monitor_sql'" rc=$? else #Check replication state output=`su $OCF_RESKEY_pgdba -c "cd $OCF_RESKEY_pgdata; $OCF_RESKEY_psql $psql_options -Atc \"${CHECK_MS_SQL}\""` rc=$? fi if [ $rc -ne 0 ]; then report_psql_error $rc $loglevel return $OCF_ERR_GENERIC fi if is_replication; then case "$output" in f) ocf_log debug "PostgreSQL is running as a primary." return $OCF_RUNNING_MASTER;; t) ocf_log debug "PostgreSQL is running as a hot standby." return $OCF_SUCCESS;; *) ocf_log err "$CHECK_MS_SQL output is $output" return $OCF_ERR_GENERIC;; esac fi return $OCF_SUCCESS } #pgsql_monitor: pgsql_real_monitor() wrapper for replication pgsql_monitor() { local rc local rsc local instance local my_status local data_status local is_master="" pgsql_real_monitor rc=$? if ! 
is_replication; then return $rc fi if [ $rc -ne $OCF_SUCCESS -a $rc -ne "$OCF_RUNNING_MASTER" ]; then return $rc fi # If I am Master if [ $rc -eq $OCF_RUNNING_MASTER ]; then change_data_status "$HOSTNAME" "LATEST" change_pgsql_status "$HOSTNAME" "PRI" control_slave_status || return $OCF_ERR_GENERIC return $rc fi # I can't get master node name from $OCF_RESKEY_CRM_meta_notify_master_uname on monitor, # so I will get master node name using crm_mon -n if output=`crm_mon -n1 | grep " Master"`; then rsc=`echo $OCF_RESOURCE_INSTANCE | cut -d ":" -f 1` instance=0 while : do if [ "$instance" -ge "$OCF_RESKEY_CRM_meta_clone_max" ]; then break fi if echo "$output" | grep "${rsc}:${instance}"; then is_master="yes" break fi instance=`expr $instance + 1` done fi if [ ! -n "$is_master" ]; then # If I am Slave and Master is not exist ocf_log info "Master does not exist." change_pgsql_status "$HOSTNAME" "HS:alone" is_master_right if [ $? -eq 0 ]; then rm -f ${XLOG_NOTE_FILE}.* fi else output=`$CRM_ATTR_FOREVER -N "$HOSTNAME" -n "$PGSQL_DATA_STATUS_ATTR" -G -q` if [ "$output" = "DISCONNECT" ]; then change_pgsql_status "$HOSTNAME" "HS:alone" fi fi return $rc } #pgsql_post_demote: start PostgreSQL to transit from master to slave pgsql_post_demote() { local my_fail_count DEMOTE_NODE=`echo $OCF_RESKEY_CRM_meta_notify_demote_uname | sed "s/ /\n/g" | head -1` ocf_log debug "post-demote called. Demote uname is $DEMOTE_NODE" # I start pgsql on post_demote() # because if it's failed starting pgsql on demote, node is fenced. if [ "$DEMOTE_NODE" = "$HOSTNAME" ]; then if [ "$OCF_RESKEY_stop_on_demote" != "no" ]; then ocf_log info "I don't start PostgreSQL on post-demote because of stop_on_demote=$OCF_RESKEY_stop_on_demote." return $OCF_SUCCESS fi my_fail_count=`$CRM_FAILCOUNT -r $OCF_RESOURCE_INSTANCE -N $HOSTNAME -G -Q | sed "s/INFINITY/1000000/g"` if [ "$my_fail_count" != "0" ]; then ocf_log info "I don't start PostgreSQL on post-demote because of my fail-count=$my_fail_count." 
return $OCF_SUCCESS fi if [ -f $PGSQL_LOCK ]; then ocf_log err "My data may be inconsistent. You have to remove $PGSQL_LOCK file to force to start." return $OCF_ERR_GENERIC fi is_slave_right || return $OCF_ERR_GENERIC pgsql_real_start if [ $? -ne $OCF_SUCCESS ]; then ocf_log err "Can't start PostgreSQL as a hot standby on post-demote." return $OCF_ERR_GENERIC fi else show_master_baseline fi change_pgsql_status "$HOSTNAME" "HS:alone" return $OCF_SUCCESS } pgsql_pre_promote() { local master_baseline local my_master_baseline local cmp_location local number_of_nodes # If my data is newer than new master's one, I fail my resource. PROMOTE_NODE=`echo $OCF_RESKEY_CRM_meta_notify_promote_uname | sed "s/ /\n/g" | head -1` number_of_nodes=`echo $OCF_RESKEY_node_list | sed -e "s/ */ /g" | sed -e "s/^ \| $//g" | sed -e "s/ /\n/g" | wc -l` if [ $number_of_nodes -ge 3 -a "$OCF_RESKEY_rep_mode" = "sync" -a "$PROMOTE_NODE" != "$HOSTNAME" ]; then master_baseline=`$CRM_ATTR_REBOOT -N "$PROMOTE_NODE" -n "$PGSQL_MASTER_BASELINE" -G -q 2>/dev/null` if [ $? -eq 0 ]; then master_baseline=`echo $master_baseline | cut -d ":" -f 2` my_master_baseline=`$CRM_ATTR_REBOOT -N "$HOSTNAME" -n "$PGSQL_MASTER_BASELINE" -G -q 2>/dev/null | cut -d ":" -f 2` # get older location cmp_location=`echo -e "$master_baseline\n$my_master_baseline" | sort | head -1` if [ "$cmp_location" != "$my_master_baseline" ]; then ocf_log err "My data is newer than new master's one. New master's location : $master_baseline" $CRM_FAILCOUNT -r $OCF_RESOURCE_INSTANCE -U $HOSTNAME -v INFINITY return $OCF_ERR_GENERIC fi fi fi return $OCF_SUCCESS } pgsql_notify() { local type="${OCF_RESKEY_CRM_meta_notify_type}" local op="${OCF_RESKEY_CRM_meta_notify_operation}" local rc ocf_log debug "notify: ${type} for ${op}" case $type in pre) case $op in promote) pgsql_pre_promote return $? 
;; esac ;; post) case $op in promote) delete_xlog_location PROMOTE_NODE=`echo $OCF_RESKEY_CRM_meta_notify_promote_uname | sed "s/ /\n/g" | head -1` if [ "$PROMOTE_NODE" != "$HOSTNAME" ]; then delete_master_baseline fi return $OCF_SUCCESS ;; demote) pgsql_post_demote return $? ;; start|stop) if [ "$HOSTNAME " = "$OCF_RESKEY_CRM_meta_notify_master_uname" ]; then control_slave_status fi return $OCF_SUCCESS ;; esac ;; esac return $OCF_SUCCESS } control_slave_status() { local rc local data_status local target local all_data_status local tmp_data_status local node_name local number_of_nodes all_data_status=`su $OCF_RESKEY_pgdba -c "cd $OCF_RESKEY_pgdata; $OCF_RESKEY_psql $psql_options -Atc \"${CHECK_REPLICATION_STATE_SQL}\""` rc=$? if [ $rc -eq 0 ]; then if [ -n "$all_data_status" ]; then all_data_status=`echo $all_data_status | sed "s/\n/ /g"` fi else report_psql_error $rc warn return 1 fi number_of_nodes=`echo $OCF_RESKEY_node_list | sed -e "s/ */ /g" | sed -e "s/^ \| $//g" | sed -e "s/ /\n/g" | wc -l` for target in $OCF_RESKEY_node_list; do if [ "$target" = "$HOSTNAME" ]; then continue fi data_status="DISCONNECT" if [ -n "$all_data_status" ]; then for tmp_data_status in $all_data_status; do node_name=`echo $tmp_data_status | cut -d "|" -f 1` state=`echo $tmp_data_status | cut -d "|" -f 2` sync_state=`echo $tmp_data_status | cut -d "|" -f 3` ocf_log debug "node=$node_name, state=$state, sync_state=$sync_state" if [ "$node_name" = "$target" ];then data_status="$state|$sync_state" break fi done fi case "$data_status" in "STREAMING|SYNC") change_data_status "$target" "$data_status" change_master_score "$target" "$CAN_PROMOTE" change_pgsql_status "$target" "HS:sync" ;; "STREAMING|ASYNC") change_data_status "$target" "$data_status" if [ "$OCF_RESKEY_rep_mode" = "sync" ]; then change_master_score "$target" "$CAN_NOT_PROMOTE" if ! 
is_sync_mode "$target"; then set_sync_mode "$target" fi else if [ $number_of_nodes -le 2 ]; then change_master_score "$target" "$CAN_PROMOTE" else # I can't determine which slave's data is newest in async mode. change_master_score "$target" "$CAN_NOT_PROMOTE" fi fi change_pgsql_status "$target" "HS:async" ;; "STREAMING|POTENTIAL") change_data_status "$target" "$data_status" change_master_score "$target" "$CAN_NOT_PROMOTE" change_pgsql_status "$target" "HS:potential" ;; "DISCONNECT") change_data_status "$target" "$data_status" change_master_score "$target" "$CAN_NOT_PROMOTE" if [ "$OCF_RESKEY_rep_mode" = "sync" ] && is_sync_mode "$target"; then set_async_mode "$target" fi ;; *) change_data_status "$target" "$data_status" change_master_score "$target" "$CAN_NOT_PROMOTE" if [ "$OCF_RESKEY_rep_mode" = "sync" ] && is_sync_mode "$target"; then set_async_mode "$target" fi change_pgsql_status "$target" "HS:connected" ;; esac done return 0 } # This function is designed to be called by start and post-demote. is_slave_right() { local output local my_timelineid local my_checkpoint local newest_timelineid local newest_location local tmp_timelineid local tmp_location local cmp_location local node_list if [ "$OPERATION" = "start" ]; then if [ x != x$OCF_RESKEY_CRM_meta_notify_master_uname ]; then node_list=`echo $OCF_RESKEY_CRM_meta_notify_master_uname | sed "s/ /\n/g" | head -1` else return 0 fi elif [ "$OPERATION" = "notify" -a "$OCF_RESKEY_CRM_meta_notify_type" = "post" -a "$OCF_RESKEY_CRM_meta_notify_operation" = "demote" ]; then node_list=$OCF_RESKEY_CRM_meta_notify_slave_uname else return 1 fi my_timelineid=`get_my_timeline_id` || return 1 my_checkpoint=`get_my_checkpoint` || return 1 ocf_log info "My Timeline ID and Checkpoint : $my_timelineid:$my_checkpoint" newest_timelineid="0" newest_location="0000000000000000" for node in ${node_list}; do while : do output=`$CRM_ATTR_REBOOT -N "$node" -n "$PGSQL_MASTER_BASELINE" -G -q 2>/dev/null` if [ $? 
-ne 0 ]; then ocf_log warn "Can't get $node master baseline. Waiting..." sleep 1 else tmp_timelineid=`echo $output | cut -d ":" -f 1` tmp_location=`echo $output | cut -d ":" -f 2` ocf_log info "$node master baseline : $tmp_timelineid:$tmp_location" # get newer location cmp_location=`echo -e "$tmp_location\n$newest_location" | sort -r | head -1` if [ "$cmp_location" = "$tmp_location" ]; then newest_location=$tmp_location if [ "$tmp_timelineid" -ge "$newest_timelineid" ]; then newest_timelineid=$tmp_timelineid fi fi break fi done done if [ "$my_timelineid" -gt "$newest_timelineid" ]; then ocf_log info "My Timeline ID is newer than Master's baseline." return 0 fi if [ "$OCF_RESKEY_stop_on_demote" = "no" -a "$my_timelineid" -lt "$newest_timelineid" ]; then ocf_log err "My data may be inconsistent because my Timeline ID is too old." return 1 fi # get older location cmp_location=`echo -e "$newest_location\n$my_checkpoint" | sort | head -1` if [ "$my_checkpoint" = "$cmp_location" ]; then return 0 fi ocf_log err "My data is inconsistent." return 1 } is_master_right() { local old local new local output local data_status local node local mylocation local count local newestXlog local oldfile local newfile ocf_log debug "Checking if master is right." data_status=`$CRM_ATTR_FOREVER -N "$HOSTNAME" -n "$PGSQL_DATA_STATUS_ATTR" -G -q` if [ "$OCF_RESKEY_rep_mode" = "sync" ]; then if [ -n "$data_status" -a "$data_status" != "STREAMING|SYNC" -a "$data_status" != "LATEST" ]; then ocf_log warn "My data is out-of-date. status=$data_status" return 1 fi else if [ -n "$data_status" -a "$data_status" != "STREAMING|SYNC" -a "$data_status" != "STREAMING|ASYNC" -a "$data_status" != "LATEST" ]; then ocf_log warn "My data is out-of-date. status=$data_status" return 1 fi fi ocf_log info "My data status=$data_status." show_xlog_location if [ $? -ne 0 ]; then ocf_log err "Failed to show my xlog location." 
exit $OCF_ERR_GENERIC fi old=0 for count in `seq $OCF_RESKEY_xlog_check_count`; do if [ -f ${XLOG_NOTE_FILE}.$count ]; then old=$count continue fi break done new=`expr $old + 1` # get xlog locations of all nodes for node in ${OCF_RESKEY_node_list}; do output=`$CRM_ATTR_REBOOT -N "$node" -n "$PGSQL_XLOG_LOC_NAME" -G -q 2>/dev/null` if [ $? -ne 0 ]; then ocf_log warn "Can't get $node xlog location." continue else ocf_log info "$node xlog location : $output" echo "$node $output" >> ${XLOG_NOTE_FILE}.${new} if [ "$node" = "$HOSTNAME" ]; then mylocation=$output fi fi done oldfile=`cat ${XLOG_NOTE_FILE}.${old} 2>/dev/null` newfile=`cat ${XLOG_NOTE_FILE}.${new} 2>/dev/null` if [ "$oldfile" != "$newfile" ]; then # reset counter rm -f ${XLOG_NOTE_FILE}.* echo -e "$newfile" > ${XLOG_NOTE_FILE}.0 return 1 fi if [ "$new" -ge "$OCF_RESKEY_xlog_check_count" ]; then newestXlog=`echo -e "$newfile" | sort -t " " -k 2,3 -r | head -1 | cut -d " " -f 2` if [ "$newestXlog" = "$mylocation" ]; then ocf_log info "I have a master right." $CRM_MASTER -v $PROMOTE_ME return 0 fi change_data_status "$HOSTNAME" "DISCONNECT" ocf_log info "I don't have correct master data." # reset counter rm -f ${XLOG_NOTE_FILE}.* echo -e "$newfile" > ${XLOG_NOTE_FILE}.0 fi return 1 } is_replication() { if [ "$OCF_RESKEY_rep_mode" != "none" ]; then return 0 fi return 1 } get_my_checkpoint() { local output local output1 local output2 local log1 local log2 output=`$OCF_RESKEY_pgctldata $OCF_RESKEY_pgdata | grep "Latest checkpoint location:" | sed -e "s/Latest checkpoint location: *//g"` if [ $? -ne 0 ]; then ocf_log err "Can't get my latest checkpoint location." 
return 1 fi output1=`echo "$output" | cut -d "/" -f 1` output2=`echo "$output" | cut -d "/" -f 2` log1=`printf "%08s\n" $output1 | sed "s/ /0/g"` log2=`printf "%08s\n" $output2 | sed "s/ /0/g"` echo "${log1}${log2}" return 0 } get_my_location() { local rc local output local replay_loc local receive_loc local output1 local output2 local log1 local log2 local newer_location output=`su $OCF_RESKEY_pgdba -c "cd $OCF_RESKEY_pgdata; $OCF_RESKEY_psql $psql_options -Atc \"${CHECK_XLOG_LOC_SQL}\""` rc=$? if [ $rc -ne 0 ]; then report_psql_error $rc warn ocf_log err "Can't get my xlog location." return 1 fi replay_loc=`echo $output | cut -d "|" -f 1` receive_loc=`echo $output | cut -d "|" -f 2` output1=`echo "$replay_loc" | cut -d "/" -f 1` output2=`echo "$replay_loc" | cut -d "/" -f 2` log1=`printf "%08s\n" $output1 | sed "s/ /0/g"` log2=`printf "%08s\n" $output2 | sed "s/ /0/g"` replay_loc="${log1}${log2}" output1=`echo "$receive_loc" | cut -d "/" -f 1` output2=`echo "$receive_loc" | cut -d "/" -f 2` log1=`printf "%08s\n" $output1 | sed "s/ /0/g"` log2=`printf "%08s\n" $output2 | sed "s/ /0/g"` receive_loc="${log1}${log2}" newer_location=`echo -e "$replay_loc\n$receive_loc" | sort -r | head -1` echo "$newer_location" return 0 } get_my_timeline_id() { local timelineid timelineid=`$OCF_RESKEY_pgctldata $OCF_RESKEY_pgdata | grep "Latest checkpoint's TimeLineID:" | sed -e "s/Latest checkpoint's TimeLineID: *//g"` if [ $? -ne 0 ]; then ocf_log err "Can't get my Timeline ID." return 1 fi echo $timelineid return 0 } show_xlog_location() { local location location=`get_my_location` || return 1 $CRM_ATTR_REBOOT -N "$HOSTNAME" -n "$PGSQL_XLOG_LOC_NAME" -v "$location" return $? } delete_xlog_location() { $CRM_ATTR_REBOOT -N "$HOSTNAME" -n "$PGSQL_XLOG_LOC_NAME" -D return $? } show_master_baseline() { local rc local location local timelineid runasowner -q err "$OCF_RESKEY_psql $psql_options -c 'CHECKPOINT'" rc=$? 
if [ $rc -ne 0 ]; then report_psql_error $rc warn fi location=`get_my_location` timelineid=`get_my_timeline_id` ocf_log info "My master baseline : $timelineid:$location." $CRM_ATTR_REBOOT -N "$HOSTNAME" -n "$PGSQL_MASTER_BASELINE" -v "$timelineid:$location" return $? } delete_master_baseline() { $CRM_ATTR_REBOOT -N "$HOSTNAME" -n "$PGSQL_MASTER_BASELINE" -D return $? } set_async_mode_all() { [ "$OCF_RESKEY_rep_mode" = "sync" ] || return 0 ocf_log info "Set all nodes into async mode." runasowner -q err "echo "" > \"$REP_MODE_CONF\"" if [ $? -ne 0 ]; then ocf_log err "Can't set all nodes into async mode." return 1 fi return 0 } set_async_mode() { local sync_node_in_conf sync_node_in_conf=`cat $REP_MODE_CONF | cut -d "'" -f 2` if [ -n "$sync_node_in_conf" ]; then if ! echo $sync_node_in_conf | grep "$1"; then ocf_log info "$1 is already in async mode." return 0 else ocf_log info "Setup $1 into async mode." sync_node_in_conf=`echo $sync_node_in_conf | sed "s/$1//g" | sed "s/^,//g" | sed "s/,,/,/g" | sed "s/,$//g"` if [ -n $sync_node_in_conf ]; then echo "synchronous_standby_names = '$sync_node_in_conf'" > "$REP_MODE_CONF" else echo "" > "$REP_MODE_CONF" fi fi else ocf_log info "$1 is already in async mode." return 0 fi ocf_log info "All synced nodes : \"$sync_node_in_conf\"" reload_conf return $? } set_sync_mode() { local sync_node_in_conf sync_node_in_conf=`cat $REP_MODE_CONF | cut -d "'" -f 2` if [ -n "$sync_node_in_conf" ]; then if echo "$sync_node_in_conf" | grep "$1"; then ocf_log info "$1 is already in sync mode." return 0 else ocf_log info "Setup $1 into sync mode." echo "synchronous_standby_names = '$sync_node_in_conf,$1'" > "$REP_MODE_CONF" fi else ocf_log info "Setup $1 into sync mode." echo "synchronous_standby_names = '$1'" > "$REP_MODE_CONF" fi sync_node_in_conf=`cat $REP_MODE_CONF | cut -d "'" -f 2` ocf_log info "All synced nodes : \"$sync_node_in_conf\"" reload_conf return $? 
} is_sync_mode() { local target sync_node_in_conf=`cat $REP_MODE_CONF | cut -d "'" -f 2 | sed "s/,/ /g"` if [ -n "$sync_node_in_conf" ]; then for target in $sync_node_in_conf; do if [ "$target" = "$1" ];then return 0 fi done fi return 1 } reload_conf() { # Invoke pg_ctl runasowner "$OCF_RESKEY_pgctl -D $OCF_RESKEY_pgdata reload" if [ $? -eq 0 ]; then ocf_log info "Reload configuration file." else ocf_log err "Can't reload configuration file." return 1 fi return 0 } make_recovery_conf() { runasowner "touch $RECOVERY_CONF" if [ $? -ne 0 ]; then ocf_log err "Can't create recovery.conf." return 1 fi cat > $RECOVERY_CONF <$2 by $HOSTNAME is prohibited." return 0 fi fi ocf_log info "Changing $PGSQL_STATUS_ATTR on $1 : $output->$2." $CRM_ATTR_REBOOT -N "$1" -n "$PGSQL_STATUS_ATTR" -v "$2" if [ $? -ne 0 ]; then ocf_log err "Can't change $PGSQL_STATUS_ATTR." return 1 fi fi return 0 } # change pgsql-data-status. # arg1:node, arg2: value change_data_status() { local output if ! node_exist $1; then return 0 fi while : do output=`$CRM_ATTR_FOREVER -N "$1" -n "$PGSQL_DATA_STATUS_ATTR" -G -q` if [ "$output" != "$2" ]; then ocf_log info "Changing $PGSQL_DATA_STATUS_ATTR on $1 : $output->$2." exec_func_with_timeout "$CRM_ATTR_FOREVER" "-N $1 -n $PGSQL_DATA_STATUS_ATTR -v \"$2\"" $OCF_RESKEY_crm_attr_timeout if [ $? -ne 0 ]; then ocf_log err "Can't change $PGSQL_DATA_STATUS_ATTR." return 1 fi else break fi done return 0 } # change master-score # arg1:node, arg2: score change_master_score() { local rsc local instance local current_score if ! 
is_node_online $1; then return 0 fi rsc=`echo $OCF_RESOURCE_INSTANCE | cut -d ":" -f 1` instance=0 while : do if [ "$instance" -ge "$OCF_RESKEY_CRM_meta_clone_max" ]; then break fi if [ "${rsc}:${instance}" = "$OCF_RESOURCE_INSTANCE" ]; then instance=`expr $instance + 1` continue fi current_score=`$CRM_ATTR_REBOOT -N "$1" -n "master-${rsc}:${instance}" -G -q 2>/dev/null` if [ -n "$current_score" -a "$current_score" != "$2" ]; then ocf_log info "Changing ${rsc}:${instance} master score on $1 : $current_score->$2." $CRM_ATTR_REBOOT -N "$target" -n "master-${rsc}:${instance}" -v "$2" if [ $? -ne 0 ]; then ocf_log err "Can't change master score." return 1 fi fi instance=`expr $instance + 1` done return 0 } report_psql_error() { local rc local loglevel rc=$1 loglevel=${2:-err} ocf_log $loglevel "PostgreSQL $OCF_RESKEY_pgdb isn't running" if [ $rc -eq 1 ]; then ocf_log err "Fatal error (out of memory, file not found, etc.) occurred while executing the psql command." elif [ $rc -eq 2 ]; then ocf_log $loglevel "Connection error (connection to the server went bad and the session was not interactive) occurred while executing the psql command." elif [ $rc -eq 3 ]; then ocf_log err "Script error (the variable ON_ERROR_STOP was set) occurred while executing the psql command." fi } # # timeout management function # arg1 : command # arg2 : command's args # arg3 : timeout(s) # exec_func_with_timeout() { local func_pid local count local rc $1 `eval echo $2` & func_pid=$! count=0 while kill -0 $func_pid >/dev/null 2>&1; do sleep 1 count=`expr $count + 1` if [ $count -ge $3 ]; then ocf_log debug "Execute $1 time out." kill -KILL $func_pid >/dev/null 2>&1 return 0 fi done wait $func_pid rc=$? return $rc } is_node_online() { crm_mon -1 -n | grep -e "^Node $1 " -e "^Node $1:" | grep -q -v "OFFLINE" return $? } node_exist() { crm_mon -1 -n | grep -q "^Node $1" return $? } check_binary2() { if ! 
have_binary "$1"; then ocf_log err "Setup problem: couldn't find command: $1" return 1 fi return 0 } check_config() { local rc=0 if [ ! -f "$1" ]; then if ocf_is_probe; then ocf_log info "Configuration file is $1 not readable during probe." rc=1 else ocf_log err "Configuration file $1 doesn't exist" rc=2 fi fi return $rc } # Validate most critical parameters pgsql_validate_all() { if ! check_binary2 "$OCF_RESKEY_pgctl" || ! check_binary2 "$OCF_RESKEY_psql"; then return $OCF_ERR_INSTALLED fi if [ -n "$OCF_RESKEY_config" -a ! -f "$OCF_RESKEY_config" ]; then check_config "$OCF_RESKEY_config" [ $? -eq 2 ] && return $OCF_ERR_INSTALLED fi getent passwd $OCF_RESKEY_pgdba >/dev/null 2>&1 if [ ! $? -eq 0 ]; then ocf_log err "User $OCF_RESKEY_pgdba doesn't exist"; return $OCF_ERR_INSTALLED; fi if ocf_is_probe; then ocf_log info "Don't check $OCF_RESKEY_pgdata during probe" else if ! runasowner "test -w $OCF_RESKEY_pgdata"; then ocf_log err "Directory $OCF_RESKEY_pgdata is not writable by $OCF_RESKEY_pgdba" return $OCF_ERR_PERM; fi fi if [ -n "$OCF_RESKEY_monitor_user" -a ! -n "$OCF_RESKEY_monitor_password" ] then ocf_log err "monitor password can't be empty" return $OCF_ERR_CONFIGURED fi if [ ! -n "$OCF_RESKEY_monitor_user" -a -n "$OCF_RESKEY_monitor_password" ] then ocf_log err "monitor_user has to be set if monitor_password is set" return $OCF_ERR_CONFIGURED fi if is_replication; then if ! ocf_is_ms; then ocf_log err "Replication requires Master/Slave configuration." return $OCF_ERR_CONFIGURED fi if [ ! "$OCF_RESKEY_rep_mode" = "sync" -a ! "$OCF_RESKEY_rep_mode" = "async" ]; then ocf_log err "Invalid rep_mode : $OCF_RESKEY_rep_mode" return $OCF_ERR_CONFIGURED fi if [ ! -d "$OCF_RESKEY_tmpdir" ]; then ocf_log err "$OCF_RESKEY_tmpdir directory doesn't exist." return $OCF_ERR_INSTALLED fi if [ ! -n "$OCF_RESKEY_master_ip" ]; then ocf_log err "master_ip can't be empty." return $OCF_ERR_CONFIGURED fi if [ ! 
-n "$OCF_RESKEY_node_list" ]; then ocf_log err "node_list can't be empty." return $OCF_ERR_CONFIGURED fi if [ ! -n "$OCF_RESKEY_restore_command" ]; then ocf_log err "restore_command can't be empty." return $OCF_ERR_CONFIGURED fi if ! check_binary2 "$OCF_RESKEY_pgctldata"; then return $OCF_ERR_INSTALLED fi fi return $OCF_SUCCESS } # # Check if we need to create a log file # check_log_file() { if [ ! -f "$1" ] then touch $1 > /dev/null 2>&1 chown $OCF_RESKEY_pgdba:`getent passwd $OCF_RESKEY_pgdba | cut -d ":" -f 4` $1 fi #Check if $OCF_RESKEY_pgdba can write to the log file if ! runasowner "test -w $1" then return 1 fi return 0 } # # Check socket directory # check_socket_dir() { if [ ! -d "$OCF_RESKEY_socketdir" ]; then if ! mkdir "$OCF_RESKEY_socketdir"; then ocf_log err "Can't create directory $OCF_RESKEY_socketdir" exit $OCF_ERR_PERM fi if ! chown $OCF_RESKEY_pgdba:`getent passwd \ $OCF_RESKEY_pgdba | cut -d ":" -f 4` "$OCF_RESKEY_socketdir" then ocf_log err "Can't change ownership for $OCF_RESKEY_socketdir" exit $OCF_ERR_PERM fi if ! chmod 2775 "$OCF_RESKEY_socketdir"; then ocf_log err "Can't change permissions for $OCF_RESKEY_socketdir" exit $OCF_ERR_PERM fi else if ! runasowner "touch $OCF_RESKEY_socketdir/test.$$"; then ocf_log err "$OCF_RESKEY_pgdba can't create files in $OCF_RESKEY_socketdir" exit $OCF_ERR_PERM fi rm $OCF_RESKEY_socketdir/test.$$ fi } # # 'main' starts here... 
#

# Exactly one argument (the requested action) is accepted.
[ $# -eq 1 ] || {
    usage
    exit $OCF_ERR_GENERIC
}

# Files that live inside the PostgreSQL data directory.
PIDFILE="${OCF_RESKEY_pgdata}/postmaster.pid"
BACKUPLABEL="${OCF_RESKEY_pgdata}/backup_label"

# Everything below is only defined when a replication mode is configured.
if is_replication; then
    # Node identity and the action being executed.
    HOSTNAME=$(uname -n)
    OPERATION=$1

    # Working files.
    RECOVERY_CONF="${OCF_RESKEY_pgdata}/recovery.conf"
    REP_MODE_CONF="${OCF_RESKEY_tmpdir}/rep_mode.conf"
    PGSQL_LOCK="${OCF_RESKEY_tmpdir}/PGSQL.lock"
    XLOG_NOTE_FILE="${OCF_RESKEY_tmpdir}/xlog_note"

    # Cluster commands, including their fixed options.
    CRM_MASTER="${HA_SBIN_DIR}/crm_master -l reboot"
    CRM_ATTR_REBOOT="${HA_SBIN_DIR}/crm_attribute -l reboot"
    CRM_ATTR_FOREVER="${HA_SBIN_DIR}/crm_attribute -l forever"
    CRM_FAILCOUNT="${HA_SBIN_DIR}/crm_failcount"

    # Master score values.
    CAN_NOT_PROMOTE="-INFINITY"
    CAN_PROMOTE="100"
    PROMOTE_ME="1000"

    # SQL statements run through psql.
    CHECK_MS_SQL="select pg_is_in_recovery()"
    CHECK_XLOG_LOC_SQL="select pg_last_xlog_replay_location(),pg_last_xlog_receive_location()"
    CHECK_REPLICATION_STATE_SQL="select application_name,upper(state),upper(sync_state) from pg_stat_replication"

    # Names of the node attributes maintained by this agent.
    PGSQL_STATUS_ATTR="pgsql-status"
    PGSQL_DATA_STATUS_ATTR="pgsql-data-status"
    PGSQL_XLOG_LOC_NAME="pgsql-xlog-loc"
    PGSQL_MASTER_BASELINE="pgsql-master-baseline"
fi

# These two actions are answered before any parameter validation happens.
case "$1" in
    methods)
        pgsql_methods
        exit $?
        ;;
    meta-data)
        meta_data
        exit $OCF_SUCCESS
        ;;
esac

# $OCF_RESKEY_pgdata has to be initialized at this moment, because the
# socket directory is looked up in the PostgreSQL configuration file
# unless it was set explicitly.
: ${OCF_RESKEY_socketdir=$(get_pgsql_param unix_socket_directory)}

# Keep the validation result; it is evaluated right after this point.
pgsql_validate_all
rc=$?
# $rc holds the exit status of the pgsql_validate_all call made just before
# this point. "validate-all" simply reports it.
[ "$1" = "validate-all" ] && exit $rc

# Validation failed: stop is reported as successful and monitor/status as
# "not running"; every other action fails with the validation status.
if [ "$rc" -ne 0 ]
then
    case "$1" in
        stop)    exit $OCF_SUCCESS;;
        monitor) exit $OCF_NOT_RUNNING;;
        status)  exit $OCF_NOT_RUNNING;;
        *)       exit $rc;;
    esac
fi

US=$(id -u -n)

# Only root or the database owner may run the agent. The operands are quoted
# and the two comparisons are separate tests, so an empty value (for example
# when id fails) is rejected instead of turning the test into a syntax error
# that would let the check be skipped.
if [ "$US" != root ] && [ "$US" != "$OCF_RESKEY_pgdba" ]
then
    ocf_log err "$0 must be run as root or $OCF_RESKEY_pgdba"
    exit $OCF_ERR_GENERIC
fi

# make psql command options
# With a dedicated monitor user the credentials are exported through
# PGUSER/PGPASSWORD; otherwise the database owner is passed with -U.
if [ -n "$OCF_RESKEY_monitor_user" ]; then
    PGUSER=$OCF_RESKEY_monitor_user; export PGUSER
    PGPASSWORD=$OCF_RESKEY_monitor_password; export PGPASSWORD
    psql_options="-p $OCF_RESKEY_pgport $OCF_RESKEY_pgdb"
else
    psql_options="-p $OCF_RESKEY_pgport -U $OCF_RESKEY_pgdba $OCF_RESKEY_pgdb"
fi

# An explicit host wins; otherwise -h points at the socket directory, if
# one is known.
if [ -n "$OCF_RESKEY_pghost" ]; then
    psql_options="$psql_options -h $OCF_RESKEY_pghost"
else
    if [ -n "$OCF_RESKEY_socketdir" ]; then
        psql_options="$psql_options -h $OCF_RESKEY_socketdir"
    fi
fi

# What kind of method was invoked?
case "$1" in
    status)     if pgsql_status
                then
                    ocf_log info "PostgreSQL is up"
                    exit $OCF_SUCCESS
                else
                    ocf_log info "PostgreSQL is down"
                    exit $OCF_NOT_RUNNING
                fi;;

    monitor)    pgsql_monitor
                exit $?;;

    start)      pgsql_start
                exit $?;;

    promote)    pgsql_promote
                exit $?;;

    demote)     pgsql_demote
                exit $?;;

    notify)     pgsql_notify
                exit $?;;

    stop)       pgsql_stop
                exit $?;;

    *)          exit $OCF_ERR_UNIMPLEMENTED;;
esac