[prev in list] [next in list] [prev in thread] [next in thread]
List: linux-ha-dev
Subject: Re: [Linux-ha-dev] Cannot send T_STARTING
From: Alan Robertson <alanr () unix ! sh>
Date: 2003-06-18 20:02:07
[Download RAW message or body]
Ozan Eren Bilgen wrote:
> Hi,
>
> The code segment and necessary definitions are attached below. Maybe I did
> something wrong, because this error occurs every time.
>
> gdb says nothing more than what it prints in the first line, "new thread (a
> number) ...", which is probably not related to this failure.
>
> The ltrace output is more than 300K, so I didn't attach it, out of respect
> for your inbox. If you want to see it, please visit:
>
> http://gsu.linux.org.tr/~mavi/2ncm/ltrace_2003-06-18
Thanks! I excerpted the relevant part below.
>
> The SIGINT in the ltrace is my Ctrl+C, which tells my program to
> terminate.
>
> How do I enable debugging?
You can enable debug from heartbeat with -d. If you give it -ddddddd then
it will be EXTREMELY verbose. You will know almost everything that it does ;-)
Here's the bug:
if (hac->we->llc_ops->sendnodemsg (hac->we, reply, hac->they->nodename)
== HA_OK)
> {
> cl_log (LOG_ERR, "Cannot send message to [%s]", hac->they->nodename);
> cl_log (LOG_ERR, "REASON: %s", hac->we->llc_ops->errmsg (hac->we));
> ha_log_message (reply);
The bug is that you had an == HA_OK which should have been != HA_OK. This
is why including the source code is essential. Sometimes it's just that it
is necessary to get other eyeballs on the source to see what the author
can't see...
> Ozan Eren Bilgen
>
>
>
> ------------------------------------------------------------------------
>
> #include <stdio.h>
> #include <string.h>
> #include <heartbeat/ha_msg.h>
> #include <heartbeat/heartbeat.h>
> #include <heartbeat/hb_api.h>
> #include <clplumbing/cl_log.h>
> #include <clplumbing/cl_signal.h>
> #include <malloc.h>
> #include <stdlib.h>
> #include <unistd.h>
> #include <error.h>
> #include <errno.h>
> #include <time.h>
>
> #include "2ncm.h"
> #include "rscops.h"
> #include "config.h"
> #include "callback.h"
>
>
> int
> main (int argc, char **argv)
> {
> struct ha_cluster *hac;
>
> struct ha_msg *reply;
>
> ...
>
> cl_log_enable_stderr (TRUE);
> cl_log_set_entity (argv[0]);
> cl_log_set_facility (LOG_DAEMON);
>
> init (&hac);
>
> gethostname (hac->config->host, MAXHOSTLENGTH);
> parse_hacf (hac, "/etc/ha.d/ha.cf");
> cl_log(LOG_INFO, "/etc/ha.d/ha.cf parsed.");
> parse_haresources (hac, "/etc/ha.d/haresources");
> cl_log(LOG_INFO, "/etc/ha.d/haresources parsed.");
>
> if ((hac->we = ll_cluster_new ("heartbeat")) == NULL)
> {
> cl_log (LOG_ERR, "Cannot create a heartbeat instance.");
> disaster (hac, NO, JUST_FREE);
> }
> else
> {
> cl_log (LOG_INFO, "Returned a handle to the heartbeat library instance. (PID=%ld)", \
> (long) getpid ()); }
>
> if (hac->we->llc_ops->signon (hac->we, "2NCM") != HA_OK)
> {
> cl_log (LOG_ERR, "Cannot sign on to heartbeat.");
> cl_log (LOG_ERR, "REASON: %s", hac->we->llc_ops->errmsg (hac->we));
> disaster (hac, NO, DELETE);
> }
>
> cl_log (LOG_INFO, "Signed on as \"2NCM\".");
> #if 0
> if (hac->we->llc_ops->setfmode (hac->we, LLC_FILTER_PMODE) == HA_OK)
> {
> cl_log(LOG_INFO, "PMODE filter established.");
> }
> else
> {
> cl_log(LOG_ERR, "Cannot filter with PMODE!");
> }
> #endif
> init_nodewalk(hac);
> cl_log (LOG_INFO, "Nodewalk ended.");
>
> init_callback(hac);
> cl_log (LOG_INFO, "Callback fonctions add completed.");
>
> CL_SIGINTERRUPT (SIGINT, 1);
> CL_SIGNAL (SIGINT, gotsig);
>
> cl_log (LOG_INFO, "SIGINT hook done.");
>
> if (hac->we->llc_ops->setmsgsignal (hac->we, 0) == HA_OK)
> {
> cl_log (LOG_INFO, "setmsgsignal() is set to 0.");
> }
> else
> {
> cl_log (LOG_INFO, "Cannot set setmsgsignal() to 0.");
> };
>
> if (hac->config->our_active_links == 0)
> {
> /* OPPPSS! */
> cl_log (LOG_ERR, "2NCM started but we don't have any links UP!");
> }
> else
> {
> if ((hac->they->active_links > 0) && (hac->config->auto_failback == ON))
> {
> cl_log (LOG_INFO, "According to auto_failback, trying to send [%s] to [%s].", \
> T_STARTING, hac->they->nodename); /* We send a starting message and want to hear a \
> T_RESOURCES on callback */ reply = ha_msg_new (0);
> ha_msg_add (reply, F_TYPE, T_STARTING);
> if (hac->we->llc_ops->sendnodemsg (hac->we, reply, hac->they->nodename) == HA_OK)
> {
> cl_log (LOG_ERR, "Cannot send message to [%s]", hac->they->nodename);
> cl_log (LOG_ERR, "REASON: %s", hac->we->llc_ops->errmsg (hac->we));
> ha_log_message (reply);
> }
> else
> {
> cl_log(LOG_INFO, "[%s] message was sent. Waiting [%s] on the callback.", \
> T_STARTING, T_RESOURCES); }
> ha_msg_del (reply);
> reply = NULL;
> }
> else
> {
> cl_log (LOG_INFO, "[%s] has no active links, T_STARTING was not send.", \
> hac->they->nodename); }
> }
>
> cl_log (LOG_INFO, "Waiting for messages...");
> errno = 0;
>
> ...
>
> }
>
> /*
> * 2ncm.h - Internal definitions used entirely in 2NCM
> *
> * ...
> */
>
> #ifndef __2NCM_H__
> #define __2NCM_H__ 1
>
> #include <heartbeat/heartbeat.h>
> #include <heartbeat/ha_msg.h>
> #include <heartbeat/hb_api.h>
>
> #include "rscops.h"
> #include "config.h"
> #include "callback.h"
>
> /* ... */
>
> enum boolean { NO, YES };
> enum auto_failback_status { ON, OFF, LEGACY };
> enum resource_status { RELEASED, ABOUTTOBERELEASED, ACTIVATED, ABOUTTOBEACTIVATED \
> }; enum resource_level { ANY, LOCAL, FOREIGN, ALL };
> enum node_status { DEAD, UP, ACTIVE };
>
> /* ... */
>
> #define MAXHOSTLENGTH 200
>
> /* ... */
>
> struct ha_cluster
> {
> struct ll_cluster *we;
> struct othernode *they;
> struct cluster_configuration *config;
> struct resource_flags *flags;
> struct resource_operations *rscops;
> struct ha_service_group **ha_service_groups;
> };
>
> struct othernode
> {
> char *nodename;
> enum resource_level hold_resources;
> enum node_status status;
> enum boolean need_stonith;
> int active_links;
> };
>
> struct cluster_configuration
> {
> int our_active_links;
> enum auto_failback_status auto_failback; /* ON | OFF | LEGACY */
> char *host;
> time_t configuration_time_haresources;
> time_t configuration_time_hacf;
> time_t started;
> int ha_service_groups_count; /* =~ the number of useful lines in \
> /etc/ha.d/haresources */ };
>
> struct resource_flags
> {
> enum job_status resource_management;
> enum resource_level we_hold_resources;
> enum node_status status;
> };
>
> /* ... */
>
> #endif /* __2NCM_H__ */
>
>
> ------------------------------------------------------------------------
>
> lt-heartbeat: 2003/06/18_16:03:09 info: Logging defaulting to /var/log/ha-log
> lt-heartbeat: 2003/06/18_16:03:09 info: **************************
> lt-heartbeat: 2003/06/18_16:03:09 info: Configuration validated. Starting heartbeat \
> 1.0.2
> lt-heartbeat: 2003/06/18_16:03:09 info: lt-heartbeat: version 1.0.2
> lt-heartbeat: 2003/06/18_16:03:09 info: Heartbeat generation: 49
> lt-heartbeat: 2003/06/18_16:03:09 info: Starting serial heartbeat on tty /dev/ttyS0 \
> (19200 baud)
> lt-heartbeat: 2003/06/18_16:03:09 info: UDP Broadcast heartbeat started on port 694 \
> (694) interface eth0
> lt-heartbeat: 2003/06/18_16:03:09 info: pid 1115 locked in memory.
> lt-heartbeat: 2003/06/18_16:03:09 info: pid 1116 locked in memory.
> lt-heartbeat: 2003/06/18_16:03:09 info: pid 1117 locked in memory.
> lt-heartbeat: 2003/06/18_16:03:09 info: pid 1118 locked in memory.
> lt-heartbeat: 2003/06/18_16:03:09 info: pid 1119 locked in memory.
> lt-heartbeat: 2003/06/18_16:03:09 info: Local status now set to: 'up'
> lt-heartbeat: 2003/06/18_16:03:09 info: pid 1113 locked in memory.
> lt-heartbeat: 2003/06/18_16:03:11 info: Link spiderman:eth0 up.
> lt-heartbeat: 2003/06/18_16:03:11 info: Local status now set to: 'active'
> lt-heartbeat: 2003/06/18_16:03:11 info: Link mcfly:eth0 up.
> lt-heartbeat: 2003/06/18_16:03:11 info: Status update for node mcfly: status active
> lt-heartbeat: 2003/06/18_16:04:42 info: EOF from client pid 1121
Here's the relevant part of the ltrace output... I expected to see more
cross-library linkages - but calling through the function pointers seems to
defeat that :-(
cl_log(6, 0x0804dd00, 1194, 0x4008700e, 0x4018eee0/home/mavi/2ncm/2NCM:
info: Callback fonctions add completed.
) = 0
siginterrupt(2, 1, 1194, 0x4008700e, 0x4018eee0) = 0
cl_signal_set_simple_handler(2, 0x08048d70, 0, 0x4008700e, 0x4018eee0) = 0
cl_log(6, 0x0804dd22, 0, 0x4008700e, 0x4018eee0/home/mavi/2ncm/2NCM: info:
SIGINT hook done.
) = 0
cl_log(6, 0x0804dd34, 0, 0x4008700e, 0x4018eee0/home/mavi/2ncm/2NCM: info:
setmsgsignal() is set to 0.
) = 0
cl_log(6, 0x0804ddc0, 0x0804db87, 0x08052fd8,
0x4018eee0/home/mavi/2ncm/2NCM: info: According to auto_failback, trying to
send [starting] to [mcfly].
) = 0
ha_msg_new(0, 0x0804ddc0, 0x0804db87, 0x08052fd8, 0x4018eee0) = 0x08054cc0
ha_msg_add(0x08054cc0, 0x0804ddf9, 0x0804db87, 0x08052fd8, 0x4018eee0) = 1
cl_log(3, 0x0804ddfb, 0x08052fd8, 0x08052fd8,
0x4018eee0/home/mavi/2ncm/2NCM: ERROR: Cannot send message to [mcfly]
) = 0
cl_log(3, 0x0804d980, 0x40035040, 0x08052fd8,
0x4018eee0/home/mavi/2ncm/2NCM: ERROR: REASON:
) = 0
ha_log_message(0x08054cc0, 0x0804d980, 0x40035040, 0x08052fd8,
0x4018eee0/home/mavi/2ncm/2NCM: info: MSG: Dumping message with 2 fields
/home/mavi/2ncm/2NCM: info: MSG[0]: [t=starting]
/home/mavi/2ncm/2NCM: info: MSG[1]: [dest=mcfly]
) = 2
ha_msg_del(0x08054cc0, 0x0804d980, 0x40035040, 0x08052fd8, 0x4018eee0) = 0
cl_log(6, 0x0804de93, 0x40035040, 0x08052fd8,
0x4018eee0/home/mavi/2ncm/2NCM: info: Waiting for messages...
) = 0
__errno_location() = 0x4018ece0
ha_msg_value(0x08055de8, 0x0804ddf9, 0xbffffae8, 0x08049d51, 0x4018eee0) =
0x08055e08
ha_msg_value(0x08055de8, 0x0804dead, 0xbffffae8, 0x08049d51, 0x4018eee0) =
0x080546a8
cl_log(5, 0x0804dec0, 1, 0x08055e08, 0x080546a8/home/mavi/2ncm/2NCM: notice:
Got message 1 of type [hbapi-clstat] from [spiderman]
) = 0
ha_msg_del(0x08055de8, 0x0804dead, 0xbffffae8, 0x08049d51, 0x4018eee0) = 0
--- SIGINT (Interrupt) ---
cl_log(6, 0x0804d89a, 0, 0, 0/home/mavi/2ncm/2NCM: info: Recieved leave signal.
) = 0
breakpointed at 0x4012d767 (?)
cl_log(6, 0x0804d98b, 0xbffffaa8, 0x40031092,
0x08055de8/home/mavi/2ncm/2NCM: info: Signed off from HeartBeat.
) = 0
cl_log(6, 0x0804d9c0, 0xbffffaa8, 0x40031092,
0x08055de8/home/mavi/2ncm/2NCM: info: API Object is deleted.
) = 0
cl_log(5, 0x0804d9d7, 0xbffffaa8, 0x40031092,
0x08055de8/home/mavi/2ncm/2NCM: notice: Memory freed.
) = 0
cl_log(6, 0x0804df00, 0xbffffae8, 0x08049d51,
0x4018eee0/home/mavi/2ncm/2NCM: info: Exiting cleanly. (We'll be respawned)
--
Alan Robertson <alanr@unix.sh>
"Openness is the foundation and preservative of friendship... Let me claim
from you at all times your undisguised opinions." - William Wilberforce
_______________________________________________________
Linux-HA-Dev: Linux-HA-Dev@lists.community.tummy.com
http://lists.community.tummy.com/mailman/listinfo/linux-ha-dev
Home Page: http://linux-ha.org/
[prev in list] [next in list] [prev in thread] [next in thread]
Configure |
About |
News |
Add a list |
Sponsored by KoreLogic