[prev in list] [next in list] [prev in thread] [next in thread] 

List:       sylpheed
Subject:    [sylpheed:28401] Re: Bug(let) in the Unix version of sylpheed
From:       Stefaan A Eeckels <Stefaan.Eeckels () ecc ! lu>
Date:       2006-08-31 14:36:13
Message-ID: 20060831163613.a4a4fee9.Stefaan.Eeckels () ecc ! lu
[Download RAW message or body]

On Thu, 31 Aug 2006 10:12:37 +0200
Stefaan A Eeckels <Stefaan.Eeckels@ecc.lu> wrote:

> It certainly works for me.

Actually, it didn't work - I still got an increasing number of zombies
after about 24h, so I had another look at the code. I noticed that child
process structures could be destroyed without waiting for the process:

in src/action.c I added a wait before the destruction of the child
process structure (from the #ifdef to the #endif):

static void free_children(Children *children)
{
    ChildInfo *child_info;

    debug_print("Freeing children data %p\n", children);

    g_free(children->action);
    while (children->list != NULL) {
        child_info = (ChildInfo *)children->list->data;
        g_free(child_info->cmd);
        g_string_free(child_info->output, TRUE);
#ifdef G_OS_UNIX
        if (child_info->pid != 0) {
            if (waitpid(child_info->pid, NULL, 0) != child_info->pid) {
                perror("waitpid in free_children on child_info->pid\n");
            }
        }
#endif
        children->list = g_slist_remove(children->list, child_info);
        g_free(child_info);
    }
    g_free(children);
}

And adding a bit of debugging code showed that in libsylph/socket.c
there's a spot where children might get "lost":

static gboolean sock_get_address_info_async_cb(GIOChannel *source,
                                               GIOCondition condition,
                                               gpointer data)
{
    SockLookupData *lookup_data = (SockLookupData *)data;
    GList *addr_list = NULL;
    SockAddrData *addr_data;
    gsize bytes_read;
    gint ai_member[4];
    struct sockaddr *addr;
    pid_t wrv;

    for (;;) {
        if (g_io_channel_read(source, (gchar *)ai_member,
                sizeof(ai_member), &bytes_read) != G_IO_ERROR_NONE) {
            g_warning("sock_get_address_info_async_cb: "
                      "address length read error\n");
            break;
        }

        if (bytes_read == 0 || bytes_read != sizeof(ai_member))
                break;

        if (ai_member[0] == AF_UNSPEC) {
            g_warning("DNS lookup failed\n");
            break;
        }

        addr = g_malloc(ai_member[3]);
        if (g_io_channel_read(source, (gchar *)addr, ai_member[3],
              &bytes_read) != G_IO_ERROR_NONE) {
                g_warning("sock_get_address_info_async_cb: "
                          "address data read error\n");
                g_free(addr);
                break;
        }

        if (bytes_read != ai_member[3]) {
            g_warning("sock_get_address_info_async_cb: "
                      "incomplete address data\n");
            g_free(addr);
            break;
        }

        addr_data = g_new0(SockAddrData, 1);
        addr_data->family = ai_member[0];
        addr_data->socktype = ai_member[1];
        addr_data->protocol = ai_member[2];
        addr_data->addr_len = ai_member[3];
        addr_data->addr = addr;

        addr_list = g_list_append(addr_list, addr_data);
    }

    g_io_channel_shutdown(source, FALSE, NULL);
    g_io_channel_unref(source);

    kill(lookup_data->child_pid, SIGKILL);
    wrv = (pid_t)-1;
    while (wrv == (pid_t)-1) {
        if ((wrv = waitpid(lookup_data->child_pid, NULL, 0)) !=
                    lookup_data->child_pid) { 
            if (wrv == (pid_t)-1 && errno != EINTR) break; 
            perror("waitpid in sock_get_address_info_async_cb on"
                   "lookup_data->child_pid");
        } 
    }

    lookup_data->func(addr_list, lookup_data->data);

    g_free(lookup_data->hostname);
    g_free(lookup_data);

    return FALSE;
}

The waitpid() after the kill() can be interrupted and is not restarted.
As a result, zombies are left behind. I added some code to restart the
waitpid() call if it returns causes an EINTR error. 

I don't know if these are the correct fixes - a better way to ensure
that sylpheed doesn't fork() children without reliably waiting for them
seems to be desirable.

Take care,

-- 
Stefaan
-- 
As complexity rises, precise statements lose meaning,
and meaningful statements lose precision. -- Lotfi Zadeh 

[prev in list] [next in list] [prev in thread] [next in thread] 

Configure | About | News | Add a list | Sponsored by KoreLogic