'Re: 503 service unavailable error'

[prev in list] [next in list] [prev in thread] [next in thread] 

List:       varnish-misc
Subject:    Re: 503 service unavailable error
From:       Tim Dunphy <bluethundr () gmail ! com>
Date:       2015-07-09 20:50:36
Message-ID: CAOZy0ekBDhvra1xdR9Rz1J4r1fZbFSCGmy3kdVR+9rLj8VQu0w () mail ! gmail ! com
[Download RAW message or body]

[Attachment #2 (multipart/alternative)]

Hey Jason,

> You're never specifying any auth in your probe:
>
>   .probe = {
>   .request =
>    "GET /healthcheck.php HTTP/1.1"
>    "Host: wiki.example.com"
>    "Connection: close";

Yeah, understood. Actually when I mailed yesterday that was something I was
planning on doing. Not something I had done. But sometimes I'm not very
clear in explaining things.

At any rate, I was able to get the Basic Auth headers into my .probe
.request and the good news is it seems to have worked!!

This was the change that I made:

  .request =
   "GET /healthcheck.php HTTP/1.1"
   "Host: wiki.jokefire.com"
   "Authorization: Basic myBase64Hash=="
   "Connection: close";

So after that change was made and I cycled varnish I literally NEVER got
the 503 error again. Just an occasional 504 that went away on a page
reload. But nothing serious. And even that could probably be eliminated
with some VCL tweaking.

So after that success I made some modifications to the VCL to make it work
a little better with mediawiki. Here's the current state of my VCL for
anyone that's interested.

# Backend "web1": first web server, reached at 10.10.10.25 port 80.
backend web1 {
    .host = "10.10.10.25";
    .port = "80";

    # Upper bound on concurrent connections to this backend.
    .max_connections = 70;

    # Very large timeouts, set this high only while troubleshooting the
    # 503 errors; they are meant to be reduced afterwards.
    .connect_timeout = 3600s;
    .first_byte_timeout = 3600s;
    .between_bytes_timeout = 3600s;

    # Health probe. The request carries the Basic Auth header so that the
    # password-protected healthcheck page can be fetched.
    # Probed every 10 minutes (60s timeout), threshold 2 of window 3.
    .probe = {
        .request =
            "GET /healthcheck.php HTTP/1.1"
            "Host: wiki.example.com"
            "Authorization: Basic Base64Hash=="
            "Connection: close";
        .interval = 10m;
        .timeout = 60s;
        .window = 3;
        .threshold = 2;
    }
}

# Backend "web2": second web server, reached at 10.10.10.26 port 80.
backend web2 {
    .host = "10.10.10.26";
    .port = "80";

    # Upper bound on concurrent connections to this backend.
    .max_connections = 70;

    # Very large timeouts, set this high only while troubleshooting the
    # 503 errors; they are meant to be reduced afterwards.
    .connect_timeout = 3600s;
    .first_byte_timeout = 3600s;
    .between_bytes_timeout = 3600s;

    # Health probe. The request carries the Basic Auth header so that the
    # password-protected healthcheck page can be fetched.
    # Probed every 10 minutes (60s timeout), threshold 2 of window 3.
    .probe = {
        .request =
            "GET /healthcheck.php HTTP/1.1"
            "Host: wiki.example.com"
            "Authorization: Basic Base64Hash=="
            "Connection: close";
        .interval = 10m;
        .timeout = 60s;
        .window = 3;
        .threshold = 2;
    }
}

# Director "www": round-robin over the two web backends.
director www round-robin {
    {
        .backend = web1;
    }
    {
        .backend = web2;
    }
}

# Access control list for "purge": open to only localhost and other local
# nodes. Only 127.0.0.1 is listed at present.
acl purge {
    "127.0.0.1";
}

# Entry point for every client request: normalizes the Host header, enforces
# Basic Auth, selects the backend director, handles PURGE, and decides
# between pipe, pass and cache lookup.
sub vcl_recv {

    # Fold the "www." alias onto the canonical host name. The replacement
    # string must stay on one line: an ordinary "..." VCL string cannot
    # contain a line break.
    set req.http.host = regsub(req.http.host, "^www\.wiki\.example\.com$", "wiki.example.com");

    # Serve objects up to 2 minutes past their expiry if the backend
    # is slow to respond.
    set req.grace = 120s;

    # Basic Auth gate: anything without the expected Authorization header is
    # answered with a 401, which vcl_error turns into an auth challenge.
    # NOTE(review): this runs before the PURGE handling below, so a PURGE
    # from an address in the "purge" ACL also needs the header.
    if (! req.http.Authorization ~ "Basic myBase64Hash==") {
        error 401 "Restricted";
    }

    # Never cache edit submissions.
    if (req.url ~ "&action=submit($|/)") {
        return (pass);
    }

    # Record the client address in X-Forwarded-For, but only on the first
    # pass through vcl_recv (not on restarts).
    if (req.restarts == 0) {
        if (req.http.x-forwarded-for) {
            set req.http.X-Forwarded-For = req.http.X-Forwarded-For + ", " + client.ip;
        } else {
            set req.http.X-Forwarded-For = client.ip;
        }
    }

    set req.backend = www;

    # This uses the ACL called "purge". A PURGE request from anywhere other
    # than the addresses in that ACL is rejected with a 405.
    if (req.request == "PURGE") {
        if (!client.ip ~ purge) {
            error 405 "Not allowed.";
        }
        return (lookup);
    }

    # Non-RFC2616 methods, or CONNECT, are piped straight through.
    if (req.request != "GET" && req.request != "HEAD" &&
        req.request != "PUT" && req.request != "POST" &&
        req.request != "TRACE" && req.request != "OPTIONS" &&
        req.request != "DELETE") {
        return (pipe);
    }

    # Pass anything other than GET and HEAD directly;
    # only GET and HEAD are candidates for caching.
    if (req.request != "GET" && req.request != "HEAD") {
        return (pass);
    }

    # Pass requests from logged-in users directly (not cacheable by default).
    # NOTE(review): every request that reaches this point carries an
    # Authorization header (the others got a 401 above), so this condition
    # is true for all GET/HEAD traffic and the code below it is never
    # reached. Confirm that bypassing the cache for everything is intended.
    if (req.http.Authorization || req.http.Cookie) {
        return (pass);
    }

    # Pass any requests with the "If-None-Match" header directly.
    if (req.http.If-None-Match) {
        return (pass);
    }

    # Normalize Accept-Encoding to reduce Vary variants.
    if (req.http.Accept-Encoding) {
        if (req.http.User-Agent ~ "MSIE 6") {
            unset req.http.Accept-Encoding;
        } elsif (req.http.Accept-Encoding ~ "gzip") {
            set req.http.Accept-Encoding = "gzip";
        } elsif (req.http.Accept-Encoding ~ "deflate") {
            set req.http.Accept-Encoding = "deflate";
        } else {
            unset req.http.Accept-Encoding;
        }
    }

    return (lookup);
}

# Called when a request is piped to the backend. Forces the connection to
# close so that each piped request goes through vcl_recv again.
sub vcl_pipe {
    # Note that only the first request to the backend will have
    # X-Forwarded-For set.  If you use X-Forwarded-For and want to
    # have it set for all requests, make sure to have:
    # set req.http.connection = "close";

    # This is otherwise not necessary if you do not do any request
    # rewriting.
    set req.http.connection = "close";
}

# Called if the cache has a copy of the page.
sub vcl_hit {
    # A PURGE that finds the object bans its URL and reports success.
    if (req.request == "PURGE") {
        ban_url(req.url);
        error 200 "Purged";
    }

    # Objects whose TTL is no longer positive are fetched fresh instead.
    if (!obj.ttl > 0s) {
        return (pass);
    }
}

# Called if the cache does not have a copy of the page.
sub vcl_miss {
    # Nothing to purge: report that to the caller with a 200.
    if (req.request == "PURGE") {
        error 200 "Not in cache";
    }
}

# Called after a document has been successfully retrieved from the backend.
# Sets the grace period and a minimum TTL on the response, then marks some
# responses as hit-for-pass instead of caching them normally.
sub vcl_fetch {
      # set minimum timeouts to auto-discard stored objects
      #       set beresp.prefetch = -30s;
       set beresp.grace = 120s;

        # Raise any TTL shorter than 48 hours up to 48 hours.
        if (beresp.ttl < 48h) {
          set beresp.ttl = 48h;}

        # NOTE(review): after the block above the TTL is at least 48h, so
        # this check looks like it can never be true - confirm whether it
        # was meant to run before the TTL floor is applied.
        if (!beresp.ttl > 0s)
            {return(hit_for_pass);}

        # Responses that set a cookie are not cached.
        if (beresp.http.Set-Cookie)
            {return(hit_for_pass);}
        #if (beresp.http.Cache-Control ~ "(private|no-cache|no-store)")
        #           {return(hit_for_pass);}
        # Responses to authenticated requests are cached only when the
        # backend marks them "public" in Cache-Control.
        if (req.http.Authorization && !beresp.http.Cache-Control ~ "public")
            {return(hit_for_pass);}

}

# Builds the synthetic response for errors raised from VCL. Only status 401
# is customized here: it gets a WWW-Authenticate challenge and a small HTML
# body so that browsers prompt for Basic Auth credentials.
sub vcl_error {

  if (obj.status == 401) {
  set obj.http.Content-Type = "text/html; charset=utf-8";
  set obj.http.WWW-Authenticate = "Basic realm=Secured";
  # The DOCTYPE must stay on one line: everything inside {" ... "} is sent
  # verbatim, so a line break inside the quoted DTD URL would end up in the
  # page.
  synthetic {"

   <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/1999/REC-html401-19991224/loose.dtd">

    <HTML>
    <HEAD>
    <TITLE>Error</TITLE>
    <META HTTP-EQUIV='Content-Type' CONTENT='text/html;'>
    </HEAD>
    <BODY><H1>401 Unauthorized (varnish)</H1></BODY>
    </HTML>
    "};
     return (deliver);
    }
}

# Called just before the response is sent to the client. Adds an X-Cache
# header showing whether the object was served from cache.
sub vcl_deliver {
    if (obj.hits > 0) {
        # Object has been delivered from cache at least once before.
        set resp.http.X-Cache = "HIT";
    } else {
        set resp.http.X-Cache = "MISS";
    }
}

Now, all that's left to do is to set those completely insane timeouts I've
been using to try and troubleshoot the problem to something a little more
reasonable.

Thanks for all the help!

Tim

On Thu, Jul 9, 2015 at 9:01 AM, Jason Price <japrice@gmail.com> wrote:

> You're never specifying any auth in your probe:
>
>   .probe = {
>   .request =
>    "GET /healthcheck.php HTTP/1.1"
>    "Host: wiki.example.com"
>    "Connection: close";
>
> I don't know the proper way to specify it, but you'll need to play
> around with curl, wireshark and varnish probes until you get it right.
>
> May be easier to test with telnet invocations:
>
> telnet 10.10.10.26 80
> GET /healthcheck.php HTTP/1.1
> Host: wiki.example.com
> Authorization: Basic ???????????????
> Connection: close
>
>
> The above should give you an auth failure request.  Twiddle with that
> until you get a successful authentication request, then translate it
> into the probe .request format.  The link you provided gives you
> everything else you need.
>
> -Jason
>
> On Wed, Jul 8, 2015 at 11:19 PM, Tim Dunphy <bluethundr@gmail.com> wrote:
> >> that interval and window on your web server is scary..... what you're
> >> saying is 'check each web server every 10 minutes, and only fail it
> >> after 3 failures'
> >
> >
> > Hah!! Agreed. I was just trying to rule the connect timeouts out of the
> > picture as to why the failures were happening!
> > I plan to set them to more normal intervals once I'm finished testing and
> > I've been able to get this to work.
> >
> >>
> >>
> >> next time you see the issue, look at:
> >> varnishadm -n <varnish_name> debug.health
> >
> >
> > Hmm you may have a point as to the back ends. Varnish is indeed seeing
> them
> > as 'sick' when I encounter the 503 error:
> >
> >
> > [root@varnish1:~] #varnishadm -n  varnish1   debug.health
> > Backend web1 is Sick
> > Current states  good:  0 threshold:  2 window:  3
> > Average responsetime of good probes: 0.000000
> > Oldest                                                    Newest
> > ================================================================
> > ------------------------------------------------------4444444444 Good
> IPv4
> > ------------------------------------------------------XXXXXXXXXX Good
> Xmit
> > ------------------------------------------------------RRRRRRRRRR Good
> Recv
> > ----------------------------------------------------HH---------- Happy
> > Backend web2 is Sick
> > Current states  good:  0 threshold:  2 window:  3
> > Average responsetime of good probes: 0.000000
> > Oldest                                                    Newest
> > ================================================================
> > ------------------------------------------------------4444444444 Good
> IPv4
> > ------------------------------------------------------XXXXXXXXXX Good
> Xmit
> > ------------------------------------------------------RRRRRRRRRR Good
> Recv
> > ----------------------------------------------------HH---------- Happy
> >
> >>
> >>
> >> I'd be willing to bet that varnish is just failing the backends.  Try
> >> running the healthcheck manually from the varnish boxes:
> >> curl -H "Host:kiki.example.com" -v "http://10.10.10.26/healthcheck.php"
> >> And see if you're actually getting good healthchecks.  If you're not,
> >> then you need to look at your backends (specifically healthcheck.php)
> >
> >
> > But if I perform the curl you're suggesting, I am able to retrieve the
> > healthcheck.php file!!
> >
> > #curl --user admin:somepass -H "Host:wiki.example.com" -v
> > "http://10.10.10.25/healthcheck.php"
> > * About to connect() to 52.5.117.61 port 80 (#0)
> > *   Trying 52.5.117.61... connected
> > * Connected to 52.5.117.61 (52.5.117.61) port 80 (#0)
> > * Server auth using Basic with user 'admin'
> >> GET /healthcheck.php HTTP/1.1
> >> Authorization: Basic SomeBase64Hash==
> >> User-Agent: curl/7.19.7 (x86_64-redhat-linux-gnu) libcurl/7.19.7
> >> NSS/3.14.0.0 zlib/1.2.3 libidn/1.18 libssh2/1.4.2
> >> Accept: */*
> >> Host:wiki.example.com
> >>
> > < HTTP/1.1 200 OK
> > < Date: Thu, 09 Jul 2015 02:10:35 GMT
> > < Server: Apache/2.4.6 (CentOS) OpenSSL/1.0.1e-fips mod_fcgid/2.3.9
> > PHP/5.4.42 SVN/1.7.14 mod_wsgi/3.4 Python/2.7.5
> > < X-Powered-By: PHP/5.4.42
> > < Content-Length: 5
> > < Content-Type: text/html; charset=UTF-8
> > <
> > good
> > * Connection #0 to host 52.5.117.61 left intact
> > * Closing connection #0
> >
> > But in the curl I just did I was specifying the user auth. Which got me
> to
> > thinking, maybe I'm handling apache basic auth in the wrong way in my VCL
> > file?
> >
> > To test this idea out, I commented out the basic auth lines in my apache
> > config. Then cycled the services on both apache servers and both varnish
> > servers.
> >
> > When I ran the test you gave me again, this is the result I got back:
> >
> > #varnishadm -n  varnish1   debug.health
> > Backend web1 is Healthy
> > Current states  good:  3 threshold:  2 window:  3
> > Average responsetime of good probes: 0.032781
> > Oldest                                                    Newest
> > ================================================================
> > ---------------------------------------------------------------4 Good
> IPv4
> > ---------------------------------------------------------------X Good
> Xmit
> > ---------------------------------------------------------------R Good
> Recv
> > -------------------------------------------------------------HHH Happy
> > Backend web2 is Healthy
> > Current states  good:  3 threshold:  2 window:  3
> > Average responsetime of good probes: 0.032889
> > Oldest                                                    Newest
> > ================================================================
> > ---------------------------------------------------------------4 Good
> IPv4
> > ---------------------------------------------------------------X Good
> Xmit
> > ---------------------------------------------------------------R Good
> Recv
> > -------------------------------------------------------------HHH Happy
> >
> > Everybody's happy again!!
> >
> > And I tried browsing around the wiki for quite a long time. And there
> were
> > NO 503 errors the entire time I was using it. Which tells me that I am,
> > indeed, not handling auth correctly in my VCL.
> >
> > The way I thought I solved the problem was by adding a .request to the
> web
> > server definitions that specified the headers to do a GET on the health
> > check:
> >
> > .request =
> >    "GET /healthcheck.php HTTP/1.1"
> >    "Host: wiki.example.com"
> >    "Connection: close";
> >
> > The reason I thought this worked was because, after I'd restarted varnish
> > with that change in place I was able to log into the wiki with basic
> auth in
> > the web browser. And then I'd be able to use it for a while before the
> > back-end would come up as 'sick' in varnish again which would cause the
> 503
> > error.
> >
> > I then tried following this advice again, which I had also tried earlier
> > without much luck:
> >
> > http://blog.tenya.me/blog/2011/12/14/varnish-http-authentication/
> >
> > Which tells you to add this section to your VCL file:
> >
> >  if (! req.http.Authorization ~ "Basic SomeBase64Hash==")
> >       {
> >        error 401 "Restricted";
> >       }
> >
> > And then add this sub_vcl section:
> >
> > sub vcl_error {
> >
> >   if (obj.status == 401) {
> >   set obj.http.Content-Type = "text/html; charset=utf-8";
> >   set obj.http.WWW-Authenticate = "Basic realm=Secured";
> >   synthetic {"
> >
> >    <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
> > "http://www.w3.org/TR/1999/REC-html401-19991224/loose.dtd">
> >
> >     <HTML>
> >     <HEAD>
> >     <TITLE>Error</TITLE>
> >     <META HTTP-EQUIV='Content-Type' CONTENT='text/html;'>
> >     </HEAD>
> >     <BODY><H1>401 Unauthorized (varnish)</H1></BODY>
> >     </HTML>
> >     "};
> >      return (deliver);
> >     }
> > }
> >
> > And after restarting varnish again on both nodes, with authentication in
> > place in the VHOST configs on the web servers I was able to log into the
> > wiki site again and browse around for a while.
> >
> > But then after some browsing around the back ends would go sick again and
> > you would see the 503:
> >
> > #varnishadm -n  varnish1   debug.health
> > Backend web1 is Sick
> > Current states  good:  1 threshold:  2 window:  3
> > Average responsetime of good probes: 0.000000
> > Oldest                                                    Newest
> > ================================================================
> > --------------------------------------------------------------44 Good
> IPv4
> > --------------------------------------------------------------XX Good
> Xmit
> > --------------------------------------------------------------RR Good
> Recv
> > ------------------------------------------------------------HH-- Happy
> > Backend web2 is Sick
> > Current states  good:  1 threshold:  2 window:  3
> > Average responsetime of good probes: 0.000000
> > Oldest                                                    Newest
> > ================================================================
> > --------------------------------------------------------------44 Good
> IPv4
> > --------------------------------------------------------------XX Good
> Xmit
> > --------------------------------------------------------------RR Good
> Recv
> > ------------------------------------------------------------HH-- Happy
> >
> > So SOMETHING must still be off with how I'm handling authentication in my
> > VCL config. The next step I'm thinking of trying involves passing the
> > authentication headers to the .request section of my web server
> definition.
> > Although I'm not sure if it'll work. I'll let you guys know if it does.
> >
> > But I'd like to present the current state of my VCL again in case anyone
> has
> > any insight or knowledge to share that may help.
> >
> > backend web1 {
> >
> >   .host = "10.10.10.25";
> >
> >   .port = "80";
> >
> >   .connect_timeout = 3600s;
> >
> >   .first_byte_timeout = 3600s;
> >
> >   .between_bytes_timeout = 3600s;
> >
> >   .max_connections = 70;
> >
> >   .probe = {
> >
> >   .request =
> >
> >    "GET /healthcheck.php HTTP/1.1"
> >
> >    "Host: wiki.example.com"
> >
> >    "Connection: close";
> >
> >    .interval = 10m;
> >
> >    .timeout = 60s;
> >
> >    .window = 3;
> >
> >    .threshold = 2;
> >
> >    }
> >
> > }
> >
> > backend web2 {
> >
> >   .host = "10.10.10.26";
> >
> >   .port = "80";
> >
> >   .connect_timeout = 3600s;
> >
> >   .first_byte_timeout = 3600s;
> >
> >   .between_bytes_timeout = 3600s;
> >
> >   .max_connections = 70;
> >
> >   .probe = {
> >
> >   .request =
> >
> >    "GET /healthcheck.php HTTP/1.1"
> >
> >    "Host: wiki.example.com"
> >
> >    "Connection: close";
> >
> >    .interval = 10m;
> >
> >    .timeout = 60s;
> >
> >    .window = 3;
> >
> >    .threshold = 2;
> >
> >    }
> >
> > }
> >
> > director www round-robin {
> >
> >   { .backend = web1;   }
> >
> >   { .backend = web2;  }
> >
> >  }
> >
> > sub vcl_recv {
> >
> >      if (! req.http.Authorization ~ "Basic Base64Hash==")
> >
> >       {
> >
> >        error 401 "Restricted";
> >
> >       }
> >
> >     if (req.url ~ "&action=submit($|/)") {
> >
> >         return (pass);
> >
> >     }
> >
> >     set req.backend = www;
> >
> >     return (lookup);
> >
> > }
> >
> > sub vcl_fetch {
> >
> >       set beresp.ttl = 3600s;
> >
> >       set beresp.grace = 4h;
> >
> >       return (deliver);
> >
> > }
> >
> > sub vcl_error {
> >
> >   if (obj.status == 401) {
> >
> >   set obj.http.Content-Type = "text/html; charset=utf-8";
> >
> >   set obj.http.WWW-Authenticate = "Basic realm=Secured";
> >
> >   synthetic {"
> >
> >
> >    <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
> > "http://www.w3.org/TR/1999/REC-html401-19991224/loose.dtd">
> >
> >
> >     <HTML>
> >
> >     <HEAD>
> >
> >     <TITLE>Error</TITLE>
> >
> >     <META HTTP-EQUIV='Content-Type' CONTENT='text/html;'>
> >
> >     </HEAD>
> >
> >     <BODY><H1>401 Unauthorized (varnish)</H1></BODY>
> >
> >     </HTML>
> >
> >     "};
> >
> >      return (deliver);
> >
> >     }
> >
> > }
> >
> > sub vcl_deliver {
> >
> >      if (obj.hits> 0) {
> >
> >       set resp.http.X-Cache = "HIT";
> >
> >      } else {
> >
> >         set resp.http.X-Cache = "MISS";
> >
> >      }
> >
> >  }
> >
> > Once again I genuinely appreciate the help of this list, and hope I
> haven't
> > worn out my welcome! ;)
> >
> > Thanks,
> > Tim
> >
> >
> > On Wed, Jul 8, 2015 at 9:31 PM, Jason Price <japrice@gmail.com> wrote:
> >>
> >> that interval and window on your web server is scary..... what you're
> >> saying is 'check each web server every 10 minutes, and only fail it
> >> after 3 failures'
> >>
> >> next time you see the issue, look at:
> >>
> >> varnishadm -n <varnish_name> debug.health
> >>
> >> I'd be willing to bet that varnish is just failing the backends.  Try
> >> running the healthcheck manually from the varnish boxes:
> >>
> >> curl -H "Host:kiki.example.com" -v "http://10.10.10.26/healthcheck.php"
> >>
> >> And see if you're actually getting good healthchecks.  If you're not,
> >> then you need to look at your backends (specifically healthcheck.php)
> >>
> >> On Wed, Jul 8, 2015 at 12:14 PM, Tim Dunphy <bluethundr@gmail.com>
> wrote:
> >> > Hi guys,
> >> >
> >> >
> >> >  I'm having an issue where my varnish server will stop working after a
> >> > while
> >> > of browsing around the site I'm using it with and throw a 503 server
> >> > unavailable error.
> >> >
> >> > In my varnish logs I'm getting a 'no backend connection error':
> >> >
> >> >    10 FetchError   c no backend connection
> >> >    10 VCL_call     c error deliver
> >> >    10 VCL_call     c deliver deliver
> >> >    10 TxProtocol   c HTTP/1.1
> >> >    10 TxStatus     c 503
> >> >    10 TxResponse   c Service Unavailable
> >> >    10 TxHeader     c Server: Varnish
> >> >
> >> >
> >> > And if I do a GET on the healthcheck from the command line on the
> >> > varnish
> >> > server, I get a 503 response from varnish:
> >> >
> >> > #GET http://wiki.example.com/healthcheck.php
> >> >
> >> > <?xml version="1.0" encoding="utf-8"?>
> >> > <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
> >> >  "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
> >> > <html>
> >> >   <head>
> >> >     <title>503 Service Unavailable</title>
> >> >   </head>
> >> >   <body>
> >> >     <h1>Error 503 Service Unavailable</h1>
> >> >     <p>Service Unavailable</p>
> >> >     <h3>Guru Meditation:</h3>
> >> >     <p>XID: 2107225059</p>
> >> >     <hr>
> >> >     <p>Varnish cache server</p>
> >> >   </body>
> >> > </html>
> >> >
> >> > But if I do another GET on the healthcheck file from the varnish
> server
> >> > to
> >> > another apache VHOST on the same server as the wiki site that responds
> >> > to
> >> > the IP of the web server instead of the IP for the varnish server, the
> >> > GET
> >> > works:
> >> >
> >> > #GET http://ops1.example.com/healthcheck.php
> >> > good
> >> >
> >> >
> >> > So I'm not sure why varnish is having trouble reaching the HC file.
> The
> >> > web
> >> > server is a little far from the varnish server. The varnish machines
> are
> >> > in
> >> > NYC and the web servers are in northern Virginia.
> >> >
> >> > So I tried setting the timeouts in the varnish config to a really high
> >> > number. And that was working for a while. But today I noticed that it
> >> > stopped working. I'll have to restart the varnish service and browse
> the
> >> > site for a while. Then it'll stop working again and produce the 503
> >> > error.
> >> > It's pretty annoying!
> >> >
> >> > I was wondering if there might be something in my VCL I could tweak to
> >> > make
> >> > this work? Or if the fact is that the web servers are simply too far
> >> > from
> >> > varnish for this to be practical.
> >> >
> >> > Here's my VCL file. It's pretty basic:
> >> >
> >> > backend web1 {
> >> >   .host = "10.10.10.25";
> >> >   .port = "80";
> >> >   .connect_timeout = 1200s;
> >> >   .first_byte_timeout = 1200s;
> >> >   .between_bytes_timeout = 1200s;
> >> >   .max_connections = 70;
> >> >   .probe = {
> >> >   .request =
> >> >    "GET /healthcheck.php HTTP/1.1"
> >> >    "Host: wiki.example.com"
> >> >    "Connection: close";
> >> >    .interval = 10m;
> >> >    .timeout = 60s;
> >> >    .window = 3;
> >> >    .threshold = 2;
> >> >    }
> >> > }
> >> >
> >> > backend web2 {
> >> >   .host = "10.10.10.26";
> >> >   .port = "80";
> >> >   .connect_timeout = 1200s;
> >> >   .first_byte_timeout = 1200s;
> >> >   .between_bytes_timeout = 1200s;
> >> >   .max_connections = 70;
> >> >   .probe = {
> >> >   .request =
> >> >    "GET /healthcheck.php HTTP/1.1"
> >> >    "Host: wiki.example.com"
> >> >    "Connection: close";
> >> >    .interval = 10m;
> >> >    .timeout = 60s;
> >> >    .window = 3;
> >> >    .threshold = 2;
> >> >    }
> >> > }
> >> >
> >> > director www round-robin {
> >> >   { .backend = web1;   }
> >> >   { .backend = web2;  }
> >> >  }
> >> >
> >> > sub vcl_recv {
> >> >
> >> >     if (req.url ~ "&action=submit($|/)") {
> >> >         return (pass);
> >> >     }
> >> >
> >> >     set req.backend = www;
> >> >     return (lookup);
> >> > }
> >> >
> >> > sub vcl_fetch {
> >> >       set beresp.ttl = 3600s;
> >> >       set beresp.grace = 4h;
> >> >       return (deliver);
> >> > }
> >> >
> >> >
> >> > sub vcl_deliver {
> >> >      if (obj.hits> 0) {
> >> >       set resp.http.X-Cache = "HIT";
> >> >      } else {
> >> >         set resp.http.X-Cache = "MISS";
> >> >      }
> >> >  }
> >> >
> >> > Thanks,
> >> > Tim
> >> >
> >> >
> >> >
> >> > --
> >> > GPG me!!
> >> >
> >> > gpg --keyserver pool.sks-keyservers.net --recv-keys F186197B
> >> >
> >> >
> >> > _______________________________________________
> >> > varnish-misc mailing list
> >> > varnish-misc@varnish-cache.org
> >> > https://www.varnish-cache.org/lists/mailman/listinfo/varnish-misc
> >
> >
> >
> >
> > --
> > GPG me!!
> >
> > gpg --keyserver pool.sks-keyservers.net --recv-keys F186197B
> >
>

-- 
GPG me!!

gpg --keyserver pool.sks-keyservers.net --recv-keys F186197B

[Attachment #5 (text/html)]

<div dir="ltr">Hey Jason,<div><br></div><div><blockquote style="margin:0px 0px 0px \
0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex" \
class="gmail_quote"><span style="font-size:12.8px">You&#39;re never specifying any \
auth in your probe:</span><br style="font-size:12.8px"><span \
style="font-size:12.8px"></span><br><span style="font-size:12.8px">   .probe = \
{</span><br><span style="font-size:12.8px">   .request =</span><br><span \
style="font-size:12.8px">     &quot;GET /healthcheck.php \
HTTP/1.1&quot;</span><br><span style="font-size:12.8px">     &quot;Host:  <a \
href="http://wiki.example.com/" rel="noreferrer" \
target="_blank">wiki.example.com</a>&quot;</span><br><span style="font-size:12.8px">  \
&quot;Connection: close&quot;;</span></blockquote><div><br></div><div>Yeah, \
understood. Actually when I mailed yesterday that was something I was planning on \
doing. Not something I had done. But sometimes I&#39;m not very clear in explaining \
things. <br><br></div><div>At any rate, I was able to get the Basic Auth headers into \
my .probe .request and the good news is it seems to have \
worked!!<br><br></div><div>This was the change that I made:<br><br>   .request =<br>  \
&quot;GET /healthcheck.php HTTP/1.1&quot;<br>     &quot;Host: <a \
href="http://wiki.jokefire.com">wiki.jokefire.com</a>&quot;<br>     \
&quot;Authorization: Basic myBase64Hash==&quot;<br>     &quot;Connection: \
close&quot;;<br></div><br><br></div><div>So after that change was made and I cycled \
varnish I literally NEVER got the 503 error again. Just an occasional 504 that went \
away on a page reload. But nothing serious. And even that could probably be done away \
with some VCL tweaking. <br><br></div><div>So after that success I made some \
modifications to the VCL to make it work a little better with mediawiki. Here&#39;s \
the current state of my VCL for anyone that&#39;s interested. <br><br><br>backend \
web1 {<br>   .host = "10.10.10.25";<br>   .port = &quot;80&quot;;<br>   \
.connect_timeout = 3600s;<br>   .first_byte_timeout = 3600s;<br>   \
.between_bytes_timeout = 3600s;<br>   .max_connections = 70;<br>   .probe = {<br>   \
.request =<br>     &quot;GET /healthcheck.php HTTP/1.1&quot;<br>     &quot;Host: <a \
href="http://wiki.example.com">wiki.example.com</a>&quot;<br>     \
&quot;Authorization: Basic Base64Hash==&quot;<br>     &quot;Connection: \
close&quot;;<br>     .interval = 10m;<br>     .timeout = 60s;<br>     .window = \
3;<br>     .threshold = 2;<br>     }<br>}<br><br>backend web2 {<br>   .host = \
"10.10.10.26";<br>   .port = &quot;80&quot;;<br>   .connect_timeout = 3600s;<br>   \
.first_byte_timeout = 3600s;<br>   .between_bytes_timeout = 3600s;<br>   \
.max_connections = 70;<br>   .probe = {<br>   .request =<br>     &quot;GET \
/healthcheck.php HTTP/1.1&quot;<br>     &quot;Host: <a \
href="http://wiki.example.com">wiki.example.com</a>&quot;<br>     \
&quot;Authorization: Basic Base64Hash==&quot;<br>     &quot;Connection: \
close&quot;;<br>     .interval = 10m;<br>     .timeout = 60s;<br>     .window = \
3;<br>     .threshold = 2;<br>     }<br>}<br><br><br>director www round-robin {<br>   \
{ .backend = web1;     }<br>   { .backend = web2;   }<br>  }<br><br># access control \
list for &quot;purge&quot;: open to only localhost and other local nodes<br>acl purge \
{<br>       &quot;127.0.0.1&quot;;<br>}<br><br>sub vcl_recv {<br><br><br>     set \
req.http.host = regsub(req.http.host, &quot;^www\.wiki\.example\.com$&quot;,&quot;<a \
href="http://wiki.example.com">wiki.example.com</a>&quot;);<br><br>       # Serve \
objects up to 2 minutes past their expiry if the backend<br>       # is slow to \
respond.<br>       set req.grace = 120s;<br><br>       if (! req.http.Authorization ~ \
&quot;Basic myBase64Hash==&quot;)<br>           {<br>             error 401 \
&quot;Restricted&quot;;<br>           }<br><br>       if (req.url ~ \
&quot;&amp;action=submit($|/)&quot;) {<br>               return (pass);<br>       \
}<br><br>     if (req.restarts == 0) {<br>                     if \
(req.http.x-forwarded-for) {<br>                               set \
req.http.X-Forwarded-For = req.http.X-Forwarded-For + &quot;, &quot; + client.ip;<br> \
} else {<br>                               set req.http.X-Forwarded-For = \
client.ip;<br>                 }<br>       }<br><br>     set req.backend = \
www;<br><br>     # This uses the ACL action called &quot;purge&quot;. Basically if a \
request to<br>     # PURGE the cache comes from anywhere other than localhost, ignore \
it.<br>       if (req.request == &quot;PURGE&quot;)<br>               {if (!client.ip \
~ purge)<br>                   {error 405 &quot;Not allowed.&quot;;}<br>       \
return(lookup);}<br><br>       if (req.request != &quot;GET&quot; &amp;&amp; \
req.request != &quot;HEAD&quot; &amp;&amp;<br>               req.request != \
&quot;PUT&quot; &amp;&amp; req.request != &quot;POST&quot; &amp;&amp;<br>             \
req.request != &quot;TRACE&quot; &amp;&amp; req.request != &quot;OPTIONS&quot; \
&amp;&amp;<br>               req.request != &quot;DELETE&quot;)<br>               \
{return(pipe);}         /* Non-RFC2616 or CONNECT which is weird. */<br><br><br>     \
# Pass anything other than GET and HEAD directly.<br>     if (req.request != \
&quot;GET&quot; &amp;&amp; req.request != &quot;HEAD&quot;)<br>             \
{return(pass);}           /* We only deal with GET and HEAD by default */<br><br>     \
# Pass requests from logged-in users directly.<br>       if (req.http.Authorization \
|| req.http.Cookie)<br>             {return(pass);}           /* Not cacheable by \
default */<br><br>       # Pass any requests with the &quot;If-None-Match&quot; \
header directly.<br>       if (req.http.If-None-Match)<br>             \
{return(pass);}<br><br>       # normalize Accept-Encoding to reduce vary<br>       if \
(req.http.Accept-Encoding) {<br>             if (req.http.User-Agent ~ &quot;MSIE \
6&quot;) {<br>                   unset req.http.Accept-Encoding;<br>               } \
elsif (req.http.Accept-Encoding ~ &quot;gzip&quot;) {<br>                   set \
req.http.Accept-Encoding = &quot;gzip&quot;;<br>               } elsif \
(req.http.Accept-Encoding ~ &quot;deflate&quot;) {<br>                   set \
req.http.Accept-Encoding = &quot;deflate&quot;;<br>               } else {<br>        \
unset req.http.Accept-Encoding;<br>             }<br>       }<br><br>       return \
(lookup);<br>}<br><br>sub vcl_pipe {<br>             # Note that only the first \
request to the backend will have<br>             # X-Forwarded-For set.   If you use \
X-Forwarded-For and want to<br>             # have it set for all requests, make sure \
to have:<br>             # set req.http.connection = &quot;close&quot;;<br><br>       \
# This is otherwise not necessary if you do not do any request rewriting.<br>         \
set req.http.connection = &quot;close&quot;;<br>}<br><br># Called if the cache has a \
copy of the page.<br>sub vcl_hit {<br>               if (req.request == \
&quot;PURGE&quot;)<br>                       {ban_url(req.url);<br>                   \
error 200 &quot;Purged&quot;;}<br><br>               if (!obj.ttl &gt; 0s)<br>        \
{return(pass);}<br>}<br><br><br># Called if the cache does not have a copy of the \
page.<br>sub vcl_miss {<br>               if (req.request == &quot;PURGE&quot;)<br>   \
{error 200 &quot;Not in cache&quot;;}<br>}<br><br># Called after a document has been \
successfully retrieved from the backend.<br>sub vcl_fetch {<br>           # set \
minimum timeouts to auto-discard stored objects<br>           #             set \
beresp.prefetch = -30s;<br>             set beresp.grace = 120s;<br><br>              \
if (beresp.ttl &lt; 48h) {<br>                   set beresp.ttl = 48h;}<br><br>       \
if (!beresp.ttl &gt; 0s)<br>                       {return(hit_for_pass);}<br><br>    \
if (beresp.http.Set-Cookie)<br>                       {return(hit_for_pass);}<br>     \
#if (beresp.http.Cache-Control ~ &quot;(private|no-cache|no-store)&quot;)<br>         \
#                     {return(hit_for_pass);}<br>               if \
(req.http.Authorization &amp;&amp; !beresp.http.Cache-Control ~ \
&quot;public&quot;)<br>                       \
{return(hit_for_pass);}<br><br>}<br><br>sub vcl_error {<br><br>   if (obj.status == \
401) {<br>   set obj.http.Content-Type = &quot;text/html; charset=utf-8&quot;;<br>   \
set obj.http.WWW-Authenticate = &quot;Basic realm=Secured&quot;;<br>   synthetic \
{&quot;<br><br>     &lt;!DOCTYPE HTML PUBLIC &quot;-//W3C//DTD HTML 4.01 \
Transitional//EN&quot;   &quot;<a \
href="http://www.w3.org/TR/1999/REC-html401-19991224/loose.dtd">http://www.w3.org/TR/1999/REC-html401-19991224/loose.dtd</a>&quot;&gt;<br><br> \
&lt;HTML&gt;<br>       &lt;HEAD&gt;<br>       &lt;TITLE&gt;Error&lt;/TITLE&gt;<br>    \
&lt;META HTTP-EQUIV=&#39;Content-Type&#39; CONTENT=&#39;text/html;&#39;&gt;<br>       \
&lt;/HEAD&gt;<br>       &lt;BODY&gt;&lt;H1&gt;401 Unauthorized \
(varnish)&lt;/H1&gt;&lt;/BODY&gt;<br>       &lt;/HTML&gt;<br>       &quot;};<br>      \
return (deliver);<br>       }<br>}<br><br>sub vcl_deliver {<br>         if \
(obj.hits&gt; 0) {<br>           set resp.http.X-Cache = &quot;HIT&quot;;<br>         \
} else {<br>               set resp.http.X-Cache = &quot;MISS&quot;;<br>         \
}<br>  }<br><br></div><div>Now, all that&#39;s left to do is to set those completely \
insane timeouts I&#39;ve been using to try and troubleshoot the problem to something \
a little more reasonable. <br><br></div><div>Thanks for all the \
help!<br><br></div><div>Tim<br></div></div><div class="gmail_extra"><br><div \
class="gmail_quote">On Thu, Jul 9, 2015 at 9:01 AM, Jason Price <span \
dir="ltr">&lt;<a href="mailto:japrice@gmail.com" \
target="_blank">japrice@gmail.com</a>&gt;</span> wrote:<br><blockquote \
class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc \
solid;padding-left:1ex">You&#39;re never specifying any auth in your probe:<br> <span \
                class=""><br>
   .probe = {<br>
   .request =<br>
     &quot;GET /healthcheck.php HTTP/1.1&quot;<br>
     &quot;Host: <a href="http://wiki.example.com" rel="noreferrer" \
target="_blank">wiki.example.com</a>&quot;<br>  &quot;Connection: close&quot;;<br>
<br>
</span>I don&#39;t know the proper way to specify it, but you&#39;ll need to play<br>
around with curl, wireshark and varnish probes until you get it right.<br>
<br>
May be easier to test with telnet invocations:<br>
<br>
telnet 10.10.10.26 80<br>
<span class="">GET /healthcheck.php HTTP/1.1<br>
Host: <a href="http://wiki.example.com" rel="noreferrer" \
target="_blank">wiki.example.com</a><br> </span>Authorization: Basic \
                ???????????????<br>
Connection: close<br>
<br>
<br>
The above should give you an auth failure request.   Twiddle with that<br>
until you get a successful authentication request, then translate it<br>
into the probe .request format.   The link you provided gives you<br>
everything else you need.<br>
<span class="HOEnZb"><font color="#888888"><br>
-Jason<br>
</font></span><div class="HOEnZb"><div class="h5"><br>
On Wed, Jul 8, 2015 at 11:19 PM, Tim Dunphy &lt;<a \
href="mailto:bluethundr@gmail.com">bluethundr@gmail.com</a>&gt; wrote:<br> &gt;&gt; \
that interval and window on your web server is scary..... what you&#39;re<br> \
&gt;&gt; saying is &#39;check each web server every 10 minutes, and only fail it<br> \
&gt;&gt; after 3 failures&#39;<br> &gt;<br>
&gt;<br>
&gt; Hah!! Agreed. I was just trying to rule the connect timeouts out of the<br>
&gt; picture as to why the failures were happening!<br>
&gt; I plan to set them to more normal intervals once I&#39;m finished testing \
and<br> &gt; I&#39;ve been able to get this to work.<br>
&gt;<br>
&gt;&gt;<br>
&gt;&gt;<br>
&gt;&gt; next time you see the issue, look at:<br>
&gt;&gt; varnishadm -n &lt;varnish_name&gt; debug.health<br>
&gt;<br>
&gt;<br>
&gt; Hmm you may have a point as to the back ends. Varnish is indeed seeing them<br>
&gt; as &#39;sick&#39; when I encounter the 503 error:<br>
&gt;<br>
&gt;<br>
&gt; [root@varnish1:~] #varnishadm -n   varnish1     debug.health<br>
&gt; Backend web1 is Sick<br>
&gt; Current states   good:   0 threshold:   2 window:   3<br>
&gt; Average responsetime of good probes: 0.000000<br>
&gt; Oldest                                                                           \
Newest<br> &gt; ================================================================<br>
&gt; ------------------------------------------------------4444444444 Good IPv4<br>
&gt; ------------------------------------------------------XXXXXXXXXX Good Xmit<br>
&gt; ------------------------------------------------------RRRRRRRRRR Good Recv<br>
&gt; ----------------------------------------------------HH---------- Happy<br>
&gt; Backend web2 is Sick<br>
&gt; Current states   good:   0 threshold:   2 window:   3<br>
&gt; Average responsetime of good probes: 0.000000<br>
&gt; Oldest                                                                           \
Newest<br> &gt; ================================================================<br>
&gt; ------------------------------------------------------4444444444 Good IPv4<br>
&gt; ------------------------------------------------------XXXXXXXXXX Good Xmit<br>
&gt; ------------------------------------------------------RRRRRRRRRR Good Recv<br>
&gt; ----------------------------------------------------HH---------- Happy<br>
&gt;<br>
&gt;&gt;<br>
&gt;&gt;<br>
&gt;&gt; I&#39;d be willing to bet that varnish is just failing the backends.   \
Try<br> &gt;&gt; running the healthcheck manually from the varnish boxes:<br>
&gt;&gt; curl -H &quot;Host:<a href="http://wiki.example.com" rel="noreferrer" \
target="_blank">wiki.example.com</a>&quot; -v &quot;<a \
href="http://10.10.10.26/healthcheck.php" rel="noreferrer" \
target="_blank">http://10.10.10.26/healthcheck.php</a>&quot;<br> &gt;&gt; And see if \
you&#39;re actually getting good healthchecks.   If you&#39;re not,<br> &gt;&gt; then \
you need to look at your backends (specifically healthcheck.php)<br> &gt;<br>
&gt;<br>
&gt; But if I perform the curl you&#39;re suggesting, I am able to retrieve the<br>
&gt; healthcheck.php file!!<br>
&gt;<br>
&gt; #curl --user admin:somepass -H &quot;Host:<a href="http://wiki.example.com" \
rel="noreferrer" target="_blank">wiki.example.com</a>&quot; -v<br> &gt; &quot;<a \
href="http://10.10.10.25/healthcheck.php" rel="noreferrer" \
target="_blank">http://10.10.10.25/healthcheck.php</a>&quot;<br> &gt; * About to \
connect() to 52.5.117.61 port 80 (#0)<br> &gt; *     Trying 52.5.117.61... \
connected<br> &gt; * Connected to 52.5.117.61 (52.5.117.61) port 80 (#0)<br>
&gt; * Server auth using Basic with user &#39;admin&#39;<br>
&gt;&gt; GET /healthcheck.php HTTP/1.1<br>
&gt;&gt; Authorization: Basic SomeBase64Hash==<br>
&gt;&gt; User-Agent: curl/7.19.7 (x86_64-redhat-linux-gnu) libcurl/7.19.7<br>
&gt;&gt; NSS/<a href="http://3.14.0.0" rel="noreferrer" target="_blank">3.14.0.0</a> \
zlib/1.2.3 libidn/1.18 libssh2/1.4.2<br> &gt;&gt; Accept: */*<br>
&gt;&gt; Host:<a href="http://wiki.example.com" rel="noreferrer" \
target="_blank">wiki.example.com</a><br> &gt;&gt;<br>
&gt; &lt; HTTP/1.1 200 OK<br>
&gt; &lt; Date: Thu, 09 Jul 2015 02:10:35 GMT<br>
&gt; &lt; Server: Apache/2.4.6 (CentOS) OpenSSL/1.0.1e-fips mod_fcgid/2.3.9<br>
&gt; PHP/5.4.42 SVN/1.7.14 mod_wsgi/3.4 Python/2.7.5<br>
&gt; &lt; X-Powered-By: PHP/5.4.42<br>
&gt; &lt; Content-Length: 5<br>
&gt; &lt; Content-Type: text/html; charset=UTF-8<br>
&gt; &lt;<br>
&gt; good<br>
&gt; * Connection #0 to host 52.5.117.61 left intact<br>
&gt; * Closing connection #0<br>
&gt;<br>
&gt; But in the curl I just did I was specifying the user auth. Which got me to<br>
&gt; thinking, maybe I&#39;m handling apache basic auth in the wrong way in my VCL<br>
&gt; file?<br>
&gt;<br>
&gt; To test this idea out, I commented out the basic auth lines in my apache<br>
&gt; config. Then cycled the services on both apache servers and both varnish<br>
&gt; servers.<br>
&gt;<br>
&gt; When I ran the test you gave me again, this is the result I got back:<br>
&gt;<br>
&gt; #varnishadm -n   varnish1     debug.health<br>
&gt; Backend web1 is Healthy<br>
&gt; Current states   good:   3 threshold:   2 window:   3<br>
&gt; Average responsetime of good probes: 0.032781<br>
&gt; Oldest                                                                           \
Newest<br> &gt; ================================================================<br>
&gt; ---------------------------------------------------------------4 Good IPv4<br>
&gt; ---------------------------------------------------------------X Good Xmit<br>
&gt; ---------------------------------------------------------------R Good Recv<br>
&gt; -------------------------------------------------------------HHH Happy<br>
&gt; Backend web2 is Healthy<br>
&gt; Current states   good:   3 threshold:   2 window:   3<br>
&gt; Average responsetime of good probes: 0.032889<br>
&gt; Oldest                                                                           \
Newest<br> &gt; ================================================================<br>
&gt; ---------------------------------------------------------------4 Good IPv4<br>
&gt; ---------------------------------------------------------------X Good Xmit<br>
&gt; ---------------------------------------------------------------R Good Recv<br>
&gt; -------------------------------------------------------------HHH Happy<br>
&gt;<br>
&gt; Everybody&#39;s happy again!!<br>
&gt;<br>
&gt; And I tried browsing around the wiki for quite a long time. And there were<br>
&gt; NO 503 errors the entire time I was using it. Which tells me that I am,<br>
&gt; indeed, not handling auth correctly in my VCL.<br>
&gt;<br>
&gt; The way I thought I solved the problem was by adding a .request to the web<br>
&gt; server definitions that specified the headers to do a GET on the health<br>
&gt; check:<br>
&gt;<br>
&gt; .request =<br>
&gt;      &quot;GET /healthcheck.php HTTP/1.1&quot;<br>
&gt;      &quot;Host: <a href="http://wiki.example.com" rel="noreferrer" \
target="_blank">wiki.example.com</a>&quot;<br> &gt;      &quot;Connection: \
close&quot;;<br> &gt;<br>
&gt; The reason I thought this worked was because, after I&#39;d restarted \
varnish<br> &gt; with that change in place I was able to log into the wiki with basic \
auth in<br> &gt; the web browser. And then I&#39;d be able to use it for a while \
before the<br> &gt; back-end would come up as &#39;sick&#39; in varnish again which \
would cause the 503<br> &gt; error.<br>
&gt;<br>
&gt; I then tried following this advice again, which I had also tried earlier<br>
&gt; without much luck:<br>
&gt;<br>
&gt; <a href="http://blog.tenya.me/blog/2011/12/14/varnish-http-authentication/" \
rel="noreferrer" target="_blank">http://blog.tenya.me/blog/2011/12/14/varnish-http-authentication/</a><br>
 &gt;<br>
&gt; Which tells you to add this section to your VCL file:<br>
&gt;<br>
&gt;   if (! req.http.Authorization ~ &quot;Basic SomeBase64Hash==&quot;)<br>
&gt;           {<br>
&gt;            error 401 &quot;Restricted&quot;;<br>
&gt;           }<br>
&gt;<br>
&gt; And then add this vcl_error subroutine:<br>
&gt;<br>
&gt; sub vcl_error {<br>
&gt;<br>
&gt;     if (obj.status == 401) {<br>
&gt;     set obj.http.Content-Type = &quot;text/html; charset=utf-8&quot;;<br>
&gt;     set obj.http.WWW-Authenticate = &quot;Basic realm=Secured&quot;;<br>
&gt;     synthetic {&quot;<br>
&gt;<br>
&gt;      &lt;!DOCTYPE HTML PUBLIC &quot;-//W3C//DTD HTML 4.01 \
Transitional//EN&quot;<br> &gt; &quot;<a \
href="http://www.w3.org/TR/1999/REC-html401-19991224/loose.dtd" rel="noreferrer" \
target="_blank">http://www.w3.org/TR/1999/REC-html401-19991224/loose.dtd</a>&quot;&gt;<br>
 &gt;<br>
&gt;        &lt;HTML&gt;<br>
&gt;        &lt;HEAD&gt;<br>
&gt;        &lt;TITLE&gt;Error&lt;/TITLE&gt;<br>
&gt;        &lt;META HTTP-EQUIV=&#39;Content-Type&#39; \
CONTENT=&#39;text/html;&#39;&gt;<br> &gt;        &lt;/HEAD&gt;<br>
&gt;        &lt;BODY&gt;&lt;H1&gt;401 Unauthorized \
(varnish)&lt;/H1&gt;&lt;/BODY&gt;<br> &gt;        &lt;/HTML&gt;<br>
&gt;        &quot;};<br>
&gt;         return (deliver);<br>
&gt;        }<br>
&gt; }<br>
&gt;<br>
&gt; And after restarting varnish again on both nodes, with authentication in<br>
&gt; place in the VHOST configs on the web servers I was able to log into the<br>
&gt; wiki site again and browse around for a while.<br>
&gt;<br>
&gt; But then after some browsing around the back ends would go sick again and<br>
&gt; you would see the 503:<br>
&gt;<br>
&gt; #varnishadm -n   varnish1     debug.health<br>
&gt; Backend web1 is Sick<br>
&gt; Current states   good:   1 threshold:   2 window:   3<br>
&gt; Average responsetime of good probes: 0.000000<br>
&gt; Oldest                                                                           \
Newest<br> &gt; ================================================================<br>
&gt; --------------------------------------------------------------44 Good IPv4<br>
&gt; --------------------------------------------------------------XX Good Xmit<br>
&gt; --------------------------------------------------------------RR Good Recv<br>
&gt; ------------------------------------------------------------HH-- Happy<br>
&gt; Backend web2 is Sick<br>
&gt; Current states   good:   1 threshold:   2 window:   3<br>
&gt; Average responsetime of good probes: 0.000000<br>
&gt; Oldest                                                                           \
Newest<br> &gt; ================================================================<br>
&gt; --------------------------------------------------------------44 Good IPv4<br>
&gt; --------------------------------------------------------------XX Good Xmit<br>
&gt; --------------------------------------------------------------RR Good Recv<br>
&gt; ------------------------------------------------------------HH-- Happy<br>
&gt;<br>
&gt; So SOMETHING must still be off with how I&#39;m handling authentication in \
my<br> &gt; VCL config. The next step I&#39;m thinking of trying involves passing \
the<br> &gt; authentication headers to the .request section of my web server \
definition.<br> &gt; Although I&#39;m not sure if it&#39;ll work. I&#39;ll let you \
guys know if it does.<br> &gt;<br>
&gt; But I&#39;d like to present the current state of my VCL again in case anyone \
has<br> &gt; any insight or knowledge to share that may help.<br>
&gt;<br>
&gt; backend web1 {<br>
&gt;<br>
&gt;     .host = &quot;10.10.10.25&quot;;<br>
&gt;<br>
&gt;     .port = &quot;80&quot;;<br>
&gt;<br>
&gt;     .connect_timeout = 3600s;<br>
&gt;<br>
&gt;     .first_byte_timeout = 3600s;<br>
&gt;<br>
&gt;     .between_bytes_timeout = 3600s;<br>
&gt;<br>
&gt;     .max_connections = 70;<br>
&gt;<br>
&gt;     .probe = {<br>
&gt;<br>
&gt;     .request =<br>
&gt;<br>
&gt;      &quot;GET /healthcheck.php HTTP/1.1&quot;<br>
&gt;<br>
&gt;      &quot;Host: <a href="http://wiki.example.com" rel="noreferrer" \
target="_blank">wiki.example.com</a>&quot;<br> &gt;<br>
&gt;      &quot;Connection: close&quot;;<br>
&gt;<br>
&gt;      .interval = 10m;<br>
&gt;<br>
&gt;      .timeout = 60s;<br>
&gt;<br>
&gt;      .window = 3;<br>
&gt;<br>
&gt;      .threshold = 2;<br>
&gt;<br>
&gt;      }<br>
&gt;<br>
&gt; }<br>
&gt;<br>
&gt; backend web2 {<br>
&gt;<br>
&gt;     .host = &quot;10.10.10.26&quot;;<br>
&gt;<br>
&gt;     .port = &quot;80&quot;;<br>
&gt;<br>
&gt;     .connect_timeout = 3600s;<br>
&gt;<br>
&gt;     .first_byte_timeout = 3600s;<br>
&gt;<br>
&gt;     .between_bytes_timeout = 3600s;<br>
&gt;<br>
&gt;     .max_connections = 70;<br>
&gt;<br>
&gt;     .probe = {<br>
&gt;<br>
&gt;     .request =<br>
&gt;<br>
&gt;      &quot;GET /healthcheck.php HTTP/1.1&quot;<br>
&gt;<br>
&gt;      &quot;Host: <a href="http://wiki.example.com" rel="noreferrer" \
target="_blank">wiki.example.com</a>&quot;<br> &gt;<br>
&gt;      &quot;Connection: close&quot;;<br>
&gt;<br>
&gt;      .interval = 10m;<br>
&gt;<br>
&gt;      .timeout = 60s;<br>
&gt;<br>
&gt;      .window = 3;<br>
&gt;<br>
&gt;      .threshold = 2;<br>
&gt;<br>
&gt;      }<br>
&gt;<br>
&gt; }<br>
&gt;<br>
&gt; director www round-robin {<br>
&gt;<br>
&gt;     { .backend = web1;     }<br>
&gt;<br>
&gt;     { .backend = web2;   }<br>
&gt;<br>
&gt;   }<br>
&gt;<br>
&gt; sub vcl_recv {<br>
&gt;<br>
&gt;         if (! req.http.Authorization ~ &quot;Basic Base64Hash==&quot;)<br>
&gt;<br>
&gt;           {<br>
&gt;<br>
&gt;            error 401 &quot;Restricted&quot;;<br>
&gt;<br>
&gt;           }<br>
&gt;<br>
&gt;        if (req.url ~ &quot;&amp;action=submit($|/)&quot;) {<br>
&gt;<br>
&gt;              return (pass);<br>
&gt;<br>
&gt;        }<br>
&gt;<br>
&gt;        set req.backend = www;<br>
&gt;<br>
&gt;        return (lookup);<br>
&gt;<br>
&gt; }<br>
&gt;<br>
&gt; sub vcl_fetch {<br>
&gt;<br>
&gt;           set beresp.ttl = 3600s;<br>
&gt;<br>
&gt;           set beresp.grace = 4h;<br>
&gt;<br>
&gt;           return (deliver);<br>
&gt;<br>
&gt; }<br>
&gt;<br>
&gt; sub vcl_error {<br>
&gt;<br>
&gt;     if (obj.status == 401) {<br>
&gt;<br>
&gt;     set obj.http.Content-Type = &quot;text/html; charset=utf-8&quot;;<br>
&gt;<br>
&gt;     set obj.http.WWW-Authenticate = &quot;Basic realm=Secured&quot;;<br>
&gt;<br>
&gt;     synthetic {&quot;<br>
&gt;<br>
&gt;<br>
&gt;      &lt;!DOCTYPE HTML PUBLIC &quot;-//W3C//DTD HTML 4.01 \
Transitional//EN&quot;<br> &gt; &quot;<a \
href="http://www.w3.org/TR/1999/REC-html401-19991224/loose.dtd" rel="noreferrer" \
target="_blank">http://www.w3.org/TR/1999/REC-html401-19991224/loose.dtd</a>&quot;&gt;<br>
 &gt;<br>
&gt;<br>
&gt;        &lt;HTML&gt;<br>
&gt;<br>
&gt;        &lt;HEAD&gt;<br>
&gt;<br>
&gt;        &lt;TITLE&gt;Error&lt;/TITLE&gt;<br>
&gt;<br>
&gt;        &lt;META HTTP-EQUIV=&#39;Content-Type&#39; \
CONTENT=&#39;text/html;&#39;&gt;<br> &gt;<br>
&gt;        &lt;/HEAD&gt;<br>
&gt;<br>
&gt;        &lt;BODY&gt;&lt;H1&gt;401 Unauthorized \
(varnish)&lt;/H1&gt;&lt;/BODY&gt;<br> &gt;<br>
&gt;        &lt;/HTML&gt;<br>
&gt;<br>
&gt;        &quot;};<br>
&gt;<br>
&gt;         return (deliver);<br>
&gt;<br>
&gt;        }<br>
&gt;<br>
&gt; }<br>
&gt;<br>
&gt; sub vcl_deliver {<br>
&gt;<br>
&gt;         if (obj.hits&gt; 0) {<br>
&gt;<br>
&gt;           set resp.http.X-Cache = &quot;HIT&quot;;<br>
&gt;<br>
&gt;         } else {<br>
&gt;<br>
&gt;              set resp.http.X-Cache = &quot;MISS&quot;;<br>
&gt;<br>
&gt;         }<br>
&gt;<br>
&gt;   }<br>
&gt;<br>
&gt; Once again I genuinely appreciate the help of this list, and hope I \
haven&#39;t<br> &gt; worn out my welcome! ;)<br>
&gt;<br>
&gt; Thanks,<br>
&gt; Tim<br>
&gt;<br>
&gt;<br>
&gt; On Wed, Jul 8, 2015 at 9:31 PM, Jason Price &lt;<a \
href="mailto:japrice@gmail.com">japrice@gmail.com</a>&gt; wrote:<br> &gt;&gt;<br>
&gt;&gt; that interval and window on your web server is scary..... what \
you&#39;re<br> &gt;&gt; saying is &#39;check each web server every 10 minutes, and \
only fail it<br> &gt;&gt; after 3 failures&#39;<br>
&gt;&gt;<br>
&gt;&gt; next time you see the issue, look at:<br>
&gt;&gt;<br>
&gt;&gt; varnishadm -n &lt;varnish_name&gt; debug.health<br>
&gt;&gt;<br>
&gt;&gt; I&#39;d be willing to bet that varnish is just failing the backends.   \
Try<br> &gt;&gt; running the healthcheck manually from the varnish boxes:<br>
&gt;&gt;<br>
&gt;&gt; curl -H &quot;Host:<a href="http://wiki.example.com" rel="noreferrer" \
target="_blank">wiki.example.com</a>&quot; -v &quot;<a \
href="http://10.10.10.26/healthcheck.php" rel="noreferrer" \
target="_blank">http://10.10.10.26/healthcheck.php</a>&quot;<br> &gt;&gt;<br>
&gt;&gt; And see if you&#39;re actually getting good healthchecks.   If you&#39;re \
not,<br> &gt;&gt; then you need to look at your backends (specifically \
healthcheck.php)<br> &gt;&gt;<br>
&gt;&gt; On Wed, Jul 8, 2015 at 12:14 PM, Tim Dunphy &lt;<a \
href="mailto:bluethundr@gmail.com">bluethundr@gmail.com</a>&gt; wrote:<br> &gt;&gt; \
&gt; Hi guys,<br> &gt;&gt; &gt;<br>
&gt;&gt; &gt;<br>
&gt;&gt; &gt;   I&#39;m having an issue where my varnish server will stop working \
after a<br> &gt;&gt; &gt; while<br>
&gt;&gt; &gt; of browsing around the site I&#39;m using it with and throw a 503 \
server<br> &gt;&gt; &gt; unavailable error.<br>
&gt;&gt; &gt;<br>
&gt;&gt; &gt; In my varnish logs I&#39;m getting a &#39;no backend connection \
error&#39;:<br> &gt;&gt; &gt;<br>
&gt;&gt; &gt;      10 FetchError     c no backend connection<br>
&gt;&gt; &gt;      10 VCL_call        c error deliver<br>
&gt;&gt; &gt;      10 VCL_call        c deliver deliver<br>
&gt;&gt; &gt;      10 TxProtocol     c HTTP/1.1<br>
&gt;&gt; &gt;      10 TxStatus        c 503<br>
&gt;&gt; &gt;      10 TxResponse     c Service Unavailable<br>
&gt;&gt; &gt;      10 TxHeader        c Server: Varnish<br>
&gt;&gt; &gt;<br>
&gt;&gt; &gt;<br>
&gt;&gt; &gt; And if I do a GET on the healthcheck from the command line on the<br>
&gt;&gt; &gt; varnish<br>
&gt;&gt; &gt; server, I get a 503 response from varnish:<br>
&gt;&gt; &gt;<br>
&gt;&gt; &gt; #GET <a href="http://wiki.example.com/healthcheck.php" rel="noreferrer" \
target="_blank">http://wiki.example.com/healthcheck.php</a><br> &gt;&gt; &gt;<br>
&gt;&gt; &gt; &lt;?xml version=&quot;1.0&quot; encoding=&quot;utf-8&quot;?&gt;<br>
&gt;&gt; &gt; &lt;!DOCTYPE html PUBLIC &quot;-//W3C//DTD XHTML 1.0 \
Strict//EN&quot;<br> &gt;&gt; &gt;   &quot;<a \
href="http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd" rel="noreferrer" \
target="_blank">http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd</a>&quot;&gt;<br> \
&gt;&gt; &gt; &lt;html&gt;<br> &gt;&gt; &gt;     &lt;head&gt;<br>
&gt;&gt; &gt;        &lt;title&gt;503 Service Unavailable&lt;/title&gt;<br>
&gt;&gt; &gt;     &lt;/head&gt;<br>
&gt;&gt; &gt;     &lt;body&gt;<br>
&gt;&gt; &gt;        &lt;h1&gt;Error 503 Service Unavailable&lt;/h1&gt;<br>
&gt;&gt; &gt;        &lt;p&gt;Service Unavailable&lt;/p&gt;<br>
&gt;&gt; &gt;        &lt;h3&gt;Guru Meditation:&lt;/h3&gt;<br>
&gt;&gt; &gt;        &lt;p&gt;XID: <a href="tel:2107225059" \
value="+12107225059">2107225059</a>&lt;/p&gt;<br> &gt;&gt; &gt;        &lt;hr&gt;<br>
&gt;&gt; &gt;        &lt;p&gt;Varnish cache server&lt;/p&gt;<br>
&gt;&gt; &gt;     &lt;/body&gt;<br>
&gt;&gt; &gt; &lt;/html&gt;<br>
&gt;&gt; &gt;<br>
&gt;&gt; &gt; But if I do another GET on the healthcheck file from the varnish \
server<br> &gt;&gt; &gt; to<br>
&gt;&gt; &gt; another apache VHOST on the same server as the wiki site that \
responds<br> &gt;&gt; &gt; to<br>
&gt;&gt; &gt; the IP of the web server instead of the IP for the varnish server, \
the<br> &gt;&gt; &gt; GET<br>
&gt;&gt; &gt; works:<br>
&gt;&gt; &gt;<br>
&gt;&gt; &gt; #GET <a href="http://ops1.example.com/healthcheck.php" rel="noreferrer" \
target="_blank">http://ops1.example.com/healthcheck.php</a><br> &gt;&gt; &gt; \
good<br> &gt;&gt; &gt;<br>
&gt;&gt; &gt;<br>
&gt;&gt; &gt; So I&#39;m not sure why varnish is having trouble reaching the HC file. \
The<br> &gt;&gt; &gt; web<br>
&gt;&gt; &gt; server is a little far from the varnish server. The varnish machines \
are<br> &gt;&gt; &gt; in<br>
&gt;&gt; &gt; NYC and the web servers are in northern Virginia.<br>
&gt;&gt; &gt;<br>
&gt;&gt; &gt; So I tried setting the timeouts in the varnish config to a really \
high<br> &gt;&gt; &gt; number. And that was working for a while. But today I noticed \
that it<br> &gt;&gt; &gt; stopped working. I&#39;ll have to restart the varnish \
service and browse the<br> &gt;&gt; &gt; site for a while. Then it&#39;ll stop \
working again and produce the 503<br> &gt;&gt; &gt; error.<br>
&gt;&gt; &gt; It&#39;s pretty annoying!<br>
&gt;&gt; &gt;<br>
&gt;&gt; &gt; I was wondering if there might be something in my VCL I could tweak \
to<br> &gt;&gt; &gt; make<br>
&gt;&gt; &gt; this work? Or if the fact is that the web servers are simply too \
far<br> &gt;&gt; &gt; from<br>
&gt;&gt; &gt; varnish for this to be practical.<br>
&gt;&gt; &gt;<br>
&gt;&gt; &gt; Here&#39;s my VCL file. It&#39;s pretty basic:<br>
&gt;&gt; &gt;<br>
&gt;&gt; &gt; backend web1 {<br>
&gt;&gt; &gt;     .host = &quot;10.10.10.25&quot;;<br>
&gt;&gt; &gt;     .port = &quot;80&quot;;<br>
&gt;&gt; &gt;     .connect_timeout = 1200s;<br>
&gt;&gt; &gt;     .first_byte_timeout = 1200s;<br>
&gt;&gt; &gt;     .between_bytes_timeout = 1200s;<br>
&gt;&gt; &gt;     .max_connections = 70;<br>
&gt;&gt; &gt;     .probe = {<br>
&gt;&gt; &gt;     .request =<br>
&gt;&gt; &gt;      &quot;GET /healthcheck.php HTTP/1.1&quot;<br>
&gt;&gt; &gt;      &quot;Host: <a href="http://wiki.example.com" rel="noreferrer" \
target="_blank">wiki.example.com</a>&quot;<br> &gt;&gt; &gt;      &quot;Connection: \
close&quot;;<br> &gt;&gt; &gt;      .interval = 10m;<br>
&gt;&gt; &gt;      .timeout = 60s;<br>
&gt;&gt; &gt;      .window = 3;<br>
&gt;&gt; &gt;      .threshold = 2;<br>
&gt;&gt; &gt;      }<br>
&gt;&gt; &gt; }<br>
&gt;&gt; &gt;<br>
&gt;&gt; &gt; backend web2 {<br>
&gt;&gt; &gt;     .host = &quot;10.10.10.26&quot;;<br>
&gt;&gt; &gt;     .port = &quot;80&quot;;<br>
&gt;&gt; &gt;     .connect_timeout = 1200s;<br>
&gt;&gt; &gt;     .first_byte_timeout = 1200s;<br>
&gt;&gt; &gt;     .between_bytes_timeout = 1200s;<br>
&gt;&gt; &gt;     .max_connections = 70;<br>
&gt;&gt; &gt;     .probe = {<br>
&gt;&gt; &gt;     .request =<br>
&gt;&gt; &gt;      &quot;GET /healthcheck.php HTTP/1.1&quot;<br>
&gt;&gt; &gt;      &quot;Host: <a href="http://wiki.example.com" rel="noreferrer" \
target="_blank">wiki.example.com</a>&quot;<br> &gt;&gt; &gt;      &quot;Connection: \
close&quot;;<br> &gt;&gt; &gt;      .interval = 10m;<br>
&gt;&gt; &gt;      .timeout = 60s;<br>
&gt;&gt; &gt;      .window = 3;<br>
&gt;&gt; &gt;      .threshold = 2;<br>
&gt;&gt; &gt;      }<br>
&gt;&gt; &gt; }<br>
&gt;&gt; &gt;<br>
&gt;&gt; &gt; director www round-robin {<br>
&gt;&gt; &gt;     { .backend = web1;     }<br>
&gt;&gt; &gt;     { .backend = web2;   }<br>
&gt;&gt; &gt;   }<br>
&gt;&gt; &gt;<br>
&gt;&gt; &gt; sub vcl_recv {<br>
&gt;&gt; &gt;<br>
&gt;&gt; &gt;        if (req.url ~ &quot;&amp;action=submit($|/)&quot;) {<br>
&gt;&gt; &gt;              return (pass);<br>
&gt;&gt; &gt;        }<br>
&gt;&gt; &gt;<br>
&gt;&gt; &gt;        set req.backend = www;<br>
&gt;&gt; &gt;        return (lookup);<br>
&gt;&gt; &gt; }<br>
&gt;&gt; &gt;<br>
&gt;&gt; &gt; sub vcl_fetch {<br>
&gt;&gt; &gt;           set beresp.ttl = 3600s;<br>
&gt;&gt; &gt;           set beresp.grace = 4h;<br>
&gt;&gt; &gt;           return (deliver);<br>
&gt;&gt; &gt; }<br>
&gt;&gt; &gt;<br>
&gt;&gt; &gt;<br>
&gt;&gt; &gt; sub vcl_deliver {<br>
&gt;&gt; &gt;         if (obj.hits&gt; 0) {<br>
&gt;&gt; &gt;           set resp.http.X-Cache = &quot;HIT&quot;;<br>
&gt;&gt; &gt;         } else {<br>
&gt;&gt; &gt;              set resp.http.X-Cache = &quot;MISS&quot;;<br>
&gt;&gt; &gt;         }<br>
&gt;&gt; &gt;   }<br>
&gt;&gt; &gt;<br>
&gt;&gt; &gt; Thanks,<br>
&gt;&gt; &gt; Tim<br>
&gt;&gt; &gt;<br>
&gt;&gt; &gt;<br>
&gt;&gt; &gt;<br>
&gt;&gt; &gt; --<br>
&gt;&gt; &gt; GPG me!!<br>
&gt;&gt; &gt;<br>
&gt;&gt; &gt; gpg --keyserver <a href="http://pool.sks-keyservers.net" \
rel="noreferrer" target="_blank">pool.sks-keyservers.net</a> --recv-keys F186197B<br> \
&gt;&gt; &gt;<br> &gt;&gt; &gt;<br>
&gt;&gt; &gt; _______________________________________________<br>
&gt;&gt; &gt; varnish-misc mailing list<br>
&gt;&gt; &gt; <a href="mailto:varnish-misc@varnish-cache.org">varnish-misc@varnish-cache.org</a><br>
 &gt;&gt; &gt; <a href="https://www.varnish-cache.org/lists/mailman/listinfo/varnish-misc" \
rel="noreferrer" target="_blank">https://www.varnish-cache.org/lists/mailman/listinfo/varnish-misc</a><br>
 &gt;<br>
&gt;<br>
&gt;<br>
&gt;<br>
&gt; --<br>
&gt; GPG me!!<br>
&gt;<br>
&gt; gpg --keyserver <a href="http://pool.sks-keyservers.net" rel="noreferrer" \
target="_blank">pool.sks-keyservers.net</a> --recv-keys F186197B<br> &gt;<br>
</div></div></blockquote></div><br><br clear="all"><br>-- <br><div \
class="gmail_signature">GPG me!!<br><br>gpg --keyserver <a \
href="http://pool.sks-keyservers.net" target="_blank">pool.sks-keyservers.net</a> \
--recv-keys F186197B<br><br></div> </div>

_______________________________________________
varnish-misc mailing list
varnish-misc@varnish-cache.org
https://www.varnish-cache.org/lists/mailman/listinfo/varnish-misc

[prev in list] [next in list] [prev in thread] [next in thread]