[prev in list] [next in list] [prev in thread] [next in thread]
List: varnish-misc
Subject: Re: 503 service unavailable error
From: Tim Dunphy <bluethundr () gmail ! com>
Date: 2015-07-09 20:50:36
Message-ID: CAOZy0ekBDhvra1xdR9Rz1J4r1fZbFSCGmy3kdVR+9rLj8VQu0w () mail ! gmail ! com
[Download RAW message or body]
[Attachment #2 (multipart/alternative)]
Hey Jason,
You're never specifying any auth in your probe:
>
> .probe = {
> .request =
> "GET /healthcheck.php HTTP/1.1"
> "Host: wiki.example.com"
> "Connection: close";
Yeah, understood. Actually when I mailed yesterday that was something I was
planning on doing. Not something I had done. But sometimes I'm not very
clear in explaining things.
At any rate, I was able to get the Basic Auth headers into my .probe
.request and the good news is it seems to have worked!!
This was the change that I made:
.request =
"GET /healthcheck.php HTTP/1.1"
"Host: wiki.jokefire.com"
"Authorization: Basic myBase64Hash=="
"Connection: close";
So after that change was made and I cycled varnish I literally NEVER got
the 503 error again. Just an occasional 504 that went away on a page
reload. But nothing serious. And even that could probably be eliminated with
some VCL tweaking.
So after that success I made some modifications to the VCL to make it work
a little better with mediawiki. Here's the current state of my VCL for
anyone that's interested.
# Backend "web1": one of the two web servers placed behind the "www" director.
backend web1 {
    .host = "10.10.10.25";
    .port = "80";
    .max_connections = 70;

    # NOTE(review): the accompanying message describes these one-hour
    # timeouts as temporary troubleshooting values; tighten them before
    # relying on this configuration.
    .connect_timeout = 3600s;
    .first_byte_timeout = 3600s;
    .between_bytes_timeout = 3600s;

    # Health probe: requests /healthcheck.php and sends Basic Auth
    # credentials along with it.
    # NOTE(review): a 10-minute interval means a state change can take
    # many minutes to be noticed -- also a troubleshooting value.
    .probe = {
        .window = 3;
        .threshold = 2;
        .interval = 10m;
        .timeout = 60s;
        .request =
            "GET /healthcheck.php HTTP/1.1"
            "Host: wiki.example.com"
            "Authorization: Basic Base64Hash=="
            "Connection: close";
    }
}
# Backend "web2": one of the two web servers placed behind the "www" director.
backend web2 {
    .host = "10.10.10.26";
    .port = "80";
    .max_connections = 70;

    # NOTE(review): the accompanying message describes these one-hour
    # timeouts as temporary troubleshooting values; tighten them before
    # relying on this configuration.
    .connect_timeout = 3600s;
    .first_byte_timeout = 3600s;
    .between_bytes_timeout = 3600s;

    # Health probe: requests /healthcheck.php and sends Basic Auth
    # credentials along with it.
    # NOTE(review): a 10-minute interval means a state change can take
    # many minutes to be noticed -- also a troubleshooting value.
    .probe = {
        .window = 3;
        .threshold = 2;
        .interval = 10m;
        .timeout = 60s;
        .request =
            "GET /healthcheck.php HTTP/1.1"
            "Host: wiki.example.com"
            "Authorization: Basic Base64Hash=="
            "Connection: close";
    }
}
# Round-robin director "www": spreads requests across web1 and web2.
director www round-robin {
    {
        .backend = web1;
    }
    {
        .backend = web2;
    }
}
# Access control list for "purge": clients allowed to issue PURGE requests
# (checked in vcl_recv). Currently only localhost is listed; add other
# local nodes here if they also need to purge.
acl purge {
    "127.0.0.1";
}
# Entry point for every client request: authenticates, normalises the
# request and decides between lookup, pass and pipe.
sub vcl_recv {
    # Select the director before any return statement so that every exit
    # from this subroutine (including the early pass for edit submissions)
    # is served by the "www" director.
    set req.backend = www;

    # Fold the www. alias onto the canonical host name. The replacement
    # must be a single-line string literal.
    set req.http.host = regsub(req.http.host, "^www\.wiki\.example\.com$", "wiki.example.com");

    # Serve objects up to 2 minutes past their expiry if the backend
    # is slow to respond.
    set req.grace = 120s;

    # Require the expected Basic Auth credentials; everything else gets
    # the 401 challenge built in vcl_error.
    if (! req.http.Authorization ~ "Basic myBase64Hash==") {
        error 401 "Restricted";
    }

    # Edit submissions always go straight to the backend.
    if (req.url ~ "&action=submit($|/)") {
        return (pass);
    }

    # Record the client address in X-Forwarded-For, once per request
    # (not again after a restart).
    if (req.restarts == 0) {
        if (req.http.x-forwarded-for) {
            set req.http.X-Forwarded-For = req.http.X-Forwarded-For + ", " + client.ip;
        } else {
            set req.http.X-Forwarded-For = client.ip;
        }
    }

    # PURGE is only accepted from addresses in the "purge" ACL; any other
    # client is rejected with a 405.
    if (req.request == "PURGE") {
        if (!client.ip ~ purge) {
            error 405 "Not allowed.";
        }
        return (lookup);
    }

    # Non-RFC2616 methods or CONNECT are piped through untouched.
    if (req.request != "GET" && req.request != "HEAD" &&
        req.request != "PUT" && req.request != "POST" &&
        req.request != "TRACE" && req.request != "OPTIONS" &&
        req.request != "DELETE") {
        return (pipe);
    }

    # Pass anything other than GET and HEAD directly; only GET and HEAD
    # are candidates for caching.
    if (req.request != "GET" && req.request != "HEAD") {
        return (pass);
    }

    # Pass requests from logged-in users directly (not cacheable by default).
    # NOTE(review): the credential check above rejects every request that
    # lacks an Authorization header, so this condition is true for every
    # GET/HEAD that reaches this point and the statements below it never
    # run. Confirm that bypassing the cache for all page views is intended.
    if (req.http.Authorization || req.http.Cookie) {
        return (pass);
    }

    # Pass any requests with the "If-None-Match" header directly.
    if (req.http.If-None-Match) {
        return (pass);
    }

    # Normalize Accept-Encoding to reduce the number of Vary variants.
    if (req.http.Accept-Encoding) {
        if (req.http.User-Agent ~ "MSIE 6") {
            unset req.http.Accept-Encoding;
        } elsif (req.http.Accept-Encoding ~ "gzip") {
            set req.http.Accept-Encoding = "gzip";
        } elsif (req.http.Accept-Encoding ~ "deflate") {
            set req.http.Accept-Encoding = "deflate";
        } else {
            unset req.http.Accept-Encoding;
        }
    }

    return (lookup);
}
# Called when a request is piped straight through to the backend.
sub vcl_pipe {
    # Only the first request on a piped connection has X-Forwarded-For
    # set. Forcing "Connection: close" on the backend request makes each
    # subsequent request open a new connection and go through VCL again,
    # so the header is set for all of them. This is otherwise not
    # necessary if you do not do any request rewriting.
    #
    # In vcl_pipe the request sent to the backend is bereq, so the header
    # has to be set there; changing req at this point does not affect
    # what the backend receives.
    set bereq.http.connection = "close";
}
# Called if the cache has a copy of the page.
sub vcl_hit {
    if (req.request == "PURGE") {
        # Invalidate exactly the requested URL. ban_url() would interpret
        # req.url as an unanchored regular expression, so characters such
        # as "?", "." or "(" in the URL change what is matched and longer
        # URLs that merely contain this one would be invalidated as well.
        ban("req.url == " + req.url);
        error 200 "Purged";
    }

    # An object without remaining TTL is fetched from the backend instead.
    if (obj.ttl <= 0s) {
        return (pass);
    }
}
# Called if the cache does not have a copy of the page.
sub vcl_miss {
    # A PURGE for something that is not cached has nothing to remove;
    # answer with a synthetic 200 instead of fetching from the backend.
    if (req.request == "PURGE") {
        error 200 "Not in cache";
    }
}
# Called after a document has been successfully retrieved from the backend.
sub vcl_fetch {
    # set minimum timeouts to auto-discard stored objects
    # set beresp.prefetch = -30s;
    set beresp.grace = 120s;

    # Test for uncacheable responses BEFORE raising the TTL: once the TTL
    # has been forced up to 48h this condition could never be true and
    # such responses would be cached for two days.
    if (beresp.ttl <= 0s) {
        # Remember the decision for two minutes, as the built-in VCL does.
        set beresp.ttl = 120s;
        return (hit_for_pass);
    }

    # Keep cacheable objects for at least 48 hours.
    if (beresp.ttl < 48h) {
        set beresp.ttl = 48h;
    }

    # Responses that set cookies are per-user and must not be shared.
    if (beresp.http.Set-Cookie) {
        return (hit_for_pass);
    }

    #if (beresp.http.Cache-Control ~ "(private|no-cache|no-store)")
    #    {return(hit_for_pass);}

    # Responses to authenticated requests are only cached when the
    # backend explicitly marks them as public.
    if (req.http.Authorization && !beresp.http.Cache-Control ~ "public") {
        return (hit_for_pass);
    }
}
# Builds the synthetic response for errors raised from VCL. Only the 401
# raised by the credential check in vcl_recv is customised here; every
# other status falls through to the built-in error handling.
sub vcl_error {
    if (obj.status == 401) {
        set obj.http.Content-Type = "text/html; charset=utf-8";
        # Ask the browser to prompt for Basic Auth credentials.
        set obj.http.WWW-Authenticate = "Basic realm=Secured";
        # The DOCTYPE declaration is kept on one line so that no line
        # break ends up inside the quoted DTD URI.
        synthetic {"
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/1999/REC-html401-19991224/loose.dtd">
<HTML>
<HEAD>
<TITLE>Error</TITLE>
<META HTTP-EQUIV='Content-Type' CONTENT='text/html;'>
</HEAD>
<BODY><H1>401 Unauthorized (varnish)</H1></BODY>
</HTML>
"};
        return (deliver);
    }
}
# Runs just before the response is sent to the client: adds an X-Cache
# header telling whether the object was served from cache.
sub vcl_deliver {
    if (obj.hits > 0) {
        # Delivered from cache at least once before.
        set resp.http.X-Cache = "HIT";
    }
    else {
        # Not previously delivered from cache.
        set resp.http.X-Cache = "MISS";
    }
}
Now, all that's left to do is to set those completely insane timeouts I've
been using to try and troubleshoot the problem to something a little more
reasonable.
Thanks for all the help!
Tim
On Thu, Jul 9, 2015 at 9:01 AM, Jason Price <japrice@gmail.com> wrote:
> You're never specifying any auth in your probe:
>
> .probe = {
> .request =
> "GET /healthcheck.php HTTP/1.1"
> "Host: wiki.example.com"
> "Connection: close";
>
> I don't know the proper way to specify it, but you'll need to play
> around with curl, wireshark and varnish probes until you get it right.
>
> May be easier to test with telnet invocations:
>
> telnet 10.10.10.26 80
> GET /healthcheck.php HTTP/1.1
> Host: wiki.example.com
> Authorization: Basic ???????????????
> Connection: close
>
>
> The above should give you an auth failure request. Twiddle with that
> until you get a successful authentication request, then translate it
> into the probe .request format. The link you provided gives you
> everything else you need.
>
> -Jason
>
> On Wed, Jul 8, 2015 at 11:19 PM, Tim Dunphy <bluethundr@gmail.com> wrote:
> >> that interval and window on your web server is scary..... what you're
> >> saying is 'check each web server every 10 minutes, and only fail it
> >> after 3 failures'
> >
> >
> > Hah!! Agreed. I was just trying to rule the connect timeouts out of the
> > picture as to why the failures were happening!
> > I plan to set them to more normal intervals once I'm finished testing and
> > I've been able to get this to work.
> >
> >>
> >>
> >> next time you see the issue, look at:
> >> varnishadm -n <varnish_name> debug.health
> >
> >
> > Hmm you may have a point as to the back ends. Varnish is indeed seeing
> them
> > as 'sick' when I encounter the 503 error:
> >
> >
> > [root@varnish1:~] #varnishadm -n varnish1 debug.health
> > Backend web1 is Sick
> > Current states good: 0 threshold: 2 window: 3
> > Average responsetime of good probes: 0.000000
> > Oldest Newest
> > ================================================================
> > ------------------------------------------------------4444444444 Good
> IPv4
> > ------------------------------------------------------XXXXXXXXXX Good
> Xmit
> > ------------------------------------------------------RRRRRRRRRR Good
> Recv
> > ----------------------------------------------------HH---------- Happy
> > Backend web2 is Sick
> > Current states good: 0 threshold: 2 window: 3
> > Average responsetime of good probes: 0.000000
> > Oldest Newest
> > ================================================================
> > ------------------------------------------------------4444444444 Good
> IPv4
> > ------------------------------------------------------XXXXXXXXXX Good
> Xmit
> > ------------------------------------------------------RRRRRRRRRR Good
> Recv
> > ----------------------------------------------------HH---------- Happy
> >
> >>
> >>
> >> I'd be willing to bet that varnish is just failing the backends. Try
> >> running the healthcheck manually from the varnish boxes:
> >> curl -H "Host:wiki.example.com" -v "http://10.10.10.26/healthcheck.php"
> >> And see if you're actually getting good healthchecks. If you're not,
> >> then you need to look at your backends (specifically healthcheck.php)
> >
> >
> > But if I perform the curl you're suggesting, I am able to retrieve the
> > healthcheck.php file!!
> >
> > #curl --user admin:somepass -H "Host:wiki.example.com" -v
> > "http://10.10.10.25/healthcheck.php"
> > * About to connect() to 52.5.117.61 port 80 (#0)
> > * Trying 52.5.117.61... connected
> > * Connected to 52.5.117.61 (52.5.117.61) port 80 (#0)
> > * Server auth using Basic with user 'admin'
> >> GET /healthcheck.php HTTP/1.1
> >> Authorization: Basic SomeBase64Hash==
> >> User-Agent: curl/7.19.7 (x86_64-redhat-linux-gnu) libcurl/7.19.7
> >> NSS/3.14.0.0 zlib/1.2.3 libidn/1.18 libssh2/1.4.2
> >> Accept: */*
> >> Host:wiki.example.com
> >>
> > < HTTP/1.1 200 OK
> > < Date: Thu, 09 Jul 2015 02:10:35 GMT
> > < Server: Apache/2.4.6 (CentOS) OpenSSL/1.0.1e-fips mod_fcgid/2.3.9
> > PHP/5.4.42 SVN/1.7.14 mod_wsgi/3.4 Python/2.7.5
> > < X-Powered-By: PHP/5.4.42
> > < Content-Length: 5
> > < Content-Type: text/html; charset=UTF-8
> > <
> > good
> > * Connection #0 to host 52.5.117.61 left intact
> > * Closing connection #0
> >
> > But in the curl I just did I was specifying the user auth. Which got me
> to
> > thinking, maybe I'm handling Apache basic auth in the wrong way in my VCL
> > file?
> >
> > To test this idea out, I commented out the basic auth lines in my apache
> > config. Then cycled the services on both apache servers and both varnish
> > servers.
> >
> > When I ran the test you gave me again, this is the result I got back:
> >
> > #varnishadm -n varnish1 debug.health
> > Backend web1 is Healthy
> > Current states good: 3 threshold: 2 window: 3
> > Average responsetime of good probes: 0.032781
> > Oldest Newest
> > ================================================================
> > ---------------------------------------------------------------4 Good
> IPv4
> > ---------------------------------------------------------------X Good
> Xmit
> > ---------------------------------------------------------------R Good
> Recv
> > -------------------------------------------------------------HHH Happy
> > Backend web2 is Healthy
> > Current states good: 3 threshold: 2 window: 3
> > Average responsetime of good probes: 0.032889
> > Oldest Newest
> > ================================================================
> > ---------------------------------------------------------------4 Good
> IPv4
> > ---------------------------------------------------------------X Good
> Xmit
> > ---------------------------------------------------------------R Good
> Recv
> > -------------------------------------------------------------HHH Happy
> >
> > Everybody's happy again!!
> >
> > And I tried browsing around the wiki for quite a long time. And there
> were
> > NO 503 errors the entire time I was using it. Which tells me that I am,
> > indeed, not handling auth correctly in my VCL.
> >
> > The way I thought I solved the problem was by adding a .request to the
> web
> > server definitions that specified the headers to do a GET on the health
> > check:
> >
> > .request =
> > "GET /healthcheck.php HTTP/1.1"
> > "Host: wiki.example.com"
> > "Connection: close";
> >
> > The reason I thought this worked was because, after I'd restarted varnish
> > with that change in place I was able to log into the wiki with basic
> auth in
> > the web browser. And then I'd be able to use it for a while before the
> > back-end would come up as 'sick' in varnish again which would cause the
> 503
> > error.
> >
> > I then tried following this advice again, which I had also tried earlier
> > without much luck:
> >
> > http://blog.tenya.me/blog/2011/12/14/varnish-http-authentication/
> >
> > Which tells you to add this section to your VCL file:
> >
> > if (! req.http.Authorization ~ "Basic SomeBase64Hash==")
> > {
> > error 401 "Restricted";
> > }
> >
> > And then add this sub_vcl section:
> >
> > sub vcl_error {
> >
> > if (obj.status == 401) {
> > set obj.http.Content-Type = "text/html; charset=utf-8";
> > set obj.http.WWW-Authenticate = "Basic realm=Secured";
> > synthetic {"
> >
> > <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
> > "http://www.w3.org/TR/1999/REC-html401-19991224/loose.dtd">
> >
> > <HTML>
> > <HEAD>
> > <TITLE>Error</TITLE>
> > <META HTTP-EQUIV='Content-Type' CONTENT='text/html;'>
> > </HEAD>
> > <BODY><H1>401 Unauthorized (varnish)</H1></BODY>
> > </HTML>
> > "};
> > return (deliver);
> > }
> > }
> >
> > And after restarting varnish again on both nodes, with authentication in
> > place in the VHOST configs on the web servers I was able to log into the
> > wiki site again and browse around for a while.
> >
> > But then after some browsing around the back ends would go sick again and
> > you would see the 503:
> >
> > #varnishadm -n varnish1 debug.health
> > Backend web1 is Sick
> > Current states good: 1 threshold: 2 window: 3
> > Average responsetime of good probes: 0.000000
> > Oldest Newest
> > ================================================================
> > --------------------------------------------------------------44 Good
> IPv4
> > --------------------------------------------------------------XX Good
> Xmit
> > --------------------------------------------------------------RR Good
> Recv
> > ------------------------------------------------------------HH-- Happy
> > Backend web2 is Sick
> > Current states good: 1 threshold: 2 window: 3
> > Average responsetime of good probes: 0.000000
> > Oldest Newest
> > ================================================================
> > --------------------------------------------------------------44 Good
> IPv4
> > --------------------------------------------------------------XX Good
> Xmit
> > --------------------------------------------------------------RR Good
> Recv
> > ------------------------------------------------------------HH-- Happy
> >
> > So SOMETHING must still be off with how I'm handling authentication in my
> > VCL config. The next step I'm thinking of trying involves passing the
> > authentication headers to the .request section of my web server
> definition.
> > Although I'm not sure if it'll work. I'll let you guys know if it does.
> >
> > But I'd like to present the current state of my VCL again in case anyone
> has
> > any insight or knowledge to share that may help.
> >
> > backend web1 {
> >
> > .host = "10.10.10.25";
> >
> > .port = "80";
> >
> > .connect_timeout = 3600s;
> >
> > .first_byte_timeout = 3600s;
> >
> > .between_bytes_timeout = 3600s;
> >
> > .max_connections = 70;
> >
> > .probe = {
> >
> > .request =
> >
> > "GET /healthcheck.php HTTP/1.1"
> >
> > "Host: wiki.example.com"
> >
> > "Connection: close";
> >
> > .interval = 10m;
> >
> > .timeout = 60s;
> >
> > .window = 3;
> >
> > .threshold = 2;
> >
> > }
> >
> > }
> >
> > backend web2 {
> >
> > .host = "10.10.10.26";
> >
> > .port = "80";
> >
> > .connect_timeout = 3600s;
> >
> > .first_byte_timeout = 3600s;
> >
> > .between_bytes_timeout = 3600s;
> >
> > .max_connections = 70;
> >
> > .probe = {
> >
> > .request =
> >
> > "GET /healthcheck.php HTTP/1.1"
> >
> > "Host: wiki.example.com"
> >
> > "Connection: close";
> >
> > .interval = 10m;
> >
> > .timeout = 60s;
> >
> > .window = 3;
> >
> > .threshold = 2;
> >
> > }
> >
> > }
> >
> > director www round-robin {
> >
> > { .backend = web1; }
> >
> > { .backend = web2; }
> >
> > }
> >
> > sub vcl_recv {
> >
> > if (! req.http.Authorization ~ "Basic Base64Hash==")
> >
> > {
> >
> > error 401 "Restricted";
> >
> > }
> >
> > if (req.url ~ "&action=submit($|/)") {
> >
> > return (pass);
> >
> > }
> >
> > set req.backend = www;
> >
> > return (lookup);
> >
> > }
> >
> > sub vcl_fetch {
> >
> > set beresp.ttl = 3600s;
> >
> > set beresp.grace = 4h;
> >
> > return (deliver);
> >
> > }
> >
> > sub vcl_error {
> >
> > if (obj.status == 401) {
> >
> > set obj.http.Content-Type = "text/html; charset=utf-8";
> >
> > set obj.http.WWW-Authenticate = "Basic realm=Secured";
> >
> > synthetic {"
> >
> >
> > <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
> > "http://www.w3.org/TR/1999/REC-html401-19991224/loose.dtd">
> >
> >
> > <HTML>
> >
> > <HEAD>
> >
> > <TITLE>Error</TITLE>
> >
> > <META HTTP-EQUIV='Content-Type' CONTENT='text/html;'>
> >
> > </HEAD>
> >
> > <BODY><H1>401 Unauthorized (varnish)</H1></BODY>
> >
> > </HTML>
> >
> > "};
> >
> > return (deliver);
> >
> > }
> >
> > }
> >
> > sub vcl_deliver {
> >
> > if (obj.hits> 0) {
> >
> > set resp.http.X-Cache = "HIT";
> >
> > } else {
> >
> > set resp.http.X-Cache = "MISS";
> >
> > }
> >
> > }
> >
> > Once again I genuinely appreciate the help of this list, and hope I
> haven't
> > worn out my welcome! ;)
> >
> > Thanks,
> > Tim
> >
> >
> > On Wed, Jul 8, 2015 at 9:31 PM, Jason Price <japrice@gmail.com> wrote:
> >>
> >> that interval and window on your web server is scary..... what you're
> >> saying is 'check each web server every 10 minutes, and only fail it
> >> after 3 failures'
> >>
> >> next time you see the issue, look at:
> >>
> >> varnishadm -n <varnish_name> debug.health
> >>
> >> I'd be willing to bet that varnish is just failing the backends. Try
> >> running the healthcheck manually from the varnish boxes:
> >>
> >> curl -H "Host:wiki.example.com" -v "http://10.10.10.26/healthcheck.php"
> >>
> >> And see if you're actually getting good healthchecks. If you're not,
> >> then you need to look at your backends (specifically healthcheck.php)
> >>
> >> On Wed, Jul 8, 2015 at 12:14 PM, Tim Dunphy <bluethundr@gmail.com>
> wrote:
> >> > Hi guys,
> >> >
> >> >
> >> > I'm having an issue where my varnish server will stop working after a
> >> > while
> >> > of browsing around the site I'm using it with and throw a 503 server
> >> > unavailable error.
> >> >
> >> > In my varnish logs I'm getting a 'no backend connection error':
> >> >
> >> > 10 FetchError c no backend connection
> >> > 10 VCL_call c error deliver
> >> > 10 VCL_call c deliver deliver
> >> > 10 TxProtocol c HTTP/1.1
> >> > 10 TxStatus c 503
> >> > 10 TxResponse c Service Unavailable
> >> > 10 TxHeader c Server: Varnish
> >> >
> >> >
> >> > And if I do a GET on the healthcheck from the command line on the
> >> > varnish
> >> > server, I get a 503 response from varnish:
> >> >
> >> > #GET http://wiki.example.com/healthcheck.php
> >> >
> >> > <?xml version="1.0" encoding="utf-8"?>
> >> > <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
> >> > "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
> >> > <html>
> >> > <head>
> >> > <title>503 Service Unavailable</title>
> >> > </head>
> >> > <body>
> >> > <h1>Error 503 Service Unavailable</h1>
> >> > <p>Service Unavailable</p>
> >> > <h3>Guru Meditation:</h3>
> >> > <p>XID: 2107225059</p>
> >> > <hr>
> >> > <p>Varnish cache server</p>
> >> > </body>
> >> > </html>
> >> >
> >> > But if I do another GET on the healthcheck file from the varnish
> server
> >> > to
> >> > another apache VHOST on the same server as the wiki site that responds
> >> > to
> >> > the IP of the web server instead of the IP for the varnish server, the
> >> > GET
> >> > works:
> >> >
> >> > #GET http://ops1.example.com/healthcheck.php
> >> > good
> >> >
> >> >
> >> > So I'm not sure why varnish is having trouble reaching the HC file.
> The
> >> > web
> >> > server is a little far from the varnish server. The varnish machines
> are
> >> > in
> >> > NYC and the web servers are in northern Virginia.
> >> >
> >> > So I tried setting the timeouts in the varnish config to a really high
> >> > number. And that was working for a while. But today I noticed that it
> >> > stopped working. I'll have to restart the varnish service and browse
> the
> >> > site for a while. Then it'll stop working again and produce the 503
> >> > error.
> >> > It's pretty annoying!
> >> >
> >> > I was wondering if there might be something in my VCL I could tweak to
> >> > make
> >> > this work? Or if the fact is that the web servers are simply too far
> >> > from
> >> > varnish for this to be practical.
> >> >
> >> > Here's my VCL file. It's pretty basic:
> >> >
> >> > backend web1 {
> >> > .host = "10.10.10.25";
> >> > .port = "80";
> >> > .connect_timeout = 1200s;
> >> > .first_byte_timeout = 1200s;
> >> > .between_bytes_timeout = 1200s;
> >> > .max_connections = 70;
> >> > .probe = {
> >> > .request =
> >> > "GET /healthcheck.php HTTP/1.1"
> >> > "Host: wiki.example.com"
> >> > "Connection: close";
> >> > .interval = 10m;
> >> > .timeout = 60s;
> >> > .window = 3;
> >> > .threshold = 2;
> >> > }
> >> > }
> >> >
> >> > backend web2 {
> >> > .host = "10.10.10.26";
> >> > .port = "80";
> >> > .connect_timeout = 1200s;
> >> > .first_byte_timeout = 1200s;
> >> > .between_bytes_timeout = 1200s;
> >> > .max_connections = 70;
> >> > .probe = {
> >> > .request =
> >> > "GET /healthcheck.php HTTP/1.1"
> >> > "Host: wiki.example.com"
> >> > "Connection: close";
> >> > .interval = 10m;
> >> > .timeout = 60s;
> >> > .window = 3;
> >> > .threshold = 2;
> >> > }
> >> > }
> >> >
> >> > director www round-robin {
> >> > { .backend = web1; }
> >> > { .backend = web2; }
> >> > }
> >> >
> >> > sub vcl_recv {
> >> >
> >> > if (req.url ~ "&action=submit($|/)") {
> >> > return (pass);
> >> > }
> >> >
> >> > set req.backend = www;
> >> > return (lookup);
> >> > }
> >> >
> >> > sub vcl_fetch {
> >> > set beresp.ttl = 3600s;
> >> > set beresp.grace = 4h;
> >> > return (deliver);
> >> > }
> >> >
> >> >
> >> > sub vcl_deliver {
> >> > if (obj.hits> 0) {
> >> > set resp.http.X-Cache = "HIT";
> >> > } else {
> >> > set resp.http.X-Cache = "MISS";
> >> > }
> >> > }
> >> >
> >> > Thanks,
> >> > Tim
> >> >
> >> >
> >> >
> >> > --
> >> > GPG me!!
> >> >
> >> > gpg --keyserver pool.sks-keyservers.net --recv-keys F186197B
> >> >
> >> >
> >> > _______________________________________________
> >> > varnish-misc mailing list
> >> > varnish-misc@varnish-cache.org
> >> > https://www.varnish-cache.org/lists/mailman/listinfo/varnish-misc
> >
> >
> >
> >
> > --
> > GPG me!!
> >
> > gpg --keyserver pool.sks-keyservers.net --recv-keys F186197B
> >
>
--
GPG me!!
gpg --keyserver pool.sks-keyservers.net --recv-keys F186197B
[Attachment #5 (text/html)]
<div dir="ltr">Hey Jason,<div><br></div><div><blockquote style="margin:0px 0px 0px \
0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex" \
class="gmail_quote"><span style="font-size:12.8px">You're never specifying any \
auth in your probe:</span><br style="font-size:12.8px"><span \
style="font-size:12.8px"></span><br><span style="font-size:12.8px"> .probe = \
{</span><br><span style="font-size:12.8px"> .request =</span><br><span \
style="font-size:12.8px"> "GET /healthcheck.php \
HTTP/1.1"</span><br><span style="font-size:12.8px"> "Host: <a \
href="http://wiki.example.com/" rel="noreferrer" \
target="_blank">wiki.example.com</a>"</span><br><span style="font-size:12.8px"> \
"Connection: close";</span></blockquote><div><br></div><div>Yeah, \
understood. Actually when I mailed yesterday that was something I was planning on \
doing. Not something I had done. But sometimes I'm not very clear in explaining \
things. <br><br></div><div>At any rate, I was able to get the Basic Auth headers into \
my .probe .request and the good news is it seems to have \
worked!!<br><br></div><div>This was the change that I made:<br><br> .request =<br> \
"GET /healthcheck.php HTTP/1.1"<br> "Host: <a \
href="http://wiki.jokefire.com">wiki.jokefire.com</a>"<br> \
"Authorization: Basic myBase64Hash=="<br> "Connection: \
close";<br></div><br><br></div><div>So after that change was made and I cycled \
varnish I literally NEVER got the 503 error again. Just an occasional 504 that went \
away on a page reload. But nothing serious. And even that could probably be done away \
with some VCL tweaking. <br><br></div><div>So after that success I made some \
modifications to the VCL to make it work a little better with mediawiki. Here's \
the current state of my VCL for anyone that's interested. <br><br><br>backend \
web1 {<br> .host = "10.10.10.25";<br> .port = "80";<br> \
.connect_timeout = 3600s;<br> .first_byte_timeout = 3600s;<br> \
.between_bytes_timeout = 3600s;<br> .max_connections = 70;<br> .probe = {<br> \
.request =<br> "GET /healthcheck.php HTTP/1.1"<br> "Host: <a \
href="http://wiki.example.com">wiki.example.com</a>"<br> \
"Authorization: Basic Base64Hash=="<br> "Connection: \
close";<br> .interval = 10m;<br> .timeout = 60s;<br> .window = \
3;<br> .threshold = 2;<br> }<br>}<br><br>backend web2 {<br> .host = \
"10.10.10.26";<br> .port = "80";<br> .connect_timeout = 3600s;<br> \
.first_byte_timeout = 3600s;<br> .between_bytes_timeout = 3600s;<br> \
.max_connections = 70;<br> .probe = {<br> .request =<br> "GET \
/healthcheck.php HTTP/1.1"<br> "Host: <a \
href="http://wiki.example.com">wiki.example.com</a>"<br> \
"Authorization: Basic Base64Hash=="<br> "Connection: \
close";<br> .interval = 10m;<br> .timeout = 60s;<br> .window = \
3;<br> .threshold = 2;<br> }<br>}<br><br><br>director www round-robin {<br> \
{ .backend = web1; }<br> { .backend = web2; }<br> }<br><br># access control \
list for "purge": open to only localhost and other local nodes<br>acl purge \
{<br> "127.0.0.1";<br>}<br><br>sub vcl_recv {<br><br><br> set \
req.http.host = regsub(req.http.host, "^www\.wiki\.example\.com$","<a \
href="http://wiki.example.com">wiki.example.com</a>");<br><br> # Serve \
objects up to 2 minutes past their expiry if the backend<br> # is slow to \
respond.<br> set req.grace = 120s;<br><br> if (! req.http.Authorization ~ \
"Basic myBase64Hash==")<br> {<br> error 401 \
"Restricted";<br> }<br><br> if (req.url ~ \
"&action=submit($|/)") {<br> return (pass);<br> \
}<br><br> if (req.restarts == 0) {<br> if \
(req.http.x-forwarded-for) {<br> set \
req.http.X-Forwarded-For = req.http.X-Forwarded-For + ", " + client.ip;<br> \
} else {<br> set req.http.X-Forwarded-For = \
client.ip;<br> }<br> }<br><br> set req.backend = \
www;<br><br> # This uses the ACL action called "purge". Basically if a \
request to<br> # PURGE the cache comes from anywhere other than localhost, ignore \
it.<br> if (req.request == "PURGE")<br> {if (!client.ip \
~ purge)<br> {error 405 "Not allowed.";}<br> \
return(lookup);}<br><br> if (req.request != "GET" && \
req.request != "HEAD" &&<br> req.request != \
"PUT" && req.request != "POST" &&<br> \
req.request != "TRACE" && req.request != "OPTIONS" \
&&<br> req.request != "DELETE")<br> \
{return(pipe);} /* Non-RFC2616 or CONNECT which is weird. */<br><br><br> \
# Pass anything other than GET and HEAD directly.<br> if (req.request != \
"GET" && req.request != "HEAD")<br> \
{return(pass);} /* We only deal with GET and HEAD by default */<br><br> \
# Pass requests from logged-in users directly.<br> if (req.http.Authorization \
|| req.http.Cookie)<br> {return(pass);} /* Not cacheable by \
default */<br><br> # Pass any requests with the "If-None-Match" \
header directly.<br> if (req.http.If-None-Match)<br> \
{return(pass);}<br><br> # normalize Accept-Encoding to reduce vary<br> if \
(req.http.Accept-Encoding) {<br> if (req.http.User-Agent ~ "MSIE \
6") {<br> unset req.http.Accept-Encoding;<br> } \
elsif (req.http.Accept-Encoding ~ "gzip") {<br> set \
req.http.Accept-Encoding = "gzip";<br> } elsif \
(req.http.Accept-Encoding ~ "deflate") {<br> set \
req.http.Accept-Encoding = "deflate";<br> } else {<br> \
unset req.http.Accept-Encoding;<br> }<br> }<br><br> return \
(lookup);<br>}<br><br>sub vcl_pipe {<br> # Note that only the first \
request to the backend will have<br> # X-Forwarded-For set. If you use \
X-Forwarded-For and want to<br> # have it set for all requests, make sure \
to have:<br> # set req.http.connection = "close";<br><br> \
# This is otherwise not necessary if you do not do any request rewriting.<br> \
set req.http.connection = "close";<br>}<br><br># Called if the cache has a \
copy of the page.<br>sub vcl_hit {<br> if (req.request == \
"PURGE")<br> {ban_url(req.url);<br> \
error 200 "Purged";}<br><br> if (!obj.ttl > 0s)<br> \
{return(pass);}<br>}<br><br><br># Called if the cache does not have a copy of the \
page.<br>sub vcl_miss {<br> if (req.request == "PURGE")<br> \
{error 200 "Not in cache";}<br>}<br><br># Called after a document has been \
successfully retrieved from the backend.<br>sub vcl_fetch {<br> # set \
minimum timeouts to auto-discard stored objects<br> # set \
beresp.prefetch = -30s;<br> set beresp.grace = 120s;<br><br> \
if (beresp.ttl < 48h) {<br> set beresp.ttl = 48h;}<br><br> \
if (!beresp.ttl > 0s)<br> {return(hit_for_pass);}<br><br> \
if (beresp.http.Set-Cookie)<br> {return(hit_for_pass);}<br> \
#if (beresp.http.Cache-Control ~ "(private|no-cache|no-store)")<br> \
# {return(hit_for_pass);}<br> if \
(req.http.Authorization && !beresp.http.Cache-Control ~ \
"public")<br> \
{return(hit_for_pass);}<br><br>}<br><br>sub vcl_error {<br><br> if (obj.status == \
401) {<br> set obj.http.Content-Type = "text/html; charset=utf-8";<br> \
set obj.http.WWW-Authenticate = "Basic realm=Secured";<br> synthetic \
{"<br><br> <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 \
Transitional//EN" "<a \
href="http://www.w3.org/TR/1999/REC-html401-19991224/loose.dtd">http://www.w3.org/TR/1999/REC-html401-19991224/loose.dtd</a>"><br><br> \
<HTML><br> <HEAD><br> <TITLE>Error</TITLE><br> \
<META HTTP-EQUIV='Content-Type' CONTENT='text/html;'><br> \
</HEAD><br> <BODY><H1>401 Unauthorized \
(varnish)</H1></BODY><br> </HTML><br> "};<br> \
return (deliver);<br> }<br>}<br><br>sub vcl_deliver {<br> if \
(obj.hits> 0) {<br> set resp.http.X-Cache = "HIT";<br> \
} else {<br> set resp.http.X-Cache = "MISS";<br> \
}<br> }<br><br></div><div>Now, all that's left to do is to set those completely \
insane timeouts I've been using to try and troubleshoot the problem to something \
a little more reasonable. <br><br></div><div>Thanks for all the \
help!<br><br></div><div>Tim<br></div></div><div class="gmail_extra"><br><div \
class="gmail_quote">On Thu, Jul 9, 2015 at 9:01 AM, Jason Price <span \
dir="ltr"><<a href="mailto:japrice@gmail.com" \
target="_blank">japrice@gmail.com</a>></span> wrote:<br><blockquote \
class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc \
solid;padding-left:1ex">You're never specifying any auth in your probe:<br> <span \
class=""><br>
.probe = {<br>
.request =<br>
"GET /healthcheck.php HTTP/1.1"<br>
"Host: <a href="http://wiki.example.com" rel="noreferrer" \
target="_blank">wiki.example.com</a>"<br> "Connection: close";<br>
<br>
</span>I don't know the proper way to specify it, but you'll need to play<br>
around with curl, wireshark and varnish probes until you get it right.<br>
<br>
May be easier to test with telnet invocations:<br>
<br>
telnet 10.10.10.26 80<br>
<span class="">GET /healthcheck.php HTTP/1.1<br>
Host: <a href="http://wiki.example.com" rel="noreferrer" \
target="_blank">wiki.example.com</a><br> </span>Authorization: Basic \
???????????????<br>
Connection: close<br>
<br>
<br>
The above should give you an auth failure request. Twiddle with that<br>
until you get a successful authentication request, then translate it<br>
into the probe .request format. The link you provided gives you<br>
everything else you need.<br>
<span class="HOEnZb"><font color="#888888"><br>
-Jason<br>
</font></span><div class="HOEnZb"><div class="h5"><br>
On Wed, Jul 8, 2015 at 11:19 PM, Tim Dunphy <<a \
href="mailto:bluethundr@gmail.com">bluethundr@gmail.com</a>> wrote:<br> >> \
that interval and window on your web server is scary..... what you're<br> \
>> saying is 'check each web server every 10 minutes, and only fail it<br> \
>> after 3 failures'<br> ><br>
><br>
> Hah!! Agreed. I was just trying to rule the connect timeouts out of the<br>
> picture as to why the failures were happening!<br>
> I plan to set them to more normal intervals once I'm finished testing \
and<br> > I've been able to get this to work.<br>
><br>
>><br>
>><br>
>> next time you see the issue, look at:<br>
>> varnishadm -n <varnish_name> debug.health<br>
><br>
><br>
> Hmm you may have a point as to the back ends. Varnish is indeed seeing them<br>
> as 'sick' when I encounter the 503 error:<br>
><br>
><br>
> [root@varnish1:~] #varnishadm -n varnish1 debug.health<br>
> Backend web1 is Sick<br>
> Current states good: 0 threshold: 2 window: 3<br>
> Average responsetime of good probes: 0.000000<br>
> Oldest \
Newest<br> > ================================================================<br>
> ------------------------------------------------------4444444444 Good IPv4<br>
> ------------------------------------------------------XXXXXXXXXX Good Xmit<br>
> ------------------------------------------------------RRRRRRRRRR Good Recv<br>
> ----------------------------------------------------HH---------- Happy<br>
> Backend web2 is Sick<br>
> Current states good: 0 threshold: 2 window: 3<br>
> Average responsetime of good probes: 0.000000<br>
> Oldest \
Newest<br> > ================================================================<br>
> ------------------------------------------------------4444444444 Good IPv4<br>
> ------------------------------------------------------XXXXXXXXXX Good Xmit<br>
> ------------------------------------------------------RRRRRRRRRR Good Recv<br>
> ----------------------------------------------------HH---------- Happy<br>
><br>
>><br>
>><br>
>> I'd be willing to bet that varnish is just failing the backends. \
Try<br> >> running the healthcheck manually from the varnish boxes:<br>
>> curl -H "Host:<a href="http://wiki.example.com" rel="noreferrer" \
target="_blank">wiki.example.com</a>" -v "<a \
href="http://10.10.10.26/healthcheck.php" rel="noreferrer" \
target="_blank">http://10.10.10.26/healthcheck.php</a>"<br> >> And see if \
you're actually getting good healthchecks. If you're not,<br> >> then \
you need to look at your backends (specifically healthcheck.php)<br> ><br>
><br>
> But if I perform the curl you're suggesting, I am able to retrieve the<br>
> healthcheck.php file!!<br>
><br>
> #curl --user admin:somepass -H "Host:<a href="http://wiki.example.com" \
rel="noreferrer" target="_blank">wiki.example.com</a>" -v<br> > "<a \
href="http://10.10.10.25/healthcheck.php" rel="noreferrer" \
target="_blank">http://10.10.10.25/healthcheck.php</a>"<br> > * About to \
connect() to 52.5.117.61 port 80 (#0)<br> > * Trying 52.5.117.61... \
connected<br> > * Connected to 52.5.117.61 (52.5.117.61) port 80 (#0)<br>
> * Server auth using Basic with user 'admin'<br>
>> GET /healthcheck.php HTTP/1.1<br>
>> Authorization: Basic SomeBase64Hash==<br>
>> User-Agent: curl/7.19.7 (x86_64-redhat-linux-gnu) libcurl/7.19.7<br>
>> NSS/<a href="http://3.14.0.0" rel="noreferrer" target="_blank">3.14.0.0</a> \
zlib/1.2.3 libidn/1.18 libssh2/1.4.2<br> >> Accept: */*<br>
>> Host:<a href="http://wiki.example.com" rel="noreferrer" \
target="_blank">wiki.example.com</a><br> >><br>
> < HTTP/1.1 200 OK<br>
> < Date: Thu, 09 Jul 2015 02:10:35 GMT<br>
> < Server: Apache/2.4.6 (CentOS) OpenSSL/1.0.1e-fips mod_fcgid/2.3.9<br>
> PHP/5.4.42 SVN/1.7.14 mod_wsgi/3.4 Python/2.7.5<br>
> < X-Powered-By: PHP/5.4.42<br>
> < Content-Length: 5<br>
> < Content-Type: text/html; charset=UTF-8<br>
> <<br>
> good<br>
> * Connection #0 to host 52.5.117.61 left intact<br>
> * Closing connection #0<br>
><br>
> But in the curl I just did I was specifying the user auth. Which got me to<br>
> thinking, maybe I'm handling apache basic auth in the wrong way in my VCL<br>
> file?<br>
><br>
> To test this idea out, I commented out the basic auth lines in my apache<br>
> config. Then cycled the services on both apache servers and both varnish<br>
> servers.<br>
><br>
> When I ran the test you gave me again, this is the result I got back:<br>
><br>
> #varnishadm -n varnish1 debug.health<br>
> Backend web1 is Healthy<br>
> Current states good: 3 threshold: 2 window: 3<br>
> Average responsetime of good probes: 0.032781<br>
> Oldest \
Newest<br> > ================================================================<br>
> ---------------------------------------------------------------4 Good IPv4<br>
> ---------------------------------------------------------------X Good Xmit<br>
> ---------------------------------------------------------------R Good Recv<br>
> -------------------------------------------------------------HHH Happy<br>
> Backend web2 is Healthy<br>
> Current states good: 3 threshold: 2 window: 3<br>
> Average responsetime of good probes: 0.032889<br>
> Oldest \
Newest<br> > ================================================================<br>
> ---------------------------------------------------------------4 Good IPv4<br>
> ---------------------------------------------------------------X Good Xmit<br>
> ---------------------------------------------------------------R Good Recv<br>
> -------------------------------------------------------------HHH Happy<br>
><br>
> Everybody's happy again!!<br>
><br>
> And I tried browsing around the wiki for quite a long time. And there were<br>
> NO 503 errors the entire time I was using it. Which tells me that I am,<br>
> indeed, not handling auth correctly in my VCL.<br>
><br>
> The way I thought I solved the problem was by adding a .request to the web<br>
> server definitions that specified the headers to do a GET on the health<br>
> check:<br>
><br>
> .request =<br>
> "GET /healthcheck.php HTTP/1.1"<br>
> "Host: <a href="http://wiki.example.com" rel="noreferrer" \
target="_blank">wiki.example.com</a>"<br> > "Connection: \
close";<br> ><br>
> The reason I thought this worked was because, after I'd restarted \
varnish<br> > with that change in place I was able to log into the wiki with basic \
auth in<br> > the web browser. And then I'd be able to use it for a while \
before the<br> > back-end would come up as 'sick' in varnish again which \
would cause the 503<br> > error.<br>
><br>
> I then tried following this advice again, which I had also tried earlier<br>
> without much luck:<br>
><br>
> <a href="http://blog.tenya.me/blog/2011/12/14/varnish-http-authentication/" \
rel="noreferrer" target="_blank">http://blog.tenya.me/blog/2011/12/14/varnish-http-authentication/</a><br>
><br>
> Which tells you to add this section to your VCL file:<br>
><br>
> if (! req.http.Authorization ~ "Basic SomeBase64Hash==")<br>
> {<br>
> error 401 "Restricted";<br>
> }<br>
><br>
> And then add this sub_vcl section:<br>
><br>
> sub vcl_error {<br>
><br>
> if (obj.status == 401) {<br>
> set obj.http.Content-Type = "text/html; charset=utf-8";<br>
> set obj.http.WWW-Authenticate = "Basic realm=Secured";<br>
> synthetic {"<br>
><br>
> <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 \
Transitional//EN"<br> > "<a \
href="http://www.w3.org/TR/1999/REC-html401-19991224/loose.dtd" rel="noreferrer" \
target="_blank">http://www.w3.org/TR/1999/REC-html401-19991224/loose.dtd</a>"><br>
><br>
> <HTML><br>
> <HEAD><br>
> <TITLE>Error</TITLE><br>
> <META HTTP-EQUIV='Content-Type' \
CONTENT='text/html;'><br> > </HEAD><br>
> <BODY><H1>401 Unauthorized \
(varnish)</H1></BODY><br> > </HTML><br>
> "};<br>
> return (deliver);<br>
> }<br>
> }<br>
><br>
> And after restarting varnish again on both nodes, with authentication in<br>
> place in the VHOST configs on the web servers I was able to log into the<br>
> wiki site again and browse around for a while.<br>
><br>
> But then after some browsing around the back ends would go sick again and<br>
> you would see the 503:<br>
><br>
> #varnishadm -n varnish1 debug.health<br>
> Backend web1 is Sick<br>
> Current states good: 1 threshold: 2 window: 3<br>
> Average responsetime of good probes: 0.000000<br>
> Oldest \
Newest<br> > ================================================================<br>
> --------------------------------------------------------------44 Good IPv4<br>
> --------------------------------------------------------------XX Good Xmit<br>
> --------------------------------------------------------------RR Good Recv<br>
> ------------------------------------------------------------HH-- Happy<br>
> Backend web2 is Sick<br>
> Current states good: 1 threshold: 2 window: 3<br>
> Average responsetime of good probes: 0.000000<br>
> Oldest \
Newest<br> > ================================================================<br>
> --------------------------------------------------------------44 Good IPv4<br>
> --------------------------------------------------------------XX Good Xmit<br>
> --------------------------------------------------------------RR Good Recv<br>
> ------------------------------------------------------------HH-- Happy<br>
><br>
> So SOMETHING must still be off with how I'm handling authentication in \
my<br> > VCL config. The next step I'm thinking of trying involves passing \
the<br> > authentication headers to the .request section of my web server \
definition.<br> > Although I'm not sure if it'll work. I'll let you \
guys know if it does.<br> ><br>
> But I'd like to present the current state of my VCL again in case anyone \
has<br> > any insight or knowledge to share that may help.<br>
><br>
> backend web1 {<br>
><br>
> .host = "10.10.10.25";<br>
><br>
> .port = "80";<br>
><br>
> .connect_timeout = 3600s;<br>
><br>
> .first_byte_timeout = 3600s;<br>
><br>
> .between_bytes_timeout = 3600s;<br>
><br>
> .max_connections = 70;<br>
><br>
> .probe = {<br>
><br>
> .request =<br>
><br>
> "GET /healthcheck.php HTTP/1.1"<br>
><br>
> "Host: <a href="http://wiki.example.com" rel="noreferrer" \
target="_blank">wiki.example.com</a>"<br> ><br>
> "Connection: close";<br>
><br>
> .interval = 10m;<br>
><br>
> .timeout = 60s;<br>
><br>
> .window = 3;<br>
><br>
> .threshold = 2;<br>
><br>
> }<br>
><br>
> }<br>
><br>
> backend web2 {<br>
><br>
> .host = "10.10.10.26";<br>
><br>
> .port = "80";<br>
><br>
> .connect_timeout = 3600s;<br>
><br>
> .first_byte_timeout = 3600s;<br>
><br>
> .between_bytes_timeout = 3600s;<br>
><br>
> .max_connections = 70;<br>
><br>
> .probe = {<br>
><br>
> .request =<br>
><br>
> "GET /healthcheck.php HTTP/1.1"<br>
><br>
> "Host: <a href="http://wiki.example.com" rel="noreferrer" \
target="_blank">wiki.example.com</a>"<br> ><br>
> "Connection: close";<br>
><br>
> .interval = 10m;<br>
><br>
> .timeout = 60s;<br>
><br>
> .window = 3;<br>
><br>
> .threshold = 2;<br>
><br>
> }<br>
><br>
> }<br>
><br>
> director www round-robin {<br>
><br>
> { .backend = web1; }<br>
><br>
> { .backend = web2; }<br>
><br>
> }<br>
><br>
> sub vcl_recv {<br>
><br>
> if (! req.http.Authorization ~ "Basic Base64Hash==")<br>
><br>
> {<br>
><br>
> error 401 "Restricted";<br>
><br>
> }<br>
><br>
> if (req.url ~ "&action=submit($|/)") {<br>
><br>
> return (pass);<br>
><br>
> }<br>
><br>
> set req.backend = www;<br>
><br>
> return (lookup);<br>
><br>
> }<br>
><br>
> sub vcl_fetch {<br>
><br>
> set beresp.ttl = 3600s;<br>
><br>
> set beresp.grace = 4h;<br>
><br>
> return (deliver);<br>
><br>
> }<br>
><br>
> sub vcl_error {<br>
><br>
> if (obj.status == 401) {<br>
><br>
> set obj.http.Content-Type = "text/html; charset=utf-8";<br>
><br>
> set obj.http.WWW-Authenticate = "Basic realm=Secured";<br>
><br>
> synthetic {"<br>
><br>
><br>
> <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 \
Transitional//EN"<br> > "<a \
href="http://www.w3.org/TR/1999/REC-html401-19991224/loose.dtd" rel="noreferrer" \
target="_blank">http://www.w3.org/TR/1999/REC-html401-19991224/loose.dtd</a>"><br>
><br>
><br>
> <HTML><br>
><br>
> <HEAD><br>
><br>
> <TITLE>Error</TITLE><br>
><br>
> <META HTTP-EQUIV='Content-Type' \
CONTENT='text/html;'><br> ><br>
> </HEAD><br>
><br>
> <BODY><H1>401 Unauthorized \
(varnish)</H1></BODY><br> ><br>
> </HTML><br>
><br>
> "};<br>
><br>
> return (deliver);<br>
><br>
> }<br>
><br>
> }<br>
><br>
> sub vcl_deliver {<br>
><br>
> if (obj.hits> 0) {<br>
><br>
> set resp.http.X-Cache = "HIT";<br>
><br>
> } else {<br>
><br>
> set resp.http.X-Cache = "MISS";<br>
><br>
> }<br>
><br>
> }<br>
><br>
> Once again I genuinely appreciate the help of this list, and hope I \
haven't<br> > worn out my welcome! ;)<br>
><br>
> Thanks,<br>
> Tim<br>
><br>
><br>
> On Wed, Jul 8, 2015 at 9:31 PM, Jason Price <<a \
href="mailto:japrice@gmail.com">japrice@gmail.com</a>> wrote:<br> >><br>
>> that interval and window on your web server is scary..... what \
you're<br> >> saying is 'check each web server every 10 minutes, and \
only fail it<br> >> after 3 failures'<br>
>><br>
>> next time you see the issue, look at:<br>
>><br>
>> varnishadm -n <varnish_name> debug.health<br>
>><br>
>> I'd be willing to bet that varnish is just failing the backends. \
Try<br> >> running the healthcheck manually from the varnish boxes:<br>
>><br>
>> curl -H "Host:<a href="http://wiki.example.com" rel="noreferrer" \
target="_blank">wiki.example.com</a>" -v "<a \
href="http://10.10.10.26/healthcheck.php" rel="noreferrer" \
target="_blank">http://10.10.10.26/healthcheck.php</a>"<br> >><br>
>> And see if you're actually getting good healthchecks. If you're \
not,<br> >> then you need to look at your backends (specifically \
healthcheck.php)<br> >><br>
>> On Wed, Jul 8, 2015 at 12:14 PM, Tim Dunphy <<a \
href="mailto:bluethundr@gmail.com">bluethundr@gmail.com</a>> wrote:<br> >> \
> Hi guys,<br> >> ><br>
>> ><br>
>> > I'm having an issue where my varnish server will stop working \
after a<br> >> > while<br>
>> > of browsing around the site I'm using it with and throw a 503 \
server<br> >> > unavailable error.<br>
>> ><br>
>> > In my varnish logs I'm getting a 'no backend connection \
error':<br> >> ><br>
>> > 10 FetchError c no backend connection<br>
>> > 10 VCL_call c error deliver<br>
>> > 10 VCL_call c deliver deliver<br>
>> > 10 TxProtocol c HTTP/1.1<br>
>> > 10 TxStatus c 503<br>
>> > 10 TxResponse c Service Unavailable<br>
>> > 10 TxHeader c Server: Varnish<br>
>> ><br>
>> ><br>
>> > And if I do a GET on the healthcheck from the command line on the<br>
>> > varnish<br>
>> > server, I get a 503 response from varnish:<br>
>> ><br>
>> > #GET <a href="http://wiki.example.com/healthcheck.php" rel="noreferrer" \
target="_blank">http://wiki.example.com/healthcheck.php</a><br> >> ><br>
>> > <?xml version="1.0" encoding="utf-8"?><br>
>> > <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 \
Strict//EN"<br> >> > "<a \
href="http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd" rel="noreferrer" \
target="_blank">http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd</a>"><br> \
>> > <html><br> >> > <head><br>
>> > <title>503 Service Unavailable</title><br>
>> > </head><br>
>> > <body><br>
>> > <h1>Error 503 Service Unavailable</h1><br>
>> > <p>Service Unavailable</p><br>
>> > <h3>Guru Meditation:</h3><br>
>> > <p>XID: <a href="tel:2107225059" \
value="+12107225059">2107225059</a></p><br> >> > <hr><br>
>> > <p>Varnish cache server</p><br>
>> > </body><br>
>> > </html><br>
>> ><br>
>> > But if I do another GET on the healthcheck file from the varnish \
server<br> >> > to<br>
>> > another apache VHOST on the same server as the wiki site that \
responds<br> >> > to<br>
>> > the IP of the web server instead of the IP for the varnish server, \
the<br> >> > GET<br>
>> > works:<br>
>> ><br>
>> > #GET <a href="http://ops1.example.com/healthcheck.php" rel="noreferrer" \
target="_blank">http://ops1.example.com/healthcheck.php</a><br> >> > \
good<br> >> ><br>
>> ><br>
>> > So I'm not sure why varnish is having trouble reaching the HC file. \
The<br> >> > web<br>
>> > server is a little far from the varnish server. The varnish machines \
are<br> >> > in<br>
>> > NYC and the web servers are in northern Virginia.<br>
>> ><br>
>> > So I tried setting the timeouts in the varnish config to a really \
high<br> >> > number. And that was working for a while. But today I noticed \
that it<br> >> > stopped working. I'll have to restart the varnish \
service and browse the<br> >> > site for a while. Then it'll stop \
working again and produce the 503<br> >> > error.<br>
>> > It's pretty annoying!<br>
>> ><br>
>> > I was wondering if there might be something in my VCL I could tweak \
to<br> >> > make<br>
>> > this work? Or if the fact is that the web servers are simply too \
far<br> >> > from<br>
>> > varnish for this to be practical.<br>
>> ><br>
>> > Here's my VCL file. It's pretty basic:<br>
>> ><br>
>> > backend web1 {<br>
>> > .host = "10.10.10.25";<br>
>> > .port = "80";<br>
>> > .connect_timeout = 1200s;<br>
>> > .first_byte_timeout = 1200s;<br>
>> > .between_bytes_timeout = 1200s;<br>
>> > .max_connections = 70;<br>
>> > .probe = {<br>
>> > .request =<br>
>> > "GET /healthcheck.php HTTP/1.1"<br>
>> > "Host: <a href="http://wiki.example.com" rel="noreferrer" \
target="_blank">wiki.example.com</a>"<br> >> > "Connection: \
close";<br> >> > .interval = 10m;<br>
>> > .timeout = 60s;<br>
>> > .window = 3;<br>
>> > .threshold = 2;<br>
>> > }<br>
>> > }<br>
>> ><br>
>> > backend web2 {<br>
>> > .host = "10.10.10.26";<br>
>> > .port = "80";<br>
>> > .connect_timeout = 1200s;<br>
>> > .first_byte_timeout = 1200s;<br>
>> > .between_bytes_timeout = 1200s;<br>
>> > .max_connections = 70;<br>
>> > .probe = {<br>
>> > .request =<br>
>> > "GET /healthcheck.php HTTP/1.1"<br>
>> > "Host: <a href="http://wiki.example.com" rel="noreferrer" \
target="_blank">wiki.example.com</a>"<br> >> > "Connection: \
close";<br> >> > .interval = 10m;<br>
>> > .timeout = 60s;<br>
>> > .window = 3;<br>
>> > .threshold = 2;<br>
>> > }<br>
>> > }<br>
>> ><br>
>> > director www round-robin {<br>
>> > { .backend = web1; }<br>
>> > { .backend = web2; }<br>
>> > }<br>
>> ><br>
>> > sub vcl_recv {<br>
>> ><br>
>> > if (req.url ~ "&action=submit($|/)") {<br>
>> > return (pass);<br>
>> > }<br>
>> ><br>
>> > set req.backend = www;<br>
>> > return (lookup);<br>
>> > }<br>
>> ><br>
>> > sub vcl_fetch {<br>
>> > set beresp.ttl = 3600s;<br>
>> > set beresp.grace = 4h;<br>
>> > return (deliver);<br>
>> > }<br>
>> ><br>
>> ><br>
>> > sub vcl_deliver {<br>
>> > if (obj.hits> 0) {<br>
>> > set resp.http.X-Cache = "HIT";<br>
>> > } else {<br>
>> > set resp.http.X-Cache = "MISS";<br>
>> > }<br>
>> > }<br>
>> ><br>
>> > Thanks,<br>
>> > Tim<br>
>> ><br>
>> ><br>
>> ><br>
>> > --<br>
>> > GPG me!!<br>
>> ><br>
>> > gpg --keyserver <a href="http://pool.sks-keyservers.net" \
rel="noreferrer" target="_blank">pool.sks-keyservers.net</a> --recv-keys F186197B<br> \
>> ><br> >> ><br>
>> > _______________________________________________<br>
>> > varnish-misc mailing list<br>
>> > <a href="mailto:varnish-misc@varnish-cache.org">varnish-misc@varnish-cache.org</a><br>
>> > <a href="https://www.varnish-cache.org/lists/mailman/listinfo/varnish-misc" \
rel="noreferrer" target="_blank">https://www.varnish-cache.org/lists/mailman/listinfo/varnish-misc</a><br>
><br>
><br>
><br>
><br>
> --<br>
> GPG me!!<br>
><br>
> gpg --keyserver <a href="http://pool.sks-keyservers.net" rel="noreferrer" \
target="_blank">pool.sks-keyservers.net</a> --recv-keys F186197B<br> ><br>
</div></div></blockquote></div><br><br clear="all"><br>-- <br><div \
class="gmail_signature">GPG me!!<br><br>gpg --keyserver <a \
href="http://pool.sks-keyservers.net" target="_blank">pool.sks-keyservers.net</a> \
--recv-keys F186197B<br><br></div> </div>
_______________________________________________
varnish-misc mailing list
varnish-misc@varnish-cache.org
https://www.varnish-cache.org/lists/mailman/listinfo/varnish-misc
[prev in list] [next in list] [prev in thread] [next in thread]
Configure |
About |
News |
Add a list |
Sponsored by KoreLogic