Varnish doesn't keep serving cache when the backends are down - caching

I have Varnish running behind HAProxy and in front of NGINX. The HAProxy server deals with SSL, then forwards the traffic on port 80 to Varnish, which in turn refers to the NGINX servers.
I'd like to set Varnish up so that if the NGINX servers are all down, it continues to serve the cached content until they're back up. But I don't seem to be able to get it quite right. I'm running varnish-6.0.8 revision 97e54ada6ac578af332e52b44d2038bb4fa4cd4a. My VCL version is 4.1.
Here is my configuration (sanitized):
vcl 4.1;
import directors;
import std;
# Define Server A
backend server-a {
.host = "xx.xx.xx.xx";
.port = "80";
.max_connections = 100;
.probe = {
.url = "/varnish-check.txt";
.timeout = 1s;
.interval = 5s;
.window = 5;
.threshold = 3;
}
}
# Define Server B
backend server-b {
.host = "xx.xx.xx.xx";
.port = "80";
.max_connections = 100;
.probe = {
.url = "/varnish-check.txt";
.timeout = 1s;
.interval = 5s;
.window = 5;
.threshold = 3;
}
}
# Define Server C
backend server-c {
.host = "xx.xx.xx.xx";
.port = "8080";
.max_connections = 100;
.probe = {
.url = "/varnish-check.txt";
.timeout = 1s;
.interval = 5s;
.window = 5;
.threshold = 3;
}
}
sub vcl_recv {
if (req.http.host == "example.com" || req.http.host == "example2.com") {
set req.backend_hint = server_b.backend();
}
elseif (req.http.host == "example3.com") {
set req.backend_hint = server_c.backend();
}
else {
set req.backend_hint = server_a.backend();
}
}
acl purge {
"localhost";
"127.0.0.1";
"::1";
"xx.xx.xx.xx";
"<IPv6>";
}
sub vcl_recv {
set req.http.X-Forwarded-For = regsub(req.http.X-Forwarded-For,"^([^,]+)(,[^,]+)*","\1");
if (req.method == "PURGE") {
if (!client.ip ~ purge) {
return (synth(405, "This IP is not allowed to send PURGE requests."));
}
if (req.http.X-Purge-Method == "regex") {
ban("obj.http.x-url ~ " + req.url + " && obj.http.x-host ~ " + req.http.host);
return (synth(200, "Banned"));
}
return (purge);
}
# Wordpress: don't cache these special pages
if (req.url ~ "(wp-admin|post\.php|edit\.php|wp-login)") {
return(pass);
}
# Wordpress: don't cache users who are logged-in or on password-protected pages
if (req.http.Cookie ~ "wordpress_logged_in_|resetpass|wp-postpass_") {
return(pass);
}
# Remove cookies
set req.http.Cookie = regsuball(req.http.Cookie, "comment_author_[a-zA-Z0-9_]+", "");
set req.http.Cookie = regsuball(req.http.Cookie, "has_js=[^;]+(; )?", "");
set req.http.Cookie = regsuball(req.http.Cookie, "wp-settings-1=[^;]+(; )?", "");
set req.http.Cookie = regsuball(req.http.Cookie, "wp-settings-time-1=[^;]+(; )?", "");
set req.http.Cookie = regsuball(req.http.Cookie, "wordpress_test_cookie=[^;]+(; )?", "");
set req.http.Cookie = regsuball(req.http.Cookie, "PHPSESSID=[^;]+(; )?", "");
set req.http.Cookie = regsuball(req.http.Cookie, "__utm.=[^;]+(; )?", "");
set req.http.Cookie = regsuball(req.http.Cookie, "_ga=[^;]+(; )?", "");
set req.http.Cookie = regsuball(req.http.Cookie, "utmctr=[^;]+(; )?", "");
set req.http.Cookie = regsuball(req.http.Cookie, "utmcmd.=[^;]+(; )?", "");
set req.http.Cookie = regsuball(req.http.Cookie, "utmccn.=[^;]+(; )?", "");
# Remove proxy header (see https://httpoxy.org/#mitigate-varnish)
unset req.http.proxy;
# Normalize query arguments (sort alphabetically)
set req.url = std.querysort(req.url);
# Strip trailing ? if it exists
if (req.url ~ "\?$") {
set req.url = regsub(req.url, "\?$", "");
}
# Limit requests to the following types
if (req.method !~ "^GET|HEAD|PUT|POST|TRACE|OPTIONS|PATCH|DELETE$") {
return (pipe);
}
# Only cache GET or HEAD requests to ensure that POST requests are always passed through, along with their cookies
if (req.method != "GET" && req.method != "HEAD") {
return (pass);
}
# Don't cache AJAX requests
if (req.http.X-Requested-With == "XMLHttpRequest") {
return(pass);
}
# Don't cache images and PDFs
if (req.url ~ "\.(gif|jpg|jpeg|bmp|png|pdf)$") {
return(pass);
}
# Don't cache large files (zip, audio, video, etc.)
if (req.url ~ "^[^?]*\.(7z|avi|bz2|flac|flv|gz|mka|mkv|mov|mp3|mp4|mpeg|mpg|ogg|ogm|opus|rar|tar|tgz|tbz|txz|wav|webm|wmv|xz|zip)(\?.*)?$") {
return (pipe);
}
# Add support for ESI
if (req.http.Authorization) {
return (pass);
}
# Wordpress: don't cache search results
if (req.url ~ "/\?s=") {
return (pass);
}
# Wordpress: don't cache REST API (hand-rolled APIs used by custom themes)
if (req.url ~ "/shared-gc/includes/rest-api/") {
return (pass);
}
# Wordpress: don't cache anything with a cache-breaking v=<random> parameter (see gc.loadCachedJSON() JS function)
if (req.url ~ "(\?|&)v=0") {
return (pass);
}
# Don't cache the special pages we use to generate PDFs from the Wordpress catalog site
if (req.url ~ "/generate-catalog/") {
return (pass);
}
# Respect the browser's desire for a fresh copy on hard refresh. This ban will only work if there are no further URL changes (e.g. set req.url = ...) after it
if (req.http.Cache-Control == "no-cache") {
ban("req.http.host == " + req.http.host + " && req.url == " + req.url);
}
# Are there cookies left with only spaces or that are empty?
if (req.http.cookie ~ "^\s*$") {
unset req.http.cookie;
}
# Remove all cookies to enable caching
unset req.http.Cookie;
return (hash);
}
sub vcl_hash {
# Ignore marketing-related url parameters when caching urls
set req.http.newUrl = req.url;
if (req.http.newUrl ~ "(\?|&)(utm_source|utm_medium|utm_campaign|utm_content|gclid|fbclid|cx|ie|cof|siteurl|gc_source|mkt_tok)=") {
set req.http.newUrl = regsuball(req.http.newUrl, "&(utm_source|utm_medium|utm_campaign|utm_content|gclid|fbclid|cx|ie|cof|siteurl|gc_source|mkt_tok)=([A-z0-9_\-\.%25]+)", "");
set req.http.newUrl = regsuball(req.http.newUrl, "\?(utm_source|utm_medium|utm_campaign|utm_content|gclid|fbclid|cx|ie|cof|siteurl|gc_source|mkt_tok)=([A-z0-9_\-\.%25]+)", "?");
set req.http.newUrl = regsub(req.http.newUrl, "\?&", "?");
set req.http.newUrl = regsub(req.http.newUrl, "\?$", "");
}
# Ignore hashes when caching urls
if (req.http.newUrl ~ "\#") {
set req.http.newUrl = regsub(req.http.newUrl, "\#.*$", "");
}
# Default vcl_hash, except replaced "req.url" with "req.http.newUrl"
hash_data(req.http.newUrl);
if (req.http.host) {
hash_data(req.http.host);
} else {
hash_data(server.ip);
}
return (lookup);
}
sub vcl_backend_response {
set beresp.http.x-url = bereq.url;
set beresp.http.x-host = bereq.http.host;
# Set the TTL for the cache to thirty days and the grace period to twelve hours
set beresp.ttl = 30d;
set beresp.grace = 12h;
set beresp.keep = 24h;
# Set different TTLs for other hosts
# if (bereq.url ~ "(example.com|secondexample.com)") {
# set beresp.ttl = 30d;
#}
# Set 301 and 302 as uncacheable
if (beresp.status == 301 || beresp.status == 302) {
set beresp.http.Location = regsub(beresp.http.Location, ":[0-9]+", "");
# Don't cache redirects
set beresp.uncacheable = true;
}
# Cache 404 responses for five minutes (can be cleared by hard refresh)
if (beresp.status == 403 || beresp.status == 404)
{
set beresp.ttl = 5m;
}
# Check for the response status of background fetches from backend, and return (abandon) if the response is a “5XX” errors
if (bereq.is_bgfetch && beresp.status >= 500 && beresp.status <= 599) {
return (abandon);
}
}
sub vcl_deliver {
unset resp.http.x-url;
unset resp.http.x-host;
# Add debug header to see if it's a HIT/MISS and the number of hits, disable when not needed
if (obj.hits > 0) {
set resp.http.X-Cache = "HIT";
} else {
set resp.http.X-Cache = "MISS";
}
set resp.http.X-Cache-Hits = obj.hits;
# Remove headers to improve security
unset resp.http.X-Varnish;
unset resp.http.Via;
unset resp.http.X-Powered-By;
unset resp.http.Server;
}
sub vcl_init {
return (ok);
}
sub vcl_fini {
return (ok);
}
sub vcl_hit {
# If the object has a TTL equal to or greater than 0s, deliver it from the cache
if (obj.ttl >= 0s) {
return (deliver);
}
# Check whether Grace Mode is necessary
if (std.healthy(req.backend_hint)) {
if (obj.ttl + 10s > 0s) {
set req.http.grace = "normal(limited)";
return (deliver);
} else {
# If Grace Mode is not necessary, fetch a fresh object from the backend
return(miss);
}
} else {
# If Grace Mode is necessary (i.e. the backend is down), enter grace mode
if (obj.ttl + obj.grace > 0s) {
set req.http.grace = "full";
return (deliver);
} else {
# If there is no Grace Mode object available, send to the backend
return (miss);
}
}
}
This doesn't seem to work, but I can't see why. If a backend server goes down, the Varnish server quickly returns a 503 or other error. Any pointers as to what I've got wrong would be appreciated.
Relatedly, I really like the ability to clear the cache with a hard browser refresh. But it strikes me that it would be great to be able to negate that rule if the backend is down. Any idea how I go about that?
And, of course, if there are any obvious errors in here, I'd love to hear about them.
Thanks!

Apparently you have some logic in vcl_hit that inspects the backend health and interferes with the TTL and grace value.
However, there's a simpler way of handling what is called Stale If Error. Here's the VCL code you need:
vcl 4.1;
import std;
backend default {
.host = "xx.xx.xx.xx";
.port = "80";
.max_connections = 100;
.probe = {
.url = "/varnish-check.txt";
.timeout = 1s;
.interval = 5s;
.window = 5;
.threshold = 3;
}
}
sub vcl_recv {
if (std.healthy(req.backend_hint)) {
set req.grace = 10s;
}
}
sub vcl_backend_response {
set beresp.grace = 24h;
}
Total object lifetime
To understand the logic behind the VCL code, you need to understand how Varnish determines the object lifetime.
The total object lifetime is the sum of the following elements:
Total object lifetime = TTL + grace +keep
As long as the sum of these elements is greater than zero, the object is kept around in cache. That doesn't mean revalidation doesn't happen.
If the remaining TTL drops below zero, Varnish attempts to connect to the origin server. But if there's grace left, it will do this asynchronously while it's serving the stale content to the client.
If the backend is down, the stale content is still served.
If the object has expired and is out of grace, synchronous revalidation happens. If there is some keep time left, the potential ETag and Last-Modified headers of the expired object are used and converted into If-None-Match and If-Modified-Since backend request headers.
Conditional requests are the only real feature of keep time, because synchronous revalidation puts client requests in the queue, unlike grace mode.
How to leverage grace mode for stale if error
Grace mode is Varnish's implementation of stale while revalidate and can be set in VCL using beresp.grace but also via Cache-Control: stale-while-revalidate=3600.
What we're trying to do here is take advantage of grace mode's capability to send stale content to the client while revalidating asynchronously.
As the VCL example shows we're saving the object with a grace value of 24 hours. However, when requesting the object, we're only using 10 seconds of grace as long as the backend is healthy.
If it turns out the backend doesn't respond, the original grace of 24 hours is used.
Using grace for stale if error works, but it's a bit of a hack
See https://www.youtube.com/watch?v=51WUTB1cUeM for a 2-minute video about grace mode.
Using vmod_stale
Varnish also has a proper implementation of stale if error, but it's only available in Varnish Enterprise.
We specifically built vmod_stale to handle situations where backends are down.
Here's some example VCL code that uses vmod_stale:
vcl 4.1;
import stale;
backend default {
.host = "xx.xx.xx.xx";
.port = "80";
.max_connections = 100;
.probe = {
.url = "/varnish-check.txt";
.timeout = 1s;
.interval = 5s;
.window = 5;
.threshold = 3;
}
}
sub stale_if_error {
if (beresp.status >= 500 && stale.exists()) {
stale.revive(20m, 1h);
stale.deliver();
return (abandon);
}
}
sub vcl_backend_response {
call stale_if_error;
}
sub vcl_backend_error {
call stale_if_error;
}
This VCL example leverages stale.revive() to set new values for TTL and grace while respecting the original total expiration time.
If the new TTL and grace time in combination with the existing keep time exceed the total life time of the object, the overflow is deducted from the keep time.
See https://www.youtube.com/watch?v=6LY4Idt1e2Q for a video about this VMOD.

Related

Magento 2 varnish not caching, delivers new cookie each time

I'm trying to get Varnish to cache a magento store but it keeps setting different cookies each time I curl it:
$ curl -IL -X GET https://myurl.com/ |grep cookie
set-cookie: store=default; expires=Sat, 17-Apr-2021 19:51:22 GMT; Max-Age=31536000; path=/index.php/; HttpOnly
set-cookie: PHPSESSID=j5uhb0oe5qh8d212j12sfcnsaa; expires=Fri, 17-Apr-2020 20:51:22 GMT; Max-Age=3600; path=/; domain=myurl.com; HttpOnly
$ curl -IL -X GET https://myurl.com/ |grep cookie
set-cookie: store=default; expires=Sat, 17-Apr-2021 19:51:26 GMT; Max-Age=31536000; path=/index.php/; HttpOnly
set-cookie: PHPSESSID=dg09e4uaj9kiqo37rp4pk2g8co; expires=Fri, 17-Apr-2020 20:51:26 GMT; Max-Age=3600; path=/; domain=myurl.com; HttpOnly
Website is loading pretty slow.
My varnish config:
import std;
# The minimal Varnish version is 5.0
# For SSL offloading, pass the following header in your proxy server or load balancer: 'X-Forwarded-Proto: https'
backend default {
.host = "localhost";
.port = "8080";
.first_byte_timeout = 600s;
.probe = {
.url = "/health_check.php";
.timeout = 2s;
.interval = 5s;
.window = 10;
.threshold = 5;
}
}
acl purge {
"localhost";
"172.25.0.0"/16;
}
sub vcl_recv {
# set req.backend_hint = vdir.backend(); # send all traffic to the vdir director
if (req.method == "PURGE") {
if (client.ip !~ purge) {
return (synth(405, "Method not allowed"));
}
# To use the X-Pool header for purging varnish during automated deployments, make sure the X-Pool header
# has been added to the response in your backend server config. This is used, for example, by the
# capistrano-magento2 gem for purging old content from varnish during it's deploy routine.
if (!req.http.X-Magento-Tags-Pattern && !req.http.X-Pool) {
return (synth(400, "X-Magento-Tags-Pattern or X-Pool header required"));
}
if (req.http.X-Magento-Tags-Pattern) {
ban("obj.http.X-Magento-Tags ~ " + req.http.X-Magento-Tags-Pattern);
}
if (req.http.X-Pool) {
ban("obj.http.X-Pool ~ " + req.http.X-Pool);
}
# If all Tags should be purged clear
# # ban everything to catch assets as well
if (req.http.X-Magento-Tags-Pattern == ".*") {
ban("req.url ~ .*");
}
return (synth(200, "Purged Magento"));
}
if (req.method != "GET" &&
req.method != "HEAD" &&
req.method != "PUT" &&
req.method != "POST" &&
req.method != "TRACE" &&
req.method != "OPTIONS" &&
req.method != "DELETE") {
/* Non-RFC2616 or CONNECT which is weird. */
return (pipe);
}
if (req.url ~ "/healthcheck") {
return (pass);
}
# We only deal with GET and HEAD by default
if (req.method != "GET" && req.method != "HEAD") {
return (pass);
}
# Bypass shopping cart, checkout and search requests
if (req.url ~ "/checkout" || req.url ~ "/catalogsearch") {
return (pass);
}
# Bypass health check requests
if (req.url ~ "/health_check.php") {
return (pass);
}
# Set initial grace period usage status
set req.http.grace = "none";
# normalize url in case of leading HTTP scheme and domain
set req.url = regsub(req.url, "^http[s]?://", "");
# collect all cookies
std.collect(req.http.Cookie);
# Compression filter. See https://www.varnish-cache.org/trac/wiki/FAQ/Compression
if (req.http.Accept-Encoding) {
if (req.url ~ "\.(jpg|jpeg|png|gif|gz|tgz|bz2|tbz|mp3|ogg|swf|flv)$") {
# No point in compressing these
unset req.http.Accept-Encoding;
} elsif (req.http.Accept-Encoding ~ "gzip") {
set req.http.Accept-Encoding = "gzip";
} elsif (req.http.Accept-Encoding ~ "deflate" && req.http.user-agent !~ "MSIE") {
set req.http.Accept-Encoding = "deflate";
} else {
# unkown algorithm
unset req.http.Accept-Encoding;
}
}
# Remove Google gclid parameters to minimize the cache objects
set req.url = regsuball(req.url,"\?gclid=[^&]+$",""); # strips when QS = "?gclid=AAA"
set req.url = regsuball(req.url,"\?gclid=[^&]+&","?"); # strips when QS = "?gclid=AAA&foo=bar"
set req.url = regsuball(req.url,"&gclid=[^&]+",""); # strips when QS = "?foo=bar&gclid=AAA" or QS = "?foo=bar&gclid=AAA&bar=baz"
# Static files caching
if (req.url ~ "^/(pub/)?(media|static)/") {
# Static files should not be cached by default
#return (pass);
return (hash);
# But if you use a few locales and don't use CDN you can enable caching static files by commenting previous line (#return (pass);) and uncommenting next 3 lines
unset req.http.Https;
unset req.http.X-Forwarded-Proto;
unset req.http.Cookie;
}
return (hash);
}
sub vcl_hash {
if (req.http.cookie ~ "X-Magento-Vary=") {
hash_data(regsub(req.http.cookie, "^.*?X-Magento-Vary=([^;]+);*.*$", "\1"));
}
# For multi site configurations to not cache each other's content
if (req.http.host) {
hash_data(req.http.host);
} else {
hash_data(server.ip);
}
# To make sure http users don't see ssl warning
if (req.http.X-Forwarded-Proto) {
hash_data(req.http.X-Forwarded-Proto);
}
}
sub vcl_backend_response {
set beresp.grace = 3d;
if (beresp.http.content-type ~ "text") {
set beresp.do_esi = true;
}
if (bereq.url ~ "\.js$" || beresp.http.content-type ~ "text") {
set beresp.do_gzip = true;
}
if (beresp.http.X-Magento-Debug) {
set beresp.http.X-Magento-Cache-Control = beresp.http.Cache-Control;
}
# cache only successfully responses and 404s
#if (beresp.status != 200 && beresp.status != 404)) {
# don't cache 404 nor 300 nor 500
if (beresp.status != 200) {
set beresp.ttl = 0s;
set beresp.uncacheable = true;
return (deliver);
} elsif (beresp.http.Cache-Control ~ "private") {
set beresp.uncacheable = true;
set beresp.ttl = 86400s;
return (deliver);
}
# validate if we need to cache it and prevent from setting cookie
if (beresp.ttl > 0s && (bereq.method == "GET" || bereq.method == "HEAD")) {
unset beresp.http.set-cookie;
}
# If page is not cacheable then bypass varnish for 2 minutes as Hit-For-Pass
if (beresp.ttl <= 0s ||
beresp.http.Surrogate-control ~ "no-store" ||
(!beresp.http.Surrogate-Control &&
beresp.http.Cache-Control ~ "no-cache|no-store") ||
beresp.http.Vary == "*") {
# Mark as Hit-For-Pass for the next 2 minutes
set beresp.ttl = 120s;
set beresp.uncacheable = true;
}
return (deliver);
}
sub vcl_deliver {
if (resp.http.X-Magento-Debug) {
if (resp.http.x-varnish ~ " ") {
set resp.http.X-Magento-Cache-Debug = "HIT";
set resp.http.Grace = req.http.grace;
} else {
set resp.http.X-Magento-Cache-Debug = "MISS";
}
} else {
unset resp.http.Age;
}
set resp.http.X-Test = "YEAH";
if (obj.hits > 0) { # Add debug header to see if it's a HIT/MISS and the number of hits, disable when not needed
set resp.http.X-Cache = "HIT";
} else {
set resp.http.X-Cache = "MISS";
}
# Please note that obj.hits behaviour changed in 4.0, now it counts per objecthead, not per object
# and obj.hits may not be reset in some cases where bans are in use. See bug 1492 for details.
# So take hits with a grain of salt
set resp.http.X-Cache-Hits = obj.hits;
# Not letting browser to cache non-static files.
if (resp.http.Cache-Control !~ "private" && req.url !~ "^/(pub/)?(media|static)/") {
set resp.http.Pragma = "no-cache";
set resp.http.Expires = "-1";
set resp.http.Cache-Control = "no-store, no-cache, must-revalidate, max-age=0";
}
unset resp.http.X-Magento-Debug;
unset resp.http.X-Magento-Tags;
unset resp.http.X-Powered-By;
unset resp.http.Server;
unset resp.http.X-Varnish;
unset resp.http.Via;
unset resp.http.Link;
}
sub vcl_hit {
if (obj.ttl >= 0s) {
# Hit within TTL period
return (deliver);
}
if (std.healthy(req.backend_hint)) {
if (obj.ttl + 300s > 0s) {
# Hit after TTL expiration, but within grace period
set req.http.grace = "normal (healthy server)";
return (deliver);
} else {
# Hit after TTL and grace expiration
return (miss);
}
} else {
# server is not healthy, retrieve from cache
set req.http.grace = "unlimited (unhealthy server)";
return (deliver);
}
}
Caché:
$ bin/magento cache:status
Current status:
config: 1
layout: 0
block_html: 0
collections: 1
reflection: 1
db_ddl: 1
eav: 1
customer_notification: 1
config_integration: 1
config_integration_api: 1
full_page: 0
translate: 1
config_webservice: 1
vertex: 0
wp_gtm_categories: 1
php bin/magento config:show |grep -i cache
system/full_page_cache/varnish/access_list - localhost
system/full_page_cache/varnish/backend_host - localhost
system/full_page_cache/varnish/backend_port - 8080
system/full_page_cache/varnish/grace_period - 300
system/full_page_cache/caching_application - 2
Is this expected? site loads extremely slow, even if I reload it over and over again
Thanks for your time, gentleman.
Based on de VCL code, it doesn't look like Varnish is removing the PHPSESSID and the store cookies. One way to be sure, is by running the following varnishlog command on your Varnish server:
varnishlog -g request -i ReqUrl -I ReqUnset:cookie -I ReqHeader:Cookie -I bereqheader:Cookie -I berequnset:cookie -q "ReqUrl eq '/'"
This will look at requests from requests to the homepage and will list the following things:
URL
Cookies set by the client as a request header
A copy of the cookies, set as a backend request header
Possible cookies being unset in the client part of the VCL (e.g. in vcl_recv)
Possible cookies being unset in the backend part of the VCL (e.g. in vcl_backend_request)
This will give you a clear indication whether or not cookies are remove by Varnish. You can replace the cookie header filter with any other header potentially being removed.
If you want to know which VCL flow is run, you can add -i "VCL_*" and then you'll see if it is a HIT, a MISS, or a deliberate PASS.
Once you've done the necessary debugging, you'll know if it is a Varnish issue, or a PHP issue.
check for X-Magento-Vary cookie. It might be on a Ajax response and this cookie is in hash routine

Varnish 4 VCL - Strip defined query string parameters

I'm currently using Varnish 4 as a reverse proxy cache on a website. However I noticed that when a url is called with query string parameters it bypasses the varnish cache.
For example:
www.mywebsite.com = Cache HIT
www.mywebsite.com?gclid=123 = Cache
MISS
I want varnish to ignore several query string parameters when determining a match for the page such as Google's tracking parameters.
I added the following to my VCL file however when i load a url such as www.mywebsite.com?gclid=123 I see a 404 page so something isn't quite right.
# Normalize request url parameters before determining a page match.
set req.url = regsuball(req.url, "((\?)|&)(gclid|gclsrc|utm_content|utm_term|utm_campaign|utm_medium|utm_source|_ga)=[^&]*", "");
set req.url = regsub(req.url, "(\?&|\?|&)$", "");
Any help would be much appreciated.
Here's the full VCL file:
vcl 4.0;
import std;
# The minimal Varnish version is 4.0
# For SSL offloading, pass the following header in your proxy server or load balancer: 'X-Forwarded-Proto: https'
backend default {
.host = "127.2.0.1";
.port = "80";
.first_byte_timeout = 6s;
}
acl purge {
"localhost";
}
sub vcl_recv {
if (req.method == "PURGE") {
if (client.ip !~ purge) {
return (synth(405, "Method not allowed"));
}
# To use the X-Pool header for purging varnish during automated deployments, make sure the X-Pool header
# has been added to the response in your backend server config. This is used, for example, by the
# capistrano-magento2 gem for purging old content from varnish during it's deploy routine.
if (!req.http.X-Magento-Tags-Pattern && !req.http.X-Pool) {
return (synth(400, "X-Magento-Tags-Pattern or X-Pool header required"));
}
if (req.http.X-Magento-Tags-Pattern) {
ban("obj.http.X-Magento-Tags ~ " + req.http.X-Magento-Tags-Pattern);
}
if (req.http.X-Pool) {
ban("obj.http.X-Pool ~ " + req.http.X-Pool);
}
return (synth(200, "Purged"));
}
if (req.method != "GET" &&
req.method != "HEAD" &&
req.method != "PUT" &&
req.method != "POST" &&
req.method != "TRACE" &&
req.method != "OPTIONS" &&
req.method != "DELETE") {
/* Non-RFC2616 or CONNECT which is weird. */
return (pipe);
}
# We only deal with GET and HEAD by default
if (req.method != "GET" && req.method != "HEAD") {
return (pass);
}
# Bypass shopping cart, checkout and search requests
if (req.url ~ "/checkout" || req.url ~ "/catalogsearch") {
return (pass);
}
# Bypass health check requests
if (req.url ~ "/pub/health_check.php") {
return (pass);
}
# Set initial grace period usage status
set req.http.grace = "none";
# normalize url in case of leading HTTP scheme and domain
set req.url = regsub(req.url, "^http[s]?://", "");
# Normalize request url parameters before determining a page match.
# strip normalized parameters from query string
set req.url = regsuball(req.url, "((\?)|&)(gclid|gclsrc|utm_content|utm_term|utm_campaign|utm_medium|utm_source|_ga)=[^&]*", "");
set req.url = regsub(req.url, "(\?&|\?|&)$", "");
# collect all cookies
std.collect(req.http.Cookie);
# Compression filter. See https://www.varnish-cache.org/trac/wiki/FAQ/Compression
if (req.http.Accept-Encoding) {
if (req.url ~ "\.(jpg|jpeg|png|gif|gz|tgz|bz2|tbz|mp3|ogg|swf|flv)$") {
# No point in compressing these
unset req.http.Accept-Encoding;
} elsif (req.http.Accept-Encoding ~ "gzip") {
set req.http.Accept-Encoding = "gzip";
} elsif (req.http.Accept-Encoding ~ "deflate" && req.http.user-agent !~ "MSIE") {
set req.http.Accept-Encoding = "deflate";
} else {
# unkown algorithm
unset req.http.Accept-Encoding;
}
}
# Static files caching
if (req.url ~ "^/(pub/)?(media|static)/") {
# Static files should not be cached by default
return (pass);
# But if you use a few locales and don't use CDN you can enable caching static files by commenting previous line (#return (pass);) and uncommenting next 3 lines
#unset req.http.Https;
#unset req.http.X-Forwarded-Proto;
#unset req.http.Cookie;
}
return (hash);
}
sub vcl_hash {
if (req.http.cookie ~ "X-Magento-Vary=") {
hash_data(regsub(req.http.cookie, "^.*?X-Magento-Vary=([^;]+);*.*$", "\1"));
}
# For multi site configurations to not cache each other's content
if (req.http.host) {
hash_data(req.http.host);
} else {
hash_data(server.ip);
}
# To make sure http users don't see ssl warning
if (req.http.X-Forwarded-Proto) {
hash_data(req.http.X-Forwarded-Proto);
}
}
sub vcl_backend_response {
set beresp.grace = 3d;
if (beresp.http.content-type ~ "text") {
set beresp.do_esi = true;
}
if (bereq.url ~ "\.js$" || beresp.http.content-type ~ "text") {
set beresp.do_gzip = true;
}
if (beresp.http.X-Magento-Debug) {
set beresp.http.X-Magento-Cache-Control = beresp.http.Cache-Control;
}
# cache only successfully responses and 404s
if (beresp.status != 200 && beresp.status != 404) {
set beresp.ttl = 0s;
set beresp.uncacheable = true;
return (deliver);
} elsif (beresp.http.Cache-Control ~ "private") {
set beresp.uncacheable = true;
set beresp.ttl = 86400s;
return (deliver);
}
# validate if we need to cache it and prevent from setting cookie
if (beresp.ttl > 0s && (bereq.method == "GET" || bereq.method == "HEAD")) {
unset beresp.http.set-cookie;
}
# If page is not cacheable then bypass varnish for 2 minutes as Hit-For-Pass
if (beresp.ttl <= 0s ||
beresp.http.Surrogate-control ~ "no-store" ||
(!beresp.http.Surrogate-Control &&
beresp.http.Cache-Control ~ "no-cache|no-store") ||
beresp.http.Vary == "*") {
# Mark as Hit-For-Pass for the next 2 minutes
set beresp.ttl = 120s;
set beresp.uncacheable = true;
}
return (deliver);
}
sub vcl_deliver {
set resp.http.X-Magento-Cache-Debug-Request-Url = req.url;
if (resp.http.X-Magento-Debug) {
# set the normalized request url as a http header if magento is in debug mode for easy debugging
if (resp.http.x-varnish ~ " ") {
set resp.http.X-Magento-Cache-Debug = "HIT";
set resp.http.Grace = req.http.grace;
} else {
set resp.http.X-Magento-Cache-Debug = "MISS";
}
} else {
unset resp.http.Age;
}
# unset resp.http.X-Magento-Debug;
# unset resp.http.X-Magento-Tags;
# unset resp.http.X-Powered-By;
# unset resp.http.Server;
# unset resp.http.X-Varnish;
# unset resp.http.Via;
# unset resp.http.Link;
}
sub vcl_hit {
if (obj.ttl >= 0s) {
# Hit within TTL period
return (deliver);
}
if (std.healthy(req.backend_hint)) {
if (obj.ttl + 300s > 0s) {
# Hit after TTL expiration, but within grace period
set req.http.grace = "normal (healthy server)";
return (deliver);
} else {
# Hit after TTL and grace expiration
return (fetch);
}
} else {
# server is not healthy, retrieve from cache
set req.http.grace = "unlimited (unhealthy server)";
return (deliver);
}
}
Perhaps the following will work for you much better:
if (req.url ~ "(\?|&)(gclid|utm_[a-z]+)=") {
set req.url = regsuball(req.url, "(gclid|utm_[a-z]+)=[-_A-z0-9+()%.]+&?", "");
set req.url = regsub(req.url, "[?|&]+$", "");
}
Originally posted here.

Varnish Cache Expiring Objects Too Quickly

I've been having a problem with my varnish (v3.0.2) cache where it keeps resetting the cache of an object after less than 60 seconds despite having a TTL of 24h, cookies stripped, content encoding normalized, non-critical headers unset, Cache-Control set to public, s-maxage=86400 etc.
For some reason, if you access the following URL repeatedly over a minute, you can see that the Age creeps up and then hits zero (with X-Cache returning MISS):
http://data.eyewire.org/volume/83329/chunk/0/1/0/1/tile/xz/32:64
There are no n_lru_nuked objects and the cache is over 60GB. I watched the varnishlog and may have seen something with ExpBan going on, but I can't for the life of me figure out why.
Here are some key parts to my vcl file:
sub vcl_recv {
set req.grace = 120s;
# normalize Accept-Encoding to reduce vary
if (req.http.Accept-Encoding) {
if (req.http.User-Agent ~ "MSIE 6") {
unset req.http.Accept-Encoding;
}
elsif (req.http.Accept-Encoding ~ "gzip") {
set req.http.Accept-Encoding = "gzip";
}
elsif (req.http.Accept-Encoding ~ "deflate") {
set req.http.Accept-Encoding = "deflate";
}
else {
unset req.http.Accept-Encoding;
}
}
# This uses the ACL action called "purge". Basically if a request to
# PURGE the cache comes from anywhere other than localhost, ignore it.
if (req.request == "PURGE")
{if (!client.ip ~ purge)
{error 405 "Not allowed.";}
return(lookup);}
if (req.http.Upgrade ~ "(?i)websocket") {
return (pipe);
}
# ....
if ( req.http.host ~ "data\.eyewire\.org" ) {
unset req.http.Cookie;
unset req.http.Accept-Language;
unset req.http.Expires;
unset req.http.Cache-Control;
unset req.http.User-Agent;
return(lookup);
}
# ....
}
sub vcl_fetch {
# ....
if ( req.http.host ~ "data.eyewire.org" ) {
if ( req.url ~ "^/volume" ) {
unset beresp.http.Set-Cookie;
set beresp.ttl = 24h;
set beresp.http.Cache-Control = "public, s-maxage=86400";
set beresp.http.X-TTL = beresp.ttl;
return(deliver);
}
elsif (req.url ~ "^/cell") {
set beresp.ttl = 1h;
return(hit_for_pass);
}
}
}
# from http://blog.bigdinosaur.org/adventures-in-varnish/
sub vcl_pass {
set bereq.http.connection = "close";
if (req.http.X-Forwarded-For) {
set bereq.http.X-Forwarded-For = req.http.X-Forwarded-For;
}
else {
set bereq.http.X-Forwarded-For = regsub(client.ip, ":.*", "");
}
}
# from http://blog.bigdinosaur.org/adventures-in-varnish/
sub vcl_pipe {
#we need to copy the upgrade header
if (req.http.upgrade) {
set bereq.http.upgrade = req.http.upgrade;
set bereq.http.connection = req.http.connection;
}
set bereq.http.connection = "close";
if (req.http.X-Forwarded-For) {
set bereq.http.X-Forwarded-For = req.http.X-Forwarded-For;
}
else {
set bereq.http.X-Forwarded-For = regsub(client.ip, ":.*", "");
}
}
# from http://blog.bigdinosaur.org/adventures-in-varnish/
sub vcl_hit {
if (req.request == "PURGE") {
purge;
error 200 "Purged.";
}
}
# from http://blog.bigdinosaur.org/adventures-in-varnish/
sub vcl_miss {
if (req.request == "PURGE") {
purge;
error 200 "Purged.";
}
}
sub vcl_deliver {
# Display hit/miss info
if (obj.hits > 0) {
set resp.http.X-Cache = "HIT";
set resp.http.X-Cache-Hits = obj.hits;
}
else {
set resp.http.X-Cache = "MISS";
}
# Security Non-Disclosure
remove resp.http.X-Varnish;
remove resp.http.X-Powered-By;
remove resp.http.Server;
return(deliver);
}
Thanks!
Edit: FYI: I had to revert some changes to my VCL to solve a problem in production, but the problem is still essentially the same.
I experienced the same kind of behaviour where the hits where going up and without any reason it seemed the cache was purged. After some research i found the cause was the vary: Accept-Encoding, User-agent header that made a different cache being saved per user agent.
Try setting a vary header only for Accept-Encoding.
For what it's worth, I just upgraded to Varnish 4 and it seemed to solve the problem. During the upgrade we also removed the definition of vcl_hit and vcl_miss which included a purge directive that didn't seem like it was being hit but who knows.

caching of PROPFIND requests with varnish

I will use varnish in front of my caldav server. All clients made periodically PROPFIND and OPTIONS request. Can I cache the response of the PROPFIND/OPTIONS requests with varnish?
I will purge the cache after PUT request. The following config don't work. I get no cache hits...
vcl 4.0;
import std;
backend baikal {
.host = "127.0.0.1";
.port = "6083";
}
acl upstream_proxy {
"127.0.0.1";
}
sub vcl_recv {
# purge cache for baikal.example.com after put request
if (req.method == "PUT" && req.http.host == "baikal.example.com") {
ban("req.http.host == " + req.http.Host);
}
# Set the X-Forwarded-For header so the backend can see the original
# IP address. If one is already set by an upstream proxy, we'll just re-use that.
if (client.ip ~ upstream_proxy && req.http.X-Forwarded-For) {
set req.http.X-Forwarded-For = req.http.X-Forwarded-For;
} else {
set req.http.X-Forwarded-For = regsub(client.ip, ":.*", "");
}
std.log("ip:" + req.http.x-forwarded-for);
if (req.http.host == "baikal.example.com") {
set req.backend_hint = baikal;
}
if (req.method == "PROPFIND" && req.http.host == "baikal.example.com") {
return(hash);
}
return(hash);
}
sub vcl_backend_response {
if (beresp.http.method == "PROPFIND" ) {
unset beresp.http.pragma;
unset beresp.http.cache-control;
unset beresp.http.expires;
set beresp.ttl = 1 w;
set beresp.http.magicmarker = "1";
}
}
sub vcl_deliver {
# Happens when we have all the pieces we need, and are about to send the
# response to the client.
#
# You can do accounting or modifying the final object here.
if (obj.hits > 0) {
set resp.http.X-Cache = "HIT";
set resp.http.X-Cache-Hits = obj.hits;
} else {
set resp.http.X-Cache = "MISS";
}
}

Drupal 7 and Varnish image cache setting src to 127.0.0.1

I have a Drupal 7 installation that is sometimes setting the src path of images to 127.0.0.1.
Here is an example.
<img height="291" width="233" style="width: 233px; height: 291px; float: left;" class="media-image media-element file-default" typeof="foaf:Image" src="http://127.0.0.1/sites/default/files/media/news/images/jerzy_sawicki.jpg" alt="" title="">
After clearing the cache the image src is correct for awhile.
<img height="291" width="233" style="width: 233px; height: 291px; float: left;" class="media-image media-element file-default" typeof="foaf:Image" src="http://www.example.com/sites/default/files/media/news/images/jerzy_sawicki.jpg" alt="" title="">
I have many contributed modules enabled, but I imagine that this is most likely a Varnish or Cache Expiration issue.
Here is the Varnish default.vcl config. I had changed 127.0.0.1 to the server name thinking that may effect the src, but it did not.
backend default {
.host = "www.example.com";
.port = "8888";
.connect_timeout = 10s;
.first_byte_timeout = 10s;
.between_bytes_timeout = 10s;
// Check Drupal every 5 minutes to keep cache warm.
.probe = {
.url = "/news";
.interval = 300s;
.timeout = 10s;
.window = 5;
.threshold = 2;
}
}
sub vcl_recv {
// Remove has_js and Google Analytics __* cookies.
set req.http.Cookie = regsuball(req.http.Cookie, "(^|;\s*)(__[a-z]+|has_js)=[^;]*", "");
// Remove a ";" prefix, if present.
set req.http.Cookie = regsub(req.http.Cookie, "^;\s*", "");
// Remove empty cookies.
if (req.http.Cookie ~ "^\s*$") {
unset req.http.Cookie;
}
// Catch Drupal theme files – THIS BREAKS UPDATE.PHP
if (req.url ~ "^/sites/") {
unset req.http.Cookie;
}
// Catch Drupal misc files (like drupal.js and jquery.js)
if (req.url ~ "^/misc/") {
unset req.http.Cookie;
}
// Drupal js/css doesn’t need cookies, cache them
if (req.url ~ "^/modules/.*\.(js|css)\?") {
unset req.http.Cookie;
}
// Pass cron jobs
if (req.url ~ "cron.php" ||
req.url ~ "^/admin/structure/features$" ||
req.url ~ "^/admin/config/system/backup_migrate$") {
return (pass);
}
// Currently we have server-status monitoring going directly against 8888 port
// Commenting out this pass-through
//if (req.url ~ ".*/server-status$") {
//return (pass);
//}
# Add a unique header containing the client address
remove req.http.X-Forwarded-For;
set req.http.X-Forwarded-For = client.ip;
}
sub vcl_hash {
if (req.http.Cookie) {
set req.hash += req.http.Cookie;
}
}
sub vcl_deliver {
if (obj.hits > 0) {
set resp.http.X-Cache = "HIT";
} else {
set resp.http.X-Cache = "MISS";
}
}
sub vcl_fetch {
# Varnish determined the object was not cacheable
if (!beresp.cacheable) {
set beresp.http.X-Cacheable = "NO:Not Cacheable";
# You don't wish to cache content for logged in users
} elsif (req.http.Cookie ~ "(UserID|_session)") {
set beresp.http.X-Cacheable = "NO:Got Session";
return(pass);
# You are respecting the Cache-Control=private header from the backend
} elsif (beresp.http.Cache-Control ~ "private") {
set beresp.http.X-Cacheable = "NO:Cache-Control=private";
return(pass);
# You are extending the lifetime of the object artificially
} elsif (beresp.ttl < 1s) {
set beresp.ttl = 5s;
set beresp.grace = 5s;
set beresp.http.X-Cacheable = "YES:FORCED";
# Varnish determined the object was cacheable
} else {
set beresp.http.X-Cacheable = "YES";
}
# ....
return(deliver);
sub vcl_error {
# If 503 error and we've tried less than 3 times, try again
if (obj.status == 503 && req.restarts < 3) {
restart;
}
}
I don't think Varnish is the one to blame in this case, but your VCL is quite unusual for drupal (the session part in vcl_fetch is wrong, by example).
Also, drupal should generate relative URLs instead absolute ones.
For a quick fix I suggest you to set $base_url value in your settings.php [1]
$base_url = 'http://yourdomain.tld';
I also suggest you to take a look to battle tested VCLs for drupal [2] [3]
[1] https://api.drupal.org/api/drupal/developer!globals.php/global/base_url/7
[2] http://www.lullabot.com/blog/article/configuring-varnish-high-availability-multiple-web-servers
http://www.lullabot.com/sites/lullabot.com/files/default_varnish3.vcl_.txt
[3] https://github.com/NITEMAN/varnish-bites/blob/master/varnish3/drupal-base.vcl

Resources