Varnish 4 VCL - Strip defined query string parameters - caching

I'm currently using Varnish 4 as a reverse proxy cache on a website. However I noticed that when a url is called with query string parameters it bypasses the varnish cache.
For example:
www.mywebsite.com = Cache HIT
www.mywebsite.com?gclid=123 = Cache
MISS
I want varnish to ignore several query string parameters when determining a match for the page such as Google's tracking parameters.
I added the following to my VCL file however when i load a url such as www.mywebsite.com?gclid=123 I see a 404 page so something isn't quite right.
# Normalize request url parameters before determining a page match.
set req.url = regsuball(req.url, "((\?)|&)(gclid|gclsrc|utm_content|utm_term|utm_campaign|utm_medium|utm_source|_ga)=[^&]*", "");
set req.url = regsub(req.url, "(\?&|\?|&)$", "");
Any help would be much appreciated.
Here's the full VCL file:
vcl 4.0;
import std;
# The minimal Varnish version is 4.0
# For SSL offloading, pass the following header in your proxy server or load balancer: 'X-Forwarded-Proto: https'
backend default {
.host = "127.2.0.1";
.port = "80";
.first_byte_timeout = 6s;
}
acl purge {
"localhost";
}
sub vcl_recv {
if (req.method == "PURGE") {
if (client.ip !~ purge) {
return (synth(405, "Method not allowed"));
}
# To use the X-Pool header for purging varnish during automated deployments, make sure the X-Pool header
# has been added to the response in your backend server config. This is used, for example, by the
# capistrano-magento2 gem for purging old content from varnish during it's deploy routine.
if (!req.http.X-Magento-Tags-Pattern && !req.http.X-Pool) {
return (synth(400, "X-Magento-Tags-Pattern or X-Pool header required"));
}
if (req.http.X-Magento-Tags-Pattern) {
ban("obj.http.X-Magento-Tags ~ " + req.http.X-Magento-Tags-Pattern);
}
if (req.http.X-Pool) {
ban("obj.http.X-Pool ~ " + req.http.X-Pool);
}
return (synth(200, "Purged"));
}
if (req.method != "GET" &&
req.method != "HEAD" &&
req.method != "PUT" &&
req.method != "POST" &&
req.method != "TRACE" &&
req.method != "OPTIONS" &&
req.method != "DELETE") {
/* Non-RFC2616 or CONNECT which is weird. */
return (pipe);
}
# We only deal with GET and HEAD by default
if (req.method != "GET" && req.method != "HEAD") {
return (pass);
}
# Bypass shopping cart, checkout and search requests
if (req.url ~ "/checkout" || req.url ~ "/catalogsearch") {
return (pass);
}
# Bypass health check requests
if (req.url ~ "/pub/health_check.php") {
return (pass);
}
# Set initial grace period usage status
set req.http.grace = "none";
# normalize url in case of leading HTTP scheme and domain
set req.url = regsub(req.url, "^http[s]?://", "");
# Normalize request url parameters before determining a page match.
# strip normalized parameters from query string
set req.url = regsuball(req.url, "((\?)|&)(gclid|gclsrc|utm_content|utm_term|utm_campaign|utm_medium|utm_source|_ga)=[^&]*", "");
set req.url = regsub(req.url, "(\?&|\?|&)$", "");
# collect all cookies
std.collect(req.http.Cookie);
# Compression filter. See https://www.varnish-cache.org/trac/wiki/FAQ/Compression
if (req.http.Accept-Encoding) {
if (req.url ~ "\.(jpg|jpeg|png|gif|gz|tgz|bz2|tbz|mp3|ogg|swf|flv)$") {
# No point in compressing these
unset req.http.Accept-Encoding;
} elsif (req.http.Accept-Encoding ~ "gzip") {
set req.http.Accept-Encoding = "gzip";
} elsif (req.http.Accept-Encoding ~ "deflate" && req.http.user-agent !~ "MSIE") {
set req.http.Accept-Encoding = "deflate";
} else {
# unkown algorithm
unset req.http.Accept-Encoding;
}
}
# Static files caching
if (req.url ~ "^/(pub/)?(media|static)/") {
# Static files should not be cached by default
return (pass);
# But if you use a few locales and don't use CDN you can enable caching static files by commenting previous line (#return (pass);) and uncommenting next 3 lines
#unset req.http.Https;
#unset req.http.X-Forwarded-Proto;
#unset req.http.Cookie;
}
return (hash);
}
sub vcl_hash {
if (req.http.cookie ~ "X-Magento-Vary=") {
hash_data(regsub(req.http.cookie, "^.*?X-Magento-Vary=([^;]+);*.*$", "\1"));
}
# For multi site configurations to not cache each other's content
if (req.http.host) {
hash_data(req.http.host);
} else {
hash_data(server.ip);
}
# To make sure http users don't see ssl warning
if (req.http.X-Forwarded-Proto) {
hash_data(req.http.X-Forwarded-Proto);
}
}
sub vcl_backend_response {
set beresp.grace = 3d;
if (beresp.http.content-type ~ "text") {
set beresp.do_esi = true;
}
if (bereq.url ~ "\.js$" || beresp.http.content-type ~ "text") {
set beresp.do_gzip = true;
}
if (beresp.http.X-Magento-Debug) {
set beresp.http.X-Magento-Cache-Control = beresp.http.Cache-Control;
}
# cache only successfully responses and 404s
if (beresp.status != 200 && beresp.status != 404) {
set beresp.ttl = 0s;
set beresp.uncacheable = true;
return (deliver);
} elsif (beresp.http.Cache-Control ~ "private") {
set beresp.uncacheable = true;
set beresp.ttl = 86400s;
return (deliver);
}
# validate if we need to cache it and prevent from setting cookie
if (beresp.ttl > 0s && (bereq.method == "GET" || bereq.method == "HEAD")) {
unset beresp.http.set-cookie;
}
# If page is not cacheable then bypass varnish for 2 minutes as Hit-For-Pass
if (beresp.ttl <= 0s ||
beresp.http.Surrogate-control ~ "no-store" ||
(!beresp.http.Surrogate-Control &&
beresp.http.Cache-Control ~ "no-cache|no-store") ||
beresp.http.Vary == "*") {
# Mark as Hit-For-Pass for the next 2 minutes
set beresp.ttl = 120s;
set beresp.uncacheable = true;
}
return (deliver);
}
sub vcl_deliver {
set resp.http.X-Magento-Cache-Debug-Request-Url = req.url;
if (resp.http.X-Magento-Debug) {
# set the normalized request url as a http header if magento is in debug mode for easy debugging
if (resp.http.x-varnish ~ " ") {
set resp.http.X-Magento-Cache-Debug = "HIT";
set resp.http.Grace = req.http.grace;
} else {
set resp.http.X-Magento-Cache-Debug = "MISS";
}
} else {
unset resp.http.Age;
}
# unset resp.http.X-Magento-Debug;
# unset resp.http.X-Magento-Tags;
# unset resp.http.X-Powered-By;
# unset resp.http.Server;
# unset resp.http.X-Varnish;
# unset resp.http.Via;
# unset resp.http.Link;
}
sub vcl_hit {
if (obj.ttl >= 0s) {
# Hit within TTL period
return (deliver);
}
if (std.healthy(req.backend_hint)) {
if (obj.ttl + 300s > 0s) {
# Hit after TTL expiration, but within grace period
set req.http.grace = "normal (healthy server)";
return (deliver);
} else {
# Hit after TTL and grace expiration
return (fetch);
}
} else {
# server is not healthy, retrieve from cache
set req.http.grace = "unlimited (unhealthy server)";
return (deliver);
}
}

Perhaps the following will work for you much better:
if (req.url ~ "(\?|&)(gclid|utm_[a-z]+)=") {
set req.url = regsuball(req.url, "(gclid|utm_[a-z]+)=[-_A-z0-9+()%.]+&?", "");
set req.url = regsub(req.url, "[?|&]+$", "");
}
Originally posted here.

Related

Magento 2 varnish not caching, delivers new cookie each time

I'm trying to get Varnish to cache a magento store but it keeps setting different cookies each time I curl it:
$ curl -IL -X GET https://myurl.com/ |grep cookie
set-cookie: store=default; expires=Sat, 17-Apr-2021 19:51:22 GMT; Max-Age=31536000; path=/index.php/; HttpOnly
set-cookie: PHPSESSID=j5uhb0oe5qh8d212j12sfcnsaa; expires=Fri, 17-Apr-2020 20:51:22 GMT; Max-Age=3600; path=/; domain=myurl.com; HttpOnly
$ curl -IL -X GET https://myurl.com/ |grep cookie
set-cookie: store=default; expires=Sat, 17-Apr-2021 19:51:26 GMT; Max-Age=31536000; path=/index.php/; HttpOnly
set-cookie: PHPSESSID=dg09e4uaj9kiqo37rp4pk2g8co; expires=Fri, 17-Apr-2020 20:51:26 GMT; Max-Age=3600; path=/; domain=myurl.com; HttpOnly
Website is loading pretty slow.
My varnish config:
import std;
# The minimal Varnish version is 5.0
# For SSL offloading, pass the following header in your proxy server or load balancer: 'X-Forwarded-Proto: https'
backend default {
.host = "localhost";
.port = "8080";
.first_byte_timeout = 600s;
.probe = {
.url = "/health_check.php";
.timeout = 2s;
.interval = 5s;
.window = 10;
.threshold = 5;
}
}
acl purge {
"localhost";
"172.25.0.0"/16;
}
sub vcl_recv {
# set req.backend_hint = vdir.backend(); # send all traffic to the vdir director
if (req.method == "PURGE") {
if (client.ip !~ purge) {
return (synth(405, "Method not allowed"));
}
# To use the X-Pool header for purging varnish during automated deployments, make sure the X-Pool header
# has been added to the response in your backend server config. This is used, for example, by the
# capistrano-magento2 gem for purging old content from varnish during it's deploy routine.
if (!req.http.X-Magento-Tags-Pattern && !req.http.X-Pool) {
return (synth(400, "X-Magento-Tags-Pattern or X-Pool header required"));
}
if (req.http.X-Magento-Tags-Pattern) {
ban("obj.http.X-Magento-Tags ~ " + req.http.X-Magento-Tags-Pattern);
}
if (req.http.X-Pool) {
ban("obj.http.X-Pool ~ " + req.http.X-Pool);
}
# If all Tags should be purged clear
# # ban everything to catch assets as well
if (req.http.X-Magento-Tags-Pattern == ".*") {
ban("req.url ~ .*");
}
return (synth(200, "Purged Magento"));
}
if (req.method != "GET" &&
req.method != "HEAD" &&
req.method != "PUT" &&
req.method != "POST" &&
req.method != "TRACE" &&
req.method != "OPTIONS" &&
req.method != "DELETE") {
/* Non-RFC2616 or CONNECT which is weird. */
return (pipe);
}
if (req.url ~ "/healthcheck") {
return (pass);
}
# We only deal with GET and HEAD by default
if (req.method != "GET" && req.method != "HEAD") {
return (pass);
}
# Bypass shopping cart, checkout and search requests
if (req.url ~ "/checkout" || req.url ~ "/catalogsearch") {
return (pass);
}
# Bypass health check requests
if (req.url ~ "/health_check.php") {
return (pass);
}
# Set initial grace period usage status
set req.http.grace = "none";
# normalize url in case of leading HTTP scheme and domain
set req.url = regsub(req.url, "^http[s]?://", "");
# collect all cookies
std.collect(req.http.Cookie);
# Compression filter. See https://www.varnish-cache.org/trac/wiki/FAQ/Compression
if (req.http.Accept-Encoding) {
if (req.url ~ "\.(jpg|jpeg|png|gif|gz|tgz|bz2|tbz|mp3|ogg|swf|flv)$") {
# No point in compressing these
unset req.http.Accept-Encoding;
} elsif (req.http.Accept-Encoding ~ "gzip") {
set req.http.Accept-Encoding = "gzip";
} elsif (req.http.Accept-Encoding ~ "deflate" && req.http.user-agent !~ "MSIE") {
set req.http.Accept-Encoding = "deflate";
} else {
# unkown algorithm
unset req.http.Accept-Encoding;
}
}
# Remove Google gclid parameters to minimize the cache objects
set req.url = regsuball(req.url,"\?gclid=[^&]+$",""); # strips when QS = "?gclid=AAA"
set req.url = regsuball(req.url,"\?gclid=[^&]+&","?"); # strips when QS = "?gclid=AAA&foo=bar"
set req.url = regsuball(req.url,"&gclid=[^&]+",""); # strips when QS = "?foo=bar&gclid=AAA" or QS = "?foo=bar&gclid=AAA&bar=baz"
# Static files caching
if (req.url ~ "^/(pub/)?(media|static)/") {
# Static files should not be cached by default
#return (pass);
return (hash);
# But if you use a few locales and don't use CDN you can enable caching static files by commenting previous line (#return (pass);) and uncommenting next 3 lines
unset req.http.Https;
unset req.http.X-Forwarded-Proto;
unset req.http.Cookie;
}
return (hash);
}
sub vcl_hash {
if (req.http.cookie ~ "X-Magento-Vary=") {
hash_data(regsub(req.http.cookie, "^.*?X-Magento-Vary=([^;]+);*.*$", "\1"));
}
# For multi site configurations to not cache each other's content
if (req.http.host) {
hash_data(req.http.host);
} else {
hash_data(server.ip);
}
# To make sure http users don't see ssl warning
if (req.http.X-Forwarded-Proto) {
hash_data(req.http.X-Forwarded-Proto);
}
}
sub vcl_backend_response {
set beresp.grace = 3d;
if (beresp.http.content-type ~ "text") {
set beresp.do_esi = true;
}
if (bereq.url ~ "\.js$" || beresp.http.content-type ~ "text") {
set beresp.do_gzip = true;
}
if (beresp.http.X-Magento-Debug) {
set beresp.http.X-Magento-Cache-Control = beresp.http.Cache-Control;
}
# cache only successfully responses and 404s
#if (beresp.status != 200 && beresp.status != 404)) {
# don't cache 404 nor 300 nor 500
if (beresp.status != 200) {
set beresp.ttl = 0s;
set beresp.uncacheable = true;
return (deliver);
} elsif (beresp.http.Cache-Control ~ "private") {
set beresp.uncacheable = true;
set beresp.ttl = 86400s;
return (deliver);
}
# validate if we need to cache it and prevent from setting cookie
if (beresp.ttl > 0s && (bereq.method == "GET" || bereq.method == "HEAD")) {
unset beresp.http.set-cookie;
}
# If page is not cacheable then bypass varnish for 2 minutes as Hit-For-Pass
if (beresp.ttl <= 0s ||
beresp.http.Surrogate-control ~ "no-store" ||
(!beresp.http.Surrogate-Control &&
beresp.http.Cache-Control ~ "no-cache|no-store") ||
beresp.http.Vary == "*") {
# Mark as Hit-For-Pass for the next 2 minutes
set beresp.ttl = 120s;
set beresp.uncacheable = true;
}
return (deliver);
}
sub vcl_deliver {
if (resp.http.X-Magento-Debug) {
if (resp.http.x-varnish ~ " ") {
set resp.http.X-Magento-Cache-Debug = "HIT";
set resp.http.Grace = req.http.grace;
} else {
set resp.http.X-Magento-Cache-Debug = "MISS";
}
} else {
unset resp.http.Age;
}
set resp.http.X-Test = "YEAH";
if (obj.hits > 0) { # Add debug header to see if it's a HIT/MISS and the number of hits, disable when not needed
set resp.http.X-Cache = "HIT";
} else {
set resp.http.X-Cache = "MISS";
}
# Please note that obj.hits behaviour changed in 4.0, now it counts per objecthead, not per object
# and obj.hits may not be reset in some cases where bans are in use. See bug 1492 for details.
# So take hits with a grain of salt
set resp.http.X-Cache-Hits = obj.hits;
# Not letting browser to cache non-static files.
if (resp.http.Cache-Control !~ "private" && req.url !~ "^/(pub/)?(media|static)/") {
set resp.http.Pragma = "no-cache";
set resp.http.Expires = "-1";
set resp.http.Cache-Control = "no-store, no-cache, must-revalidate, max-age=0";
}
unset resp.http.X-Magento-Debug;
unset resp.http.X-Magento-Tags;
unset resp.http.X-Powered-By;
unset resp.http.Server;
unset resp.http.X-Varnish;
unset resp.http.Via;
unset resp.http.Link;
}
sub vcl_hit {
if (obj.ttl >= 0s) {
# Hit within TTL period
return (deliver);
}
if (std.healthy(req.backend_hint)) {
if (obj.ttl + 300s > 0s) {
# Hit after TTL expiration, but within grace period
set req.http.grace = "normal (healthy server)";
return (deliver);
} else {
# Hit after TTL and grace expiration
return (miss);
}
} else {
# server is not healthy, retrieve from cache
set req.http.grace = "unlimited (unhealthy server)";
return (deliver);
}
}
Caché:
$ bin/magento cache:status
Current status:
config: 1
layout: 0
block_html: 0
collections: 1
reflection: 1
db_ddl: 1
eav: 1
customer_notification: 1
config_integration: 1
config_integration_api: 1
full_page: 0
translate: 1
config_webservice: 1
vertex: 0
wp_gtm_categories: 1
php bin/magento config:show |grep -i cache
system/full_page_cache/varnish/access_list - localhost
system/full_page_cache/varnish/backend_host - localhost
system/full_page_cache/varnish/backend_port - 8080
system/full_page_cache/varnish/grace_period - 300
system/full_page_cache/caching_application - 2
Is this expected? site loads extremely slow, even if I reload it over and over again
Thanks for your time, gentleman.
Based on de VCL code, it doesn't look like Varnish is removing the PHPSESSID and the store cookies. One way to be sure, is by running the following varnishlog command on your Varnish server:
varnishlog -g request -i ReqUrl -I ReqUnset:cookie -I ReqHeader:Cookie -I bereqheader:Cookie -I berequnset:cookie -q "ReqUrl eq '/'"
This will look at requests from requests to the homepage and will list the following things:
URL
Cookies set by the client as a request header
A copy of the cookies, set as a backend request header
Possible cookies being unset in the client part of the VCL (e.g. in vcl_recv)
Possible cookies being unset in the backend part of the VCL (e.g. in vcl_backend_request)
This will give you a clear indication whether or not cookies are remove by Varnish. You can replace the cookie header filter with any other header potentially being removed.
If you want to know which VCL flow is run, you can add -i "VCL_*" and then you'll see if it is a HIT, a MISS, or a deliberate PASS.
Once you've done the necessary debugging, you'll know if it is a Varnish issue, or a PHP issue.
check for X-Magento-Vary cookie. It might be on a Ajax response and this cookie is in hash routine

Varnish Cache Expiring Objects Too Quickly

I've been having a problem with my varnish (v3.0.2) cache where it keeps resetting the cache of an object after less than 60 seconds despite having a TTL of 24h, cookies stripped, content encoding normalized, non-critical headers unset, Cache-Control set to public, s-maxage=86400 etc.
For some reason, if you access the following URL repeatedly over a minute, you can see that the Age creeps up and then hits zero (with X-Cache returning MISS):
http://data.eyewire.org/volume/83329/chunk/0/1/0/1/tile/xz/32:64
There are no n_lru_nuked objects and the cache is over 60GB. I watched the varnishlog and may have seen something with ExpBan going on, but I can't for the life of me figure out why.
Here are some key parts to my vcl file:
sub vcl_recv {
set req.grace = 120s;
# normalize Accept-Encoding to reduce vary
if (req.http.Accept-Encoding) {
if (req.http.User-Agent ~ "MSIE 6") {
unset req.http.Accept-Encoding;
}
elsif (req.http.Accept-Encoding ~ "gzip") {
set req.http.Accept-Encoding = "gzip";
}
elsif (req.http.Accept-Encoding ~ "deflate") {
set req.http.Accept-Encoding = "deflate";
}
else {
unset req.http.Accept-Encoding;
}
}
# This uses the ACL action called "purge". Basically if a request to
# PURGE the cache comes from anywhere other than localhost, ignore it.
if (req.request == "PURGE")
{if (!client.ip ~ purge)
{error 405 "Not allowed.";}
return(lookup);}
if (req.http.Upgrade ~ "(?i)websocket") {
return (pipe);
}
# ....
if ( req.http.host ~ "data\.eyewire\.org" ) {
unset req.http.Cookie;
unset req.http.Accept-Language;
unset req.http.Expires;
unset req.http.Cache-Control;
unset req.http.User-Agent;
return(lookup);
}
# ....
}
sub vcl_fetch {
# ....
if ( req.http.host ~ "data.eyewire.org" ) {
if ( req.url ~ "^/volume" ) {
unset beresp.http.Set-Cookie;
set beresp.ttl = 24h;
set beresp.http.Cache-Control = "public, s-maxage=86400";
set beresp.http.X-TTL = beresp.ttl;
return(deliver);
}
elsif (req.url ~ "^/cell") {
set beresp.ttl = 1h;
return(hit_for_pass);
}
}
}
# from http://blog.bigdinosaur.org/adventures-in-varnish/
sub vcl_pass {
set bereq.http.connection = "close";
if (req.http.X-Forwarded-For) {
set bereq.http.X-Forwarded-For = req.http.X-Forwarded-For;
}
else {
set bereq.http.X-Forwarded-For = regsub(client.ip, ":.*", "");
}
}
# from http://blog.bigdinosaur.org/adventures-in-varnish/
sub vcl_pipe {
#we need to copy the upgrade header
if (req.http.upgrade) {
set bereq.http.upgrade = req.http.upgrade;
set bereq.http.connection = req.http.connection;
}
set bereq.http.connection = "close";
if (req.http.X-Forwarded-For) {
set bereq.http.X-Forwarded-For = req.http.X-Forwarded-For;
}
else {
set bereq.http.X-Forwarded-For = regsub(client.ip, ":.*", "");
}
}
# from http://blog.bigdinosaur.org/adventures-in-varnish/
sub vcl_hit {
if (req.request == "PURGE") {
purge;
error 200 "Purged.";
}
}
# from http://blog.bigdinosaur.org/adventures-in-varnish/
sub vcl_miss {
if (req.request == "PURGE") {
purge;
error 200 "Purged.";
}
}
sub vcl_deliver {
# Display hit/miss info
if (obj.hits > 0) {
set resp.http.X-Cache = "HIT";
set resp.http.X-Cache-Hits = obj.hits;
}
else {
set resp.http.X-Cache = "MISS";
}
# Security Non-Disclosure
remove resp.http.X-Varnish;
remove resp.http.X-Powered-By;
remove resp.http.Server;
return(deliver);
}
Thanks!
Edit: FYI: I had to revert some changes to my VCL to solve a problem in production, but the problem is still essentially the same.
I experienced the same kind of behaviour where the hits where going up and without any reason it seemed the cache was purged. After some research i found the cause was the vary: Accept-Encoding, User-agent header that made a different cache being saved per user agent.
Try setting a vary header only for Accept-Encoding.
For what it's worth, I just upgraded to Varnish 4 and it seemed to solve the problem. During the upgrade we also removed the definition of vcl_hit and vcl_miss which included a purge directive that didn't seem like it was being hit but who knows.

caching of PROPFIND requests with varnish

I will use varnish in front of my caldav server. All clients made periodically PROPFIND and OPTIONS request. Can I cache the response of the PROPFIND/OPTIONS requests with varnish?
I will purge the cache after PUT request. The following config don't work. I get no cache hits...
vcl 4.0;
import std;
backend baikal {
.host = "127.0.0.1";
.port = "6083";
}
acl upstream_proxy {
"127.0.0.1";
}
sub vcl_recv {
# purge cache for baikal.example.com after put request
if (req.method == "PUT" && req.http.host == "baikal.example.com") {
ban("req.http.host == " + req.http.Host);
}
# Set the X-Forwarded-For header so the backend can see the original
# IP address. If one is already set by an upstream proxy, we'll just re-use that.
if (client.ip ~ upstream_proxy && req.http.X-Forwarded-For) {
set req.http.X-Forwarded-For = req.http.X-Forwarded-For;
} else {
set req.http.X-Forwarded-For = regsub(client.ip, ":.*", "");
}
std.log("ip:" + req.http.x-forwarded-for);
if (req.http.host == "baikal.example.com") {
set req.backend_hint = baikal;
}
if (req.method == "PROPFIND" && req.http.host == "baikal.example.com") {
return(hash);
}
return(hash);
}
sub vcl_backend_response {
if (beresp.http.method == "PROPFIND" ) {
unset beresp.http.pragma;
unset beresp.http.cache-control;
unset beresp.http.expires;
set beresp.ttl = 1 w;
set beresp.http.magicmarker = "1";
}
}
sub vcl_deliver {
# Happens when we have all the pieces we need, and are about to send the
# response to the client.
#
# You can do accounting or modifying the final object here.
if (obj.hits > 0) {
set resp.http.X-Cache = "HIT";
set resp.http.X-Cache-Hits = obj.hits;
} else {
set resp.http.X-Cache = "MISS";
}
}

wget --mirror not creating varnish cache

I have varnish cache installed on my server in port 80 with Apache as content server on port 8080. If I run wget --mirror example.com it should crawl through my entire website and create the varnish cache, right? It does not. For eg., by running wget --mirror example.com I can see in my output that it has run through example.com/abc.html. But when I go to example.com/abc.html from my browser, from my response headers I can see that it is returning Varnish MISS (and it also takes long time). However, if I go to the same url again via the browser this time the cache has been generated as I can see Varnish HIT in my response headers
Here is another interesting fact: If I run only run wget example.com/abc.html it will create the varnish cache! Another interesting fact: If I run wget --mirror example.com/abc.html it will create the varnish cache for abc.html but not further pages
So for some reason using --mirror example.com creates the varnish cache for first page but not further page.
I am using Magento if it makes any difference
I have tried:
wget --mirror --no-http-keep-alive example.com
but it does not work
Here is my varnish vcl
# This is a basic VCL configuration file for PageCache powered by Varnish for Magento module.
# default backend definition. Set this to point to your content server.
backend default {
.host = "127.0.0.1";
.port = "8080";
}
# admin backend with longer timeout values. Set this to the same IP & port as your default server.
backend admin {
.host = "127.0.0.1";
.port = "8080";
.first_byte_timeout = 18000s;
.between_bytes_timeout = 18000s;
}
# add your Magento server IP to allow purges from the backend
acl purge {
"localhost";
"127.0.0.1";
}
sub vcl_recv {
if (client.ip ~ purge) {
set req.hash_always_miss = true;
}
if (req.restarts == 0) {
if (req.http.x-forwarded-for) {
set req.http.X-Forwarded-For =
req.http.X-Forwarded-For + ", " + client.ip;
} else {
set req.http.X-Forwarded-For = client.ip;
}
}
if (req.request != "GET" &&
req.request != "HEAD" &&
req.request != "PUT" &&
req.request != "POST" &&
req.request != "TRACE" &&
req.request != "OPTIONS" &&
req.request != "DELETE" &&
req.request != "PURGE") {
/* Non-RFC2616 or CONNECT which is weird. */
return (pipe);
}
# purge request
if (req.request == "PURGE") {
if (!client.ip ~ purge) {
error 405 "Not allowed.";
}
ban("obj.http.X-Purge-Host ~ " + req.http.X-Purge-Host + " && obj.http.X-Purge-URL ~ " + req.http.X-Purge-Regex + " && obj.http.Content-Type ~ " + req.http.X-Purge-Content-Type);
error 200 "Purged.";
}
# switch to admin backend configuration
if (req.http.cookie ~ "adminhtml=") {
set req.backend = admin;
}
# we only deal with GET and HEAD by default
if (req.request != "GET" && req.request != "HEAD") {
return (pass);
}
# normalize url in case of leading HTTP scheme and domain
set req.url = regsub(req.url, "^http[s]?://[^/]+", "");
# static files are always cacheable. remove SSL flag and cookie
if (req.url ~ "^/(media|js|skin)/.*\.(png|jpg|jpeg|gif|css|js|swf|ico)$") {
unset req.http.Https;
unset req.http.Cookie;
}
# not cacheable by default
if (req.http.Authorization || req.http.Https) {
return (pass);
}
# do not cache any page from
# - index files
# - ...
if (req.url ~ "^/(index)") {
return (pass);
}
# as soon as we have a NO_CACHE cookie pass request
if (req.http.cookie ~ "NO_CACHE=") {
return (pass);
}
# normalize Aceept-Encoding header
# http://varnish.projects.linpro.no/wiki/FAQ/Compression
if (req.http.Accept-Encoding) {
if (req.url ~ "\.(jpg|png|gif|gz|tgz|bz2|tbz|mp3|ogg|swf|flv)$") {
# No point in compressing these
remove req.http.Accept-Encoding;
} elsif (req.http.Accept-Encoding ~ "gzip") {
set req.http.Accept-Encoding = "gzip";
} elsif (req.http.Accept-Encoding ~ "deflate" && req.http.user-agent !~ "MSIE") {
set req.http.Accept-Encoding = "deflate";
} else {
# unkown algorithm
remove req.http.Accept-Encoding;
}
}
# remove Google gclid parameters
set req.url = regsuball(req.url,"\?gclid=[^&]+$",""); # strips when QS = "?gclid=AAA"
set req.url = regsuball(req.url,"\?gclid=[^&]+&","?"); # strips when QS = "?gclid=AAA&foo=bar"
set req.url = regsuball(req.url,"&gclid=[^&]+",""); # strips when QS = "?foo=bar&gclid=AAA" or QS = "?foo=bar&gclid=AAA&bar=baz"
return (lookup);
}
# sub vcl_pipe {
# # Note that only the first request to the backend will have
# # X-Forwarded-For set. If you use X-Forwarded-For and want to
# # have it set for all requests, make sure to have:
# # set bereq.http.connection = "close";
# # here. It is not set by default as it might break some broken web
# # applications, like IIS with NTLM authentication.
# return (pipe);
# }
#
# sub vcl_pass {
# return (pass);
# }
#
sub vcl_hash {
hash_data(req.url);
if (req.http.host) {
hash_data(req.http.host);
} else {
hash_data(server.ip);
}
if (!(req.url ~ "^/(media|js|skin)/.*\.(png|jpg|jpeg|gif|css|js|swf|ico)$")) {
call design_exception;
}
return (hash);
}
#
# sub vcl_hit {
# return (deliver);
# }
#
# sub vcl_miss {
# return (fetch);
# }
sub vcl_fetch {
if (beresp.status == 500) {
set beresp.saintmode = 10s;
return (restart);
}
set beresp.grace = 5m;
# add ban-lurker tags to object
set beresp.http.X-Purge-URL = req.url;
set beresp.http.X-Purge-Host = req.http.host;
if (beresp.status == 200 || beresp.status == 301 || beresp.status == 404) {
if (beresp.http.Content-Type ~ "text/html" || beresp.http.Content-Type ~ "text/xml") {
if ((beresp.http.Set-Cookie ~ "NO_CACHE=") || (beresp.ttl < 1s)) {
set beresp.ttl = 0s;
return (hit_for_pass);
}
# marker for vcl_deliver to reset Age:
set beresp.http.magicmarker = "1";
# Don't cache cookies
unset beresp.http.set-cookie;
} else {
# set default TTL value for static content
set beresp.ttl = 4h;
}
return (deliver);
}
return (hit_for_pass);
}
sub vcl_deliver {
# debug info
if (resp.http.X-Cache-Debug) {
if (obj.hits > 0) {
set resp.http.X-Cache = "HIT";
set resp.http.X-Cache-Hits = obj.hits;
} else {
set resp.http.X-Cache = "MISS";
}
set resp.http.X-Cache-Expires = resp.http.Expires;
} else {
# remove Varnish/proxy header
remove resp.http.X-Varnish;
remove resp.http.Via;
remove resp.http.Age;
remove resp.http.X-Purge-URL;
remove resp.http.X-Purge-Host;
}
if (resp.http.magicmarker) {
# Remove the magic marker
unset resp.http.magicmarker;
set resp.http.Cache-Control = "no-store, no-cache, must-revalidate, post-check=0, pre-check=0";
set resp.http.Pragma = "no-cache";
set resp.http.Expires = "Mon, 31 Mar 2008 10:00:00 GMT";
set resp.http.Age = "0";
}
}
# sub vcl_error {
# set obj.http.Content-Type = "text/html; charset=utf-8";
# set obj.http.Retry-After = "5";
# synthetic {"
# <?xml version="1.0" encoding="utf-8"?>
# <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
# "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
# <html>
# <head>
# <title>"} + obj.status + " " + obj.response + {"</title>
# </head>
# <body>
# <h1>Error "} + obj.status + " " + obj.response + {"</h1>
# <p>"} + obj.response + {"</p>
# <h3>Guru Meditation:</h3>
# <p>XID: "} + req.xid + {"</p>
# <hr>
# <p>Varnish cache server</p>
# </body>
# </html>
# "};
# return (deliver);
# }
#
# sub vcl_init {
# return (ok);
# }
#
# sub vcl_fini {
# return (ok);
# }
sub design_exception {
}
Edit , Answer:
I don't know whether adding the --no-cookies fixed it (don't know whether wget --mirror stores cookies and if it does then that would have fixed it) or whether adding the headers fixed it, but this works and creates the varnish cache which I can see via my browser:
wget --spider --recursive --no-cookies --header "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8" --header "Accept-Charset: ISO-8859-1,utf-8;q=0.7,*;q=0.3" --header "Accept-Language: en-US,en;q=0.8" --header "Cache-Control: max-age=0" --header "Connection: keep-alive" --header "Host: www.example.com" --header "User-Agent: Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/535.19 (KHTML, like Gecko) Ubuntu/10.04 Chromium/18.0.1025.168 Chrome/18.0.1025.168 Safari/535.19" www.example.com
Second Edit, Related to previous answer. IMPORTANT
Anyone using Magento, don't use my above solution. Because of --no-cookies, Magento ends up creating a new session file under var/session folder for every single request. This caused my session folder to be filled in with 250,000 files with every wget run of above command! This caused the folder to be full and none of my customers could actually add anything to their basket since Magento could not create any more session files for them. I am looking for more alternatives to my problem
You should look into two points:
All modern browsers send some Accept-Encoding ~ 'gzip' header, so cached entries will not be used if your spider doesn't use this one (a decent back-end generating gzipped responses adds a vary: Accept-Encoding header).
Your back-end generates cookies for every cookie-less-user. Your script should keep its cookie but your cache rules should ignore cookies if they don't matter. However, if your responses include shopping-carts or other stuff (very user/state/cookie dependent) you can not cache this and you'll have to recreate this response every time. You can factor our the variable part with javascript/iframes, but you need to (re-)design your application to make it cacheable.

Magento with Nginx and Varnish - 302's, cookies, and redirects?

I'm testing out Varnish to increase load times with Magento, so far caching has worked fantastic. I can serve up 32-35 pages/sec for index.php, and max out at 1200/sec for catalog pages. However I've run into a problem that I'm really struggling with. I've been trying to fix this for days now. When adding a product to the cart, it redirects to the homepage and then displays "Item XXX has been added to your cart". I can see Varnish getting the 302 to return it back to the page I added the item from, but it always bounces back to the homepage. The same problem exists when adding a product for comparison, but this never gets added to the list of items to compare.
You can see the site here:
http://test.autoracks.com
Here's my default.vcl:
# default backend definition. Set this to point to your content server.
backend default {
.host = "127.0.0.1";
.port = "8080";
}
# admin backend with longer timeout values. Set this to the same IP & port as your default server.
backend admin {
.host = "127.0.0.1";
.port = "8080";
.first_byte_timeout = 18000s;
.between_bytes_timeout = 18000s;
}
# add your Magento server IP to allow purges from the backend
acl purge {
"localhost";
"127.0.0.1";
}
sub vcl_recv {
if (req.restarts == 0) {
if (req.http.x-forwarded-for) {
set req.http.X-Forwarded-For =
req.http.X-Forwarded-For ", " client.ip;
} else {
set req.http.X-Forwarded-For = client.ip;
}
}
if (req.request != "GET" &&
req.request != "HEAD" &&
req.request != "PUT" &&
req.request != "POST" &&
req.request != "TRACE" &&
req.request != "OPTIONS" &&
req.request != "DELETE" &&
req.request != "PURGE") {
/* Non-RFC2616 or CONNECT which is weird. */
return (pipe);
}
# purge request
if (req.request == "PURGE") {
if (!client.ip ~ purge) {
error 405 "Not allowed.";
}
purge("obj.http.X-Purge-Host ~ " req.http.X-Purge-Host " && obj.http.X-Purge-URL ~ " req.http.X-Purge-Regex " && obj.http.Content-Type ~ " req.http.X-Purge-Content-Type);
error 200 "Purged.";
}
# switch to admin backend configuration
if (req.http.cookie ~ "adminhtml=") {
set req.backend = admin;
}
# we only deal with GET and HEAD by default
if (req.request != "GET" && req.request != "HEAD") {
return (pass);
}
# normalize url in case of leading HTTP scheme and domain
set req.url = regsub(req.url, "^http[s]?://[^/]+", "");
# static files are always cacheable. remove SSL flag and cookie
if (req.url ~ "^/(media|js|skin)/.*\.(png|jpg|jpeg|gif|css|js|swf|ico)$") {
unset req.http.Https;
unset req.http.Cookie;
}
# not cacheable by default
if (req.http.Authorization || req.http.Https) {
return (pass);
}
# do not cache any page from
# - index files
# - ...
#if (req.url ~ "^/(index)") {
# return (pass);
#}
# as soon as we have a NO_CACHE cookie pass request
if (req.http.cookie ~ "NO_CACHE=") {
return (pass);
}
# normalize Aceept-Encoding header
# http://varnish.projects.linpro.no/wiki/FAQ/Compression
if (req.http.Accept-Encoding) {
if (req.url ~ "\.(jpg|png|gif|gz|tgz|bz2|tbz|mp3|ogg|swf|flv)$") {
# No point in compressing these
remove req.http.Accept-Encoding;
} elsif (req.http.Accept-Encoding ~ "gzip") {
set req.http.Accept-Encoding = "gzip";
} elsif (req.http.Accept-Encoding ~ "deflate" && req.http.user-agent !~ "MSIE") {
set req.http.Accept-Encoding = "deflate";
} else {
# unkown algorithm
remove req.http.Accept-Encoding;
}
}
# remove Google gclid parameters
set req.url = regsuball(req.url,"\?gclid=[^&]+$",""); # strips when QS = "?gclid=AAA"
set req.url = regsuball(req.url,"\?gclid=[^&]+&","?"); # strips when QS = "?gclid=AAA&foo=bar"
set req.url = regsuball(req.url,"&gclid=[^&]+",""); # strips when QS = "?foo=bar&gclid=AAA" or QS = "?foo=bar&gclid=AAA&bar=baz"
return (lookup);
}
# sub vcl_pipe {
# # Note that only the first request to the backend will have
# # X-Forwarded-For set. If you use X-Forwarded-For and want to
# # have it set for all requests, make sure to have:
# # set bereq.http.connection = "close";
# # here. It is not set by default as it might break some broken web
# # applications, like IIS with NTLM authentication.
# return (pipe);
# }
#
# sub vcl_pass {
# return (pass);
# }
#
sub vcl_hash {
set req.hash += req.url;
if (req.http.host) {
set req.hash += req.http.host;
} else {
set req.hash += server.ip;
}
if (!(req.url ~ "^/(media|js|skin)/.*\.(png|jpg|jpeg|gif|css|js|swf|ico)$")) {
call design_exception;
}
return (hash);
}
#
# sub vcl_hit {
# if (!obj.cacheable) {
# return (pass);
# }
# return (deliver);
# }
#
# sub vcl_miss {
# return (fetch);
# }
sub vcl_fetch {
if (beresp.status == 500) {
set beresp.saintmode = 10s;
restart;
}
set beresp.grace = 5m;
# add ban-lurker tags to object
set beresp.http.X-Purge-URL = req.url;
set beresp.http.X-Purge-Host = req.http.host;
if (beresp.status == 200 || beresp.status == 301 || beresp.status == 404) {
if (beresp.http.Content-Type ~ "text/html" || beresp.http.Content-Type ~ "text/xml") {
if ((beresp.http.Set-Cookie ~ "NO_CACHE=") || (beresp.ttl < 1s)) {
set beresp.ttl = 0s;
return (pass);
}
# marker for vcl_deliver to reset Age:
set beresp.http.magicmarker = "1";
# Don't cache cookies
unset beresp.http.set-cookie;
} else {
# set default TTL value for static content
set beresp.ttl = 4h;
}
return (deliver);
}
return (pass);
}
sub vcl_deliver {
# debug info
if (resp.http.X-Cache-Debug) {
if (obj.hits > 0) {
set resp.http.X-Cache = "HIT";
set resp.http.X-Cache-Hits = obj.hits;
} else {
set resp.http.X-Cache = "MISS";
}
set resp.http.X-Cache-Expires = resp.http.Expires;
} else {
# remove Varnish/proxy header
remove resp.http.X-Varnish;
remove resp.http.Via;
remove resp.http.Age;
remove resp.http.X-Purge-URL;
remove resp.http.X-Purge-Host;
}
if (resp.http.magicmarker) {
# Remove the magic marker
unset resp.http.magicmarker;
set resp.http.Cache-Control = "no-store, no-cache, must-revalidate, post-check=0, pre-check=0";
set resp.http.Pragma = "no-cache";
set resp.http.Expires = "Mon, 31 Mar 2008 10:00:00 GMT";
set resp.http.Age = "0";
}
}
# sub vcl_error {
# set obj.http.Content-Type = "text/html; charset=utf-8";
# synthetic {"
# <?xml version="1.0" encoding="utf-8"?>
# <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
# "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
# <html>
# <head>
# <title>"} obj.status " " obj.response {"</title>
# </head>
# <body>
# <h1>Error "} obj.status " " obj.response {"</h1>
# <p>"} obj.response {"</p>
# <h3>Guru Meditation:</h3>
# <p>XID: "} req.xid {"</p>
# <hr>
# <p>Varnish cache server</p>
# </body>
# </html>
# "};
# return (deliver);
# }
sub design_exception {
}
I should that if I put nginx in front everything works correctly.
Any help with this would be greatly appreciated, I really want to get this working!
Thanks...
This problem is located in the Magento Core. As the method getCurrentUrl builds the URL using the Nginx port number (not often 80 when behind Varnish). Later when the redirect URL is decoded it fails as an "internal URL" due to the port number.
The solution (whithout modifying the Magento core) is to set Nginx to listen to port 80 and Varnish to something else. Then map incoming traffic to Varnish using iptables.
I've covered this in a blog post on Keeping Magento satisfied behind Varnish.
when i was testing varnish with a local apache server using siege the test would sometimes send a different response url than i was expecting from the test.
i have no idea why i never witnessed (with my eyes) a wrong page loading even though i ran it with the same vcl all the time i was developing other things.
but i followed the advice on the on the varnish site and installed a 64 bit os.
my problems disappeared.
were you using a 32 bit system?

Resources