'Req_body functions (length, hash, regex)'

[prev in list] [next in list] [prev in thread] [next in thread] 

List:       varnish-dev
Subject:    Req_body functions (length, hash, regex)
From:       Arianna Aondio <arianna.aondio () varnish-software ! com>
Date:       2015-03-23 14:47:46
Message-ID: CAL6a+5g+8t2j1VxizssUXASEEQVWjksjN=JEhZbDLiu5hD8PMg () mail ! gmail ! com
[Download RAW message or body]

Hi everyone,
please find patch attached.

This is a proposed solution for 3 req.body functions: len_req_body,
hash_req_body and rematch_req_body.

The idea is to iterate over the request body, wrap it into a blob (I
have to thank Nils for this, because I took the idea from the vmod he
wrote a couple of weeks ago) and use this blob for operations such as
hashing or regex matching.

*len_req_body: checks whether the req.body has been buffered; if so, it
returns the number of bytes consumed.

*hash_req_body: first the req.body is collected into a blob, then the
blob is added to the hash (I had to implement a new function,
HSH_AddBytes, for hashing binary data).

*rematch_req_body: using a priv_call, the regular expression given as a
parameter is compiled just once; then we use VRE_exec on the req.body
blob to check whether there's a regex match.

Comments much appreciated.

-- 
Arianna Aondio
Software Developer | Varnish Software AS
Mobile: +47 980 62 619

We Make Websites Fly
www.varnish-software.com

["req_body_functions.patch" (text/x-diff)]

From de7634c55e92dc69e75e1fc27f5ad614f1d387d2 Mon Sep 17 00:00:00 2001
From: Arianna Aondio <arianna.aondio@varnish-software.com>
Date: Mon, 23 Mar 2015 15:14:22 +0100
Subject: [PATCH] Implementation of req.body functions and test cases.

---
 bin/varnishd/cache/cache_hash.c  |  10 ++++
 bin/varnishd/hash/hash_slinger.h |   3 +-
 bin/varnishtest/tests/m00022.vtc |  50 ++++++++++++++++
 bin/varnishtest/tests/m00023.vtc |  77 +++++++++++++++++++++++++
 bin/varnishtest/tests/m00024.vtc |  56 ++++++++++++++++++
 lib/libvmod_std/vmod.vcc         |  48 ++++++++++++++++
 lib/libvmod_std/vmod_std.c       | 119 +++++++++++++++++++++++++++++++++++++++
 7 files changed, 362 insertions(+), 1 deletion(-)
 create mode 100644 bin/varnishtest/tests/m00022.vtc
 create mode 100644 bin/varnishtest/tests/m00023.vtc
 create mode 100644 bin/varnishtest/tests/m00024.vtc

diff --git a/bin/varnishd/cache/cache_hash.c b/bin/varnishd/cache/cache_hash.c
index b3d3ac8..2af4256 100644
--- a/bin/varnishd/cache/cache_hash.c
+++ b/bin/varnishd/cache/cache_hash.c
@@ -198,6 +198,16 @@ HSH_AddString(const struct req *req, const char *str)
 		SHA256_Update(req->sha256ctx, &str, sizeof str);
 }
 
+void
+HSH_AddBytes(const struct req *req, const void *buf, size_t len)
+{
+	CHECK_OBJ_NOTNULL(req, REQ_MAGIC);
+	AN(req->sha256ctx);
+	if (buf == NULL)	/* a NULL buffer adds nothing to the hash */
+		return;
+	SHA256_Update(req->sha256ctx, buf, len);
+}
+
 /*---------------------------------------------------------------------
  * This is a debugging hack to enable testing of boundary conditions
  * in the hash algorithm.
diff --git a/bin/varnishd/hash/hash_slinger.h b/bin/varnishd/hash/hash_slinger.h
index 189f938..59524f4 100644
--- a/bin/varnishd/hash/hash_slinger.h
+++ b/bin/varnishd/hash/hash_slinger.h
@@ -67,7 +67,8 @@ enum lookup_e HSH_Lookup(struct req *, struct objcore **, struct objcore **,
     int wait_for_busy, int always_insert);
 void HSH_Ref(struct objcore *o);
 void HSH_Init(const struct hash_slinger *slinger);
-void HSH_AddString(const struct req *, const char *str);
+void HSH_AddString(const struct req *req, const char *str);
+void HSH_AddBytes(const struct req *req, const void *buf, size_t len);
 void HSH_Insert(struct worker *, const void *hash, struct objcore *);
 void HSH_Purge(struct worker *, struct objhead *, double ttl, double grace,
     double keep);
diff --git a/bin/varnishtest/tests/m00022.vtc b/bin/varnishtest/tests/m00022.vtc
new file mode 100644
index 0000000..03d96a1
--- /dev/null
+++ b/bin/varnishtest/tests/m00022.vtc
@@ -0,0 +1,50 @@
+varnishtest "Test std.len_req_body"
+
+server s1 {
+	rxreq
+	txresp
+	rxreq
+	txresp
+	rxreq
+	txresp
+} -start
+
+varnish v1 -vcl+backend {
+	import ${vmod_std};
+
+	sub vcl_recv {
+		std.cache_req_body(110B);
+		set req.http.x-len = std.len_req_body();
+	}
+
+	sub vcl_deliver {
+		set resp.http.x-len = req.http.x-len;
+	}
+} -start
+
+client c1 {
+	txreq -req POST -nolen -hdr "Transfer-encoding: chunked"
+	chunked {BLAS}
+	delay .2
+	chunkedlen 110
+	expect_close
+} -run
+
+client c1 {
+	txreq -req POST -nolen -hdr "Transfer-encoding: chunked"
+	chunked {BLAS}
+	delay .2
+	chunkedlen 90
+	delay .2
+	chunked {FOO}
+	delay .2
+	chunkedlen 0
+	rxresp
+	expect resp.http.x-len == 97
+} -run
+
+client c2 {
+	txreq -req POST -body "BANANE"
+	rxresp
+	expect resp.http.x-len == 6
+} -run
diff --git a/bin/varnishtest/tests/m00023.vtc b/bin/varnishtest/tests/m00023.vtc
new file mode 100644
index 0000000..7c3234c
--- /dev/null
+++ b/bin/varnishtest/tests/m00023.vtc
@@ -0,0 +1,77 @@
+varnishtest "Test std.hash_req_body"
+
+server s1 {
+	rxreq
+	txresp
+	rxreq
+	txresp
+	rxreq
+	txresp
+	rxreq
+	txresp
+} -start
+
+varnish v1 -vcl+backend {
+	import ${vmod_std};
+
+	sub vcl_recv {
+		std.cache_req_body(110B);
+		return (hash);
+	}
+
+	sub vcl_hash {
+		std.hash_req_body();
+		hash_data(req.url);
+		if (req.http.host) {
+			hash_data(req.http.host);
+		} else {
+			hash_data(server.ip);
+		}
+		return (lookup);
+	}
+
+	sub vcl_hit {
+		set req.http.x-hit = "HIT";
+	}
+
+	sub vcl_miss {
+		set req.http.x-miss = "MISS";
+	}
+
+	sub vcl_deliver {
+		set resp.http.x-hit = req.http.x-hit;
+		set resp.http.x-miss = req.http.x-miss;
+	}
+
+} -start
+
+client c1 {
+	txreq -req POST -nolen -hdr "Transfer-encoding: chunked"
+	chunked {BLAS}
+	delay .2
+	chunkedlen 110
+	expect_close
+} -run
+
+client c1 {
+	txreq -req POST -nolen -hdr "Transfer-encoding: chunked"
+	chunked {BLAS}
+	delay .2
+	chunkedlen 90
+	delay .2
+	chunked {FOO}
+	delay .2
+	chunkedlen 0
+	rxresp
+} -run
+
+client c2 {
+	txreq -req POST -url "/banane" -body "FOOBAR"
+	rxresp
+} -run
+
+client c2 {
+	txreq -req GET -url "/banane" -body "FOOBAR"
+	rxresp
+	expect resp.http.x-hit == "HIT"
+} -run
diff --git a/bin/varnishtest/tests/m00024.vtc b/bin/varnishtest/tests/m00024.vtc
new file mode 100644
index 0000000..8e109c5
--- /dev/null
+++ b/bin/varnishtest/tests/m00024.vtc
@@ -0,0 +1,56 @@
+varnishtest "test rematch matchin on binary req.body"
+server s1 {
+	rxreq
+	expect req.bodylen > 12
+	txresp
+	rxreq
+	txresp
+} -start
+
+# rematch_req_body is case sensitive.
+varnish v1 -vcl+backend {
+	import ${vmod_std};
+	sub vcl_recv {
+		std.cache_req_body(10KB);
+		set req.http.x-boolean1 = std.rematch_req_body(".*");
+		set req.http.x-boolean2 = std.rematch_req_body("aRNI");
+		set req.http.x-boolean3 = std.rematch_req_body("a");
+		set req.http.x-boolean4 = std.rematch_req_body("F");
+	}
+
+	sub vcl_deliver {
+		set resp.http.x-boolean1 = req.http.x-boolean1;
+		set resp.http.x-boolean2 = req.http.x-boolean2;
+		set resp.http.x-boolean3 = req.http.x-boolean3;
+		set resp.http.x-boolean4 = req.http.x-boolean4;
+	}
+} -start
+
+client c1 {
+	txreq -req "POST" -gzipbody {a5e2e2e1c2e2}
+	rxresp
+	expect resp.http.x-boolean1 == 1
+	expect resp.http.x-boolean2 == 0
+	expect resp.http.x-boolean3 == 1
+	expect resp.http.x-boolean4 == 0
+} -run
+
+varnish v1 -cliok "param.set debug +syncvsl"
+varnish v1 -cliok "param.set fetch_chunksize 4k"
+
+client c2 {
+	txreq -req POST -nolen -hdr "Transfer-encoding: chunked"
+	chunked {a5e2e2e1c2e2}
+	delay .2
+	chunkedlen 4090
+	delay .2
+	chunked {VARNISH}
+	delay .2
+	chunked {\0}
+	chunked {FOO}
+	delay .2
+	chunkedlen 0
+	rxresp
+	expect resp.http.x-boolean1 == 1
+	expect resp.http.x-boolean2 == 0
+} -run
diff --git a/lib/libvmod_std/vmod.vcc b/lib/libvmod_std/vmod.vcc
index 288a1ed..2a7571b 100644
--- a/lib/libvmod_std/vmod.vcc
+++ b/lib/libvmod_std/vmod.vcc
@@ -257,6 +257,54 @@ Example
 	| }
 
 
+$Function INT len_req_body()
+
+Description
+	Returns the request body length.
+
+	Note that the request body must be buffered.
+
+Example
+	| if (std.cache_req_body(1KB)) {
+	|     set req.http.x-len = std.len_req_body();
+	| }
+
+$Function VOID hash_req_body(PRIV_TOP)
+
+Description
+	Adds available request body bytes to the lookup hash key.
+	Note that this function can only be used in vcl_hash and
+	the request body must be buffered.
+
+Example
+	| sub vcl_recv {
+	|     std.cache_req_body(1KB);
+	| }
+	|
+	| sub vcl_hash{
+	|     std.hash_req_body();
+	| }
+
+$Function INT rematch_req_body(PRIV_TOP, PRIV_CALL, STRING re)
+
+Description
+	Returns -1 if an error occurred.
+	Returns 0 if the request body doesn't match the regular expression *re*.
+	Returns 1 if the request body matches the regular expression *re*.
+
+	Note that the comparison is case sensitive and the
+	request body must be buffered.
+
+Example
+	| std.cache_req_body(1KB);
+	|
+	| if (std.rematch_req_body("FOO") == 1) {
+	|    std.log("is true");
+	| }
+
+
+
+
 SEE ALSO
 ========
 
diff --git a/lib/libvmod_std/vmod_std.c b/lib/libvmod_std/vmod_std.c
index 097cd45..b025ba2 100644
--- a/lib/libvmod_std/vmod_std.c
+++ b/lib/libvmod_std/vmod_std.c
@@ -43,7 +43,9 @@
 #include "vtim.h"
 
 #include "cache/cache.h"
+#include "hash/hash_slinger.h"
 #include "cache/cache_director.h"
+#include "vcl.h"
 
 #include "vcc_if.h"
 
@@ -248,3 +250,120 @@ vmod_strstr(VRT_CTX, VCL_STRING s1, VCL_STRING s2)
 	return (strstr(s1, s2));
 }
 
+/* req.body iterator callback: copy one chunk into the workspace in priv. */
+static int __match_proto__(req_body_iter_f)
+concat_req_body(struct req *req, void *priv, void *ptr, size_t len)
+{
+	(void)req;
+	/* Yields 1 when WS_Copy() returns NULL, 0 otherwise. */
+	return (WS_Copy((struct ws *)priv, ptr, len) == NULL);
+}
+
+/* Gather the cached req.body in ctx->ws and publish it via priv_top; on
+ * failure priv_top->priv is left NULL. */
+static void
+vmod_blob_req_body(VRT_CTX, struct vmod_priv *priv_top)
+{
+	char *ws_f;
+	ssize_t l;
+
+	/* A non-NULL priv means an earlier call already gathered the body. */
+	if (priv_top->priv) {
+		return;
+	}
+
+	CHECK_OBJ_NOTNULL(ctx, VRT_CTX_MAGIC);
+	CHECK_OBJ_NOTNULL(ctx->req, REQ_MAGIC);
+
+	if (ctx->req->req_body_status != REQ_BODY_CACHED){
+		VSLb(ctx->vsl, SLT_VCL_Error,
+		    "Uncached req.body");
+		return;
+	}
+
+	/* Everything copied into the workspace from here on is the blob. */
+	ws_f = WS_Snapshot(ctx->ws);
+	AN(ws_f);
+	/* NOTE(review): assumes successive WS_Copy() chunks are contiguous
+	 * and that VRB_Iterate() returns the byte count - confirm both. */
+	l = VRB_Iterate(ctx->req, concat_req_body, ctx->ws);
+
+	if (l < 0 || WS_Copy(ctx->ws, "\0", 1) == NULL) {
+		VSLb(ctx->vsl, SLT_VCL_Error,
+		    "Iteration on req.body didn't succeed.");
+		/* Drop the partial copy; priv stays NULL to flag failure. */
+		WS_Reset(ctx->ws, ws_f);
+		priv_top->len = -1;
+		return;
+	}
+	priv_top->priv = ws_f;
+	priv_top->len = l;
+}
+
+/* VCL std.len_req_body(): req_bodybytes of a cached req.body, else -1. */
+VCL_INT __match_proto__(td_std_len_req_body)
+vmod_len_req_body(VRT_CTX)
+{
+	CHECK_OBJ_NOTNULL(ctx, VRT_CTX_MAGIC);
+	CHECK_OBJ_NOTNULL(ctx->req, REQ_MAGIC);
+
+	if (ctx->req->req_body_status == REQ_BODY_CACHED)
+		return (ctx->req->req_bodybytes);
+
+	VSLb(ctx->vsl, SLT_VCL_Error,
+	   "Uncached req.body");
+	return (-1);
+}
+
+/* VCL std.hash_req_body(): add the buffered req.body to the hash key. */
+VCL_VOID __match_proto__(td_std_hash_req_body)
+vmod_hash_req_body(VRT_CTX, struct vmod_priv *priv_top)
+{
+	CHECK_OBJ_NOTNULL(ctx, VRT_CTX_MAGIC);
+	if (ctx->method != VCL_MET_HASH) {
+		VSLb(ctx->vsl, SLT_VCL_Error,
+		    "Hash_Req_Body can only be used in vcl_hash{}");
+		return;
+	}
+	vmod_blob_req_body(ctx, priv_top);
+	/* A negative len marks a failed copy and would wrap as size_t. */
+	if (priv_top->priv != NULL && priv_top->len >= 0)
+		HSH_AddBytes(ctx->req, priv_top->priv, priv_top->len);
+}
+
+/* 1 on match, 0 otherwise; -1 if the regex is invalid or no body blob. */
+VCL_INT  __match_proto__(td_std_rematch_req_body)
+vmod_rematch_req_body(VRT_CTX, struct vmod_priv *priv_top,
+    struct vmod_priv *priv_call, VCL_STRING re)
+{
+	const char *error;
+	int erroroffset;
+	vre_t *t;
+	int i;
+
+	CHECK_OBJ_NOTNULL(ctx, VRT_CTX_MAGIC);
+	AN(re);
+
+	/* Compile once per call site and cache the result in priv_call. */
+	if (priv_call->priv == NULL) {
+		t = VRE_compile(re, 0, &error, &erroroffset);
+		if (t == NULL) {
+			VSLb(ctx->vsl, SLT_VCL_Error,
+			    "Regular expression not valid");
+			return (-1);
+		}
+		priv_call->priv = t;
+		priv_call->free = VRT_re_fini;	/* vre_t needs VRE_free() */
+	}
+
+	vmod_blob_req_body(ctx, priv_top);
+	if (priv_top->priv == NULL || priv_top->len < 0)
+		return (-1);	/* no usable body blob to match against */
+	i = VRE_exec(priv_call->priv, priv_top->priv, priv_top->len, 0, 0,
+	    NULL, 0, &cache_param->vre_limits);
+	if (i >= 0)
+		return (1);
+	if (i < VRE_ERROR_NOMATCH )
+		VSLb(ctx->vsl, SLT_VCL_Error, "Regexp matching returned %d", i);
+	return (0);
+}
-- 
1.9.1



_______________________________________________
varnish-dev mailing list
varnish-dev@varnish-cache.org
https://www.varnish-cache.org/lists/mailman/listinfo/varnish-dev

[prev in list] [next in list] [prev in thread] [next in thread]