[prev in list] [next in list] [ prev in thread ] [next in thread] List: openbsd-tech Subject: httpd rewrites with Lua's pattern matching From: Reyk Floeter <reyk () openbsd ! org> Date: 2015-06-20 12:52:37 Message-ID: 20150620130118.GC20130 () autobahn ! atexit ! net [Download RAW message or body] Hi, there is some great interest in getting support for rewrites and better matching in httpd. I refused to implement this using regex: regex is extremely complicated code, there have been lots of bugs, it allows, if not specified carefully, dangerous recursion and ReDoS, and it would add another potential attack surface to httpd. Thanks to tedu@'s hint at BSDCan, I stumbled across Lua's pattern matching implementation. It is relatively small (less than 700 LOC), powerful, portable C code, MIT-licensed, and doesn't suffer from some of regex's problems (e.g., it doesn't allow recursive captures). I ported it on my flight back from Ottawa, KNF'ed it, and turned it into a C API without the Lua bindings. No, this diff does not bring the Lua language to httpd! Here is a diff that adds pattern matching to httpd, allowing rewrites with redirects. Additional use cases can be added later and, if it works out, we can probably move it to an existing library. location match '^/page/(%d+)' { block return 302 "/index.cgi?page=%1" } This diff will be two commits, one is pending to fix '?' in the uri. Comments? OK? 
Reyk diff --git httpd/Makefile httpd/Makefile index 885ad42..69fdb5e 100644 --- httpd/Makefile +++ httpd/Makefile @@ -6,6 +6,9 @@ SRCS+= config.c control.c httpd.c log.c logger.c proc.c SRCS+= server.c server_http.c server_file.c server_fcgi.c MAN= httpd.8 httpd.conf.5 +SRCS+= patterns.c +MAN+= patterns.7 + LDADD= -levent -ltls -lssl -lcrypto -lutil DPADD= ${LIBEVENT} ${LIBTLS} ${LIBSSL} ${LIBCRYPTO} ${LIBUTIL} #DEBUG= -g -DDEBUG=3 -O0 diff --git httpd/httpd.conf.5 httpd/httpd.conf.5 index 87866d2..24d92ac 100644 --- httpd/httpd.conf.5 +++ httpd/httpd.conf.5 @@ -131,14 +131,38 @@ The configured web servers. .Pp Each .Ic server -must have a -.Ar name -and include one or more lines of the following syntax: +section starts with a declaration of the server +.Ar name : +.Bl -tag -width Ds +.It Ic server Ar name Brq ... +Match the server name using shell globbing rules. +This can be an explicit name, +.Ar www.example.com , +or a name including wildcards, +.Ar *.example.com . +.It Ic server match Ar name Brq ... +Match the server name using pattern matching, +see +.Xr patterns 7 . +.El +.Pp +Followed by a block of options that is enclosed in curly brackets: .Bl -tag -width Ds .It Ic alias Ar name Specify an additional alias .Ar name for this server. +.It Ic alias match Ar name +Like the +.Ic alias +option, +but +.Ic match +the +.Ar name +using pattern matching instead of shell globbing rules, +see +.Xr patterns 7 . .It Oo Ic no Oc Ic authenticate Oo Ar realm Oc Ic with Pa htpasswd Authenticate a remote user for .Ar realm @@ -188,6 +212,12 @@ The configured IP address of the server. The configured TCP server port of the server. .It Ic $SERVER_NAME The name of the server. +.It Ic Pf % Ar n +The capture index +.Ar n +of a string that was captured by the enclosing +.Ic location match +option. .El .It Ic connection Ar option Set the specified options and limits for HTTP connections. @@ -247,6 +277,22 @@ except .Ic location and .Ic tcp . +.It Ic location match Ar path Brq ... 
+Like the +.Ic location +option, +but +.Ic match +the +.Ar path +using pattern matching instead of shell globbing rules, +see +.Xr patterns 7 . +The pattern may contain captures that can be used in the +.Ar uri +of an enclosed +.Ic block return +option. .It Oo Ic no Oc Ic log Op Ar option Set the specified logging options. Logging is enabled by default using the standard @@ -516,6 +562,7 @@ server "www.example.com" { .Ed .Sh SEE ALSO .Xr htpasswd 1 , +.Xr patterns 7 , .Xr httpd 8 , .Xr slowcgi 8 .Sh AUTHORS diff --git httpd/httpd.h httpd/httpd.h index 1431eaa..ff76281 100644 --- httpd/httpd.h +++ httpd/httpd.h @@ -35,6 +35,8 @@ #include <imsg.h> #include <tls.h> +#include "patterns.h" + #define CONF_FILE "/etc/httpd.conf" #define HTTPD_SOCKET "/var/run/httpd.sock" #define HTTPD_USER "www" @@ -278,6 +280,7 @@ struct client { void *clt_srv_conf; u_int32_t clt_srv_id; struct sockaddr_storage clt_srv_ss; + struct str_match clt_srv_match; int clt_s; in_port_t clt_port; @@ -341,12 +344,15 @@ SPLAY_HEAD(client_tree, client); #define SRVFLAG_NO_AUTH 0x00020000 #define SRVFLAG_BLOCK 0x00040000 #define SRVFLAG_NO_BLOCK 0x00080000 +#define SRVFLAG_LOCATION_MATCH 0x00100000 +#define SRVFLAG_SERVER_MATCH 0x00200000 #define SRVFLAG_BITS \ "\10\01INDEX\02NO_INDEX\03AUTO_INDEX\04NO_AUTO_INDEX" \ "\05ROOT\06LOCATION\07FCGI\10NO_FCGI\11LOG\12NO_LOG\13SOCKET" \ "\14SYSLOG\15NO_SYSLOG\16TLS\17ACCESS_LOG\20ERROR_LOG" \ - "\21AUTH\22NO_AUTH\23BLOCK\24NO_BLOCK" + "\21AUTH\22NO_AUTH\23BLOCK\24NO_BLOCK\25LOCATION_MATCH" \ + "\26SERVER_MATCH" #define TCPFLAG_NODELAY 0x01 #define TCPFLAG_NNODELAY 0x02 diff --git httpd/parse.y httpd/parse.y index 0aae421..f556bf0 100644 --- httpd/parse.y +++ httpd/parse.y @@ -107,7 +107,7 @@ int host_if(const char *, struct addresslist *, int host(const char *, struct addresslist *, int, struct portrange *, const char *, int); void host_free(struct addresslist *); -struct server *server_inherit(struct server *, const char *, +struct server 
*server_inherit(struct server *, struct server_config *, struct server_config *); int getservice(char *); int is_if_in_group(const char *, const char *); @@ -131,14 +131,14 @@ typedef struct { %token ACCESS ALIAS AUTO BACKLOG BODY BUFFER CERTIFICATE CHROOT CIPHERS COMMON %token COMBINED CONNECTION DHE DIRECTORY ECDHE ERR FCGI INDEX IP KEY LISTEN -%token LOCATION LOG LOGDIR MAXIMUM NO NODELAY ON PORT PREFORK PROTOCOLS +%token LOCATION LOG LOGDIR MATCH MAXIMUM NO NODELAY ON PORT PREFORK PROTOCOLS %token REQUEST REQUESTS ROOT SACK SERVER SOCKET STRIP STYLE SYSLOG TCP TIMEOUT %token TLS TYPES %token ERROR INCLUDE AUTHENTICATE WITH BLOCK DROP RETURN PASS %token <v.string> STRING %token <v.number> NUMBER %type <v.port> port -%type <v.number> opttls +%type <v.number> opttls optmatch %type <v.tv> timeout %type <v.string> numberstring optstring %type <v.auth> authopts @@ -200,26 +200,26 @@ main : PREFORK NUMBER { } ; -server : SERVER STRING { +server : SERVER optmatch STRING { struct server *s; if (!loadcfg) { - free($2); + free($3); YYACCEPT; } if ((s = calloc(1, sizeof (*s))) == NULL) fatal("out of memory"); - if (strlcpy(s->srv_conf.name, $2, + if (strlcpy(s->srv_conf.name, $3, sizeof(s->srv_conf.name)) >= sizeof(s->srv_conf.name)) { yyerror("server name truncated"); - free($2); + free($3); free(s); YYERROR; } - free($2); + free($3); strlcpy(s->srv_conf.root, HTTPD_DOCROOT, sizeof(s->srv_conf.root)); @@ -235,7 +235,9 @@ server : SERVER STRING { s->srv_conf.timeout.tv_sec = SERVER_TIMEOUT; s->srv_conf.maxrequests = SERVER_MAXREQUESTS; s->srv_conf.maxrequestbody = SERVER_MAXREQUESTBODY; - s->srv_conf.flags |= SRVFLAG_LOG; + s->srv_conf.flags = SRVFLAG_LOG; + if ($2) + s->srv_conf.flags |= SRVFLAG_SERVER_MATCH; s->srv_conf.logformat = LOG_FORMAT_COMMON; s->srv_conf.tls_protocols = TLS_PROTOCOLS_DEFAULT; if ((s->srv_conf.tls_cert_file = @@ -334,7 +336,7 @@ server : SERVER STRING { continue; if ((sn = server_inherit(srv, - b->name, a)) == NULL) { + b, a)) == NULL) { 
serverconfig_free(srv_conf); free(srv); YYABORT; @@ -405,30 +407,35 @@ serveroptsl : LISTEN ON STRING opttls port { } if (alias != NULL) { + /* IP-based; use name match flags from parent */ + alias->flags = srv->srv_conf.flags; TAILQ_INSERT_TAIL(&srv->srv_hosts, alias, entry); } } - | ALIAS STRING { + | ALIAS optmatch STRING { struct server_config *alias; if (parentsrv != NULL) { yyerror("alias inside location"); - free($2); + free($3); YYERROR; } if ((alias = calloc(1, sizeof(*alias))) == NULL) fatal("out of memory"); - if (strlcpy(alias->name, $2, sizeof(alias->name)) >= + if (strlcpy(alias->name, $3, sizeof(alias->name)) >= sizeof(alias->name)) { yyerror("server alias truncated"); - free($2); + free($3); free(alias); YYERROR; } - free($2); + free($3); + + if ($2) + alias->flags |= SRVFLAG_SERVER_MATCH; TAILQ_INSERT_TAIL(&srv->srv_hosts, alias, entry); } @@ -456,38 +463,38 @@ serveroptsl : LISTEN ON STRING opttls port { | fastcgi | authenticate | filter - | LOCATION STRING { + | LOCATION optmatch STRING { struct server *s; if (srv->srv_conf.ss.ss_family == AF_UNSPEC) { yyerror("listen address not specified"); - free($2); + free($3); YYERROR; } if (parentsrv != NULL) { - yyerror("location %s inside location", $2); - free($2); + yyerror("location %s inside location", $3); + free($3); YYERROR; } if (!loadcfg) { - free($2); + free($3); YYACCEPT; } if ((s = calloc(1, sizeof (*s))) == NULL) fatal("out of memory"); - if (strlcpy(s->srv_conf.location, $2, + if (strlcpy(s->srv_conf.location, $3, sizeof(s->srv_conf.location)) >= sizeof(s->srv_conf.location)) { yyerror("server location truncated"); - free($2); + free($3); free(s); YYERROR; } - free($2); + free($3); if (strlcpy(s->srv_conf.name, srv->srv_conf.name, sizeof(s->srv_conf.name)) >= @@ -501,6 +508,8 @@ serveroptsl : LISTEN ON STRING opttls port { /* A location entry uses the parent id */ s->srv_conf.parent_id = srv->srv_conf.id; s->srv_conf.flags = SRVFLAG_LOCATION; + if ($2) + s->srv_conf.flags |= 
SRVFLAG_LOCATION_MATCH; s->srv_s = -1; memcpy(&s->srv_conf.ss, &srv->srv_conf.ss, sizeof(s->srv_conf.ss)); @@ -884,6 +893,10 @@ block : BLOCK { } ; +optmatch : /* empty */ { $$ = 0; } + | MATCH { $$ = 1; } + ; + optstring : /* empty */ { $$ = NULL; } | STRING { $$ = $1; } ; @@ -1108,6 +1121,7 @@ lookup(char *s) { "location", LOCATION }, { "log", LOG }, { "logdir", LOGDIR }, + { "match", MATCH }, { "max", MAXIMUM }, { "no", NO }, { "nodelay", NODELAY }, @@ -1889,7 +1903,7 @@ host_free(struct addresslist *al) } struct server * -server_inherit(struct server *src, const char *name, +server_inherit(struct server *src, struct server_config *alias, struct server_config *addr) { struct server *dst, *s, *dstl; @@ -1927,7 +1941,7 @@ server_inherit(struct server *src, const char *name, } /* Now set alias and listen address */ - strlcpy(dst->srv_conf.name, name, sizeof(dst->srv_conf.name)); + strlcpy(dst->srv_conf.name, alias->name, sizeof(dst->srv_conf.name)); memcpy(&dst->srv_conf.ss, &addr->ss, sizeof(dst->srv_conf.ss)); dst->srv_conf.port = addr->port; dst->srv_conf.prefixlen = addr->prefixlen; @@ -1936,6 +1950,10 @@ server_inherit(struct server *src, const char *name, else dst->srv_conf.flags &= ~SRVFLAG_TLS; + /* Don't inherit the "match" option, use it from the alias */ + dst->srv_conf.flags &= ~SRVFLAG_SERVER_MATCH; + dst->srv_conf.flags |= (alias->flags & SRVFLAG_SERVER_MATCH); + if (server_tls_load_keypair(dst) == -1) { yyerror("failed to load public/private keys " "for server %s", dst->srv_conf.name); @@ -1975,7 +1993,8 @@ server_inherit(struct server *src, const char *name, fatal("out of memory"); memcpy(&dstl->srv_conf, &s->srv_conf, sizeof(dstl->srv_conf)); - strlcpy(dstl->srv_conf.name, name, sizeof(dstl->srv_conf.name)); + strlcpy(dstl->srv_conf.name, alias->name, + sizeof(dstl->srv_conf.name)); /* Copy the new Id and listen address */ dstl->srv_conf.id = ++last_server_id; diff --git httpd/patterns.7 httpd/patterns.7 new file mode 100644 index 0000000..1eeef4c 
--- /dev/null +++ httpd/patterns.7 @@ -0,0 +1,305 @@ +.\" $OpenBSD$ +.\" +.\" Copyright (c) 2015 Reyk Floeter <reyk@openbsd.org> +.\" Copyright (C) 1994-2015 Lua.org, PUC-Rio. +.\" +.\" Permission is hereby granted, free of charge, to any person obtaining +.\" a copy of this software and associated documentation files (the +.\" "Software"), to deal in the Software without restriction, including +.\" without limitation the rights to use, copy, modify, merge, publish, +.\" distribute, sublicense, and/or sell copies of the Software, and to +.\" permit persons to whom the Software is furnished to do so, subject to +.\" the following conditions: +.\" +.\" The above copyright notice and this permission notice shall be +.\" included in all copies or substantial portions of the Software. +.\" +.\" THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +.\" EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +.\" MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +.\" IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +.\" CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +.\" TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +.\" SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +.\" +.\" Derived from section 6.4.1 in manual.html of Lua 5.3.1: +.\" $Id: manual.of,v 1.151 2015/06/10 21:08:57 roberto Exp $ +.\" +.Dd $Mdocdate: Jun 19 2015 $ +.Dt PATTERNS 7 +.Os +.Sh NAME +.Nm patterns +.Nd Lua's pattern matching rules. +.Sh DESCRIPTION +Pattern matching in +.Xr httpd 8 +is based on the implementation of the Lua scripting language and +provides a simple and fast alternative to Regular expressions (REs) that +are described in +.Xr re_format 7 . +Patterns are described by regular strings, which are interpreted as +patterns by the pattern-matching +.Dq find +and +.Dq match +functions. 
+This document describes the syntax and the meaning (that is, what they +match) of these strings. +.Sh CHARACTER CLASS +.Pp +A character class is used to represent a set of characters. +The following combinations are allowed in describing a character +class: +.Bl -tag -width Ds +.It Ar x +(where +.Ar x +is not one of the magic characters +.Sq ^$()%.[]*+-? ) +represents the character +.Ar x +itself. +.It . +(a dot) represents all characters. +.It %a +represents all letters. +.It %c +represents all control characters. +.It %d +represents all digits. +.It %g +represents all printable characters except space. +.It %l +represents all lowercase letters. +.It %p +represents all punctuation characters. +.It %s +represents all space characters. +.It %u +represents all uppercase letters. +.It %w +represents all alphanumeric characters. +.It %x +represents all hexadecimal digits. +.It Pf % Ar x +(where +.Ar x +is any non-alphanumeric character) represents the character +.Ar x . +This is the standard way to escape the magic characters. +Any non-alphanumeric character (including all punctuation characters, +even the non-magical) can be preceded by a +.Sq % +when used to represent itself in a pattern. +.It Bq Ar set +represents the class which is the union of all +characters in +.Ar set . +A range of characters can be specified by separating the end +characters of the range, in ascending order, with a +.Sq - . +All classes +.Sq Ar %x +described above can also be used as components in +.Ar set . +All other characters in +.Ar set +represent themselves. +For example, +.Sq [%w_] +(or +.Sq [_%w] ) +represents all alphanumeric characters plus the underscore, +.Sq [0-7] +represents the octal digits, +and +.Sq [0-7%l%-] +represents the octal digits plus the lowercase letters plus the +.Sq - +character. +.Pp +The interaction between ranges and classes is not defined. +Therefore, patterns like +.Sq [%a-z] +or +.Sq [a-%%] +have no meaning. 
+.It Bq Ar ^set +represents the complement of +.Ar set , +where +.Ar set +is interpreted as above. +.El +.Pp +For all classes represented by single letters ( +.Sq %a , +.Sq %c , +etc.), +the corresponding uppercase letter represents the complement of the class. +For instance, +.Sq %S +represents all non-space characters. +.Pp +The definitions of letter, space, and other character groups depend on +the current locale. +In particular, the class +.Sq [a-z] +may not be equivalent to +.Sq %l . +.Sh PATTERN ITEM +A pattern item can be +.Bl -bullet +.It +a single character class, which matches any single character in the class; +.It +a single character class followed by +.Sq * , +which matches zero or more repetitions of characters in the class. +These repetition items will always match the longest possible sequence; +.It +a single character class followed by +.Sq + , +which matches one or more repetitions of characters in the class. +These repetition items will always match the longest possible sequence; +.It +a single character class followed by +.Sq - , +which also matches zero or more repetitions of characters in the class. +Unlike +.Sq * , +these repetition items will always match the shortest possible sequence; +.It +a single character class followed by +.Sq \? , +which matches zero or one occurrence of a character in the class. +It always matches one occurrence if possible; +.It +.Sq Pf % Ar n , +for +.Ar n +between 1 and 9; +such item matches a substring equal to the n-th captured string (see below); +.It +.Sq Pf %b Ar xy , +where +.Ar x +and +.Ar y +are two distinct characters; +such item matches strings that start with +.Ar x , +end with +.Ar y , +and where the +.Ar x +and +.Ar y +are +.Em balanced . +This means that, if one reads the string from left to right, counting +.Em +1 +for an +.Ar x +and +.Em -1 +for a +.Ar y , +the ending +.Ar y +is the first +.Ar y +where the count reaches 0. 
+For instance, the item +.Sq %b() +matches expressions with balanced parentheses. +.It +.Sq Pf %f Bq Ar set , +a +.Em frontier pattern ; +such item matches an empty string at any position such that the next +character belongs to +.Ar set +and the previous character does not belong to +.Ar set . +The set +.Ar set +is interpreted as previously described. +The beginning and the end of the subject are handled as if +they were the character +.Sq \e0 . +.El +.Sh PATTERN +A pattern is a sequence of pattern items. +A caret +.Sq ^ +at the beginning of a pattern anchors the match at the beginning of +the subject string. +A +.Sq \$ +at the end of a pattern anchors the match at the end of the subject string. +At other positions, +.Sq ^ +and +.Sq \$ +have no special meaning and represent themselves. +.Sh CAPTURES +A pattern can contain sub-patterns enclosed in parentheses; they +describe captures. +When a match succeeds, the substrings of the subject string that match +captures are stored (captured) for future use. +Captures are numbered according to their left parentheses. +For instance, in the pattern +.Qq (a*(.)%w(%s*)) , +the part of the string matching +.Qq a*(.)%w(%s*) +is stored as the first capture (and therefore has number 1); +the character matching +.So \. Sc +is captured with number 2, +and the part matching +.Qq %s* +has number 3. +.Pp +As a special case, the empty capture +.Sq () +captures the current string position (a number). +For instance, if we apply the pattern +.Qq ()aa() +on the string +.Qq flaaap , +there will be two captures: 3 and 5. +.Sh SEE ALSO +.Xr fnmatch 3 , +.Xr re_format 7 , +.Xr httpd 8 +.Rs +.%A Roberto Ierusalimschy +.%A Luiz Henrique de Figueiredo +.%A Waldemar Celes +.%Q Lua.org +.%Q PUC-Rio +.%D June 2015 +.%R Lua 5.3 Reference Manual +.%T Patterns +.%U http://www.lua.org/manual/5.3/manual.html#6.4.1 +.Re +.Sh HISTORY +The first implementation of the pattern rules was introduced with Lua 2.5. 
+Almost twenty years later, +an implementation based on Lua 5.3.1 appeared in +.Ox 5.8 . +.Sh AUTHORS +The pattern matching is derived from the original implementation of +the Lua scripting language, that is written by +.An -nosplit +.An Roberto Ierusalimschy , +.An Waldemar Celes , +and +.An Luiz Henrique de Figueiredo +at PUC-Rio. +It was turned into a native C API for +.Xr httpd 8 +by +.An Reyk Floeter Aq Mt reyk@openbsd.org . diff --git httpd/patterns.c httpd/patterns.c new file mode 100644 index 0000000..b7cb381 --- /dev/null +++ httpd/patterns.c @@ -0,0 +1,695 @@ +/* $OpenBSD$ */ + +/* + * Copyright (c) 2015 Reyk Floeter <reyk@openbsd.org> + * Copyright (C) 1994-2015 Lua.org, PUC-Rio. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +/* + * Derived from Lua 5.3.1: + * $Id: lstrlib.c,v 1.229 2015/05/20 17:39:23 roberto Exp $ + * Standard library for string operations and pattern-matching + */ + +#include <stddef.h> +#include <stdlib.h> +#include <string.h> +#include <ctype.h> +#include <errno.h> +#include <assert.h> + +#include "patterns.h" + +#define uchar(c) ((unsigned char)(c)) /* macro to 'unsign' a char */ +#define CAP_UNFINISHED (-1) +#define CAP_POSITION (-2) +#define L_ESC '%' +#define SPECIALS "^$*+?.([%-" + +struct match_state { + int matchdepth; /* control for recursive depth (to avoid C + * stack overflow) */ + int maxcaptures; /* configured capture limit */ + const char *src_init; /* init of source string */ + const char *src_end; /* end ('\0') of source string */ + const char *p_end; /* end ('\0') of pattern */ + const char *error; /* should be NULL */ + int level; /* total number of captures (finished or + * unfinished) */ + struct { + const char *init; + ptrdiff_t len; + } capture[MAXCAPTURES]; +}; + +/* recursive function */ +static const char *match(struct match_state *, const char *, const char *); + +static int +match_error(struct match_state *ms, const char *error) +{ + ms->error = ms->error == NULL ? 
error : ms->error; + return (-1); +} + +static int +check_capture(struct match_state *ms, int l) +{ + l -= '1'; + if (l < 0 || l >= ms->level || ms->capture[l].len == CAP_UNFINISHED) + return match_error(ms, "invalid capture index"); + return (l); +} + +static int +capture_to_close(struct match_state *ms) +{ + int level = ms->level; + for (level--; level >= 0; level--) + if (ms->capture[level].len == CAP_UNFINISHED) + return (level); + return match_error(ms, "invalid pattern capture"); +} + +static const char * +classend(struct match_state *ms, const char *p) +{ + switch (*p++) { + case L_ESC: + if (p == ms->p_end) + match_error(ms, + "malformed pattern (ends with '%%')"); + return p + 1; + case '[': + if (*p == '^') + p++; + do { + /* look for a ']' */ + if (p == ms->p_end) { + match_error(ms, + "malformed pattern (missing ']')"); + break; + } + if (*(p++) == L_ESC && p < ms->p_end) { + /* skip escapes (e.g. '%]') */ + p++; + } + } while (*p != ']'); + return p + 1; + default: + return p; + } +} + +static int +match_class(int c, int cl) +{ + int res; + switch (tolower(cl)) { + case 'a': + res = isalpha(c); + break; + case 'c': + res = iscntrl(c); + break; + case 'd': + res = isdigit(c); + break; + case 'g': + res = isgraph(c); + break; + case 'l': + res = islower(c); + break; + case 'p': + res = ispunct(c); + break; + case 's': + res = isspace(c); + break; + case 'u': + res = isupper(c); + break; + case 'w': + res = isalnum(c); + break; + case 'x': + res = isxdigit(c); + break; + case 'z': + res = (c == 0); + break; /* deprecated option */ + default: + return (cl == c); + } + return (islower(cl) ? 
res : !res); +} + +static int +matchbracketclass(int c, const char *p, const char *ec) +{ + int sig = 1; + if (*(p + 1) == '^') { + sig = 0; + /* skip the '^' */ + p++; + } + while (++p < ec) { + if (*p == L_ESC) { + p++; + if (match_class(c, uchar(*p))) + return sig; + } else if ((*(p + 1) == '-') && (p + 2 < ec)) { + p += 2; + if (uchar(*(p - 2)) <= c && c <= uchar(*p)) + return sig; + } else if (uchar(*p) == c) + return sig; + } + return !sig; +} + +static int +singlematch(struct match_state *ms, const char *s, const char *p, + const char *ep) +{ + if (s >= ms->src_end) + return 0; + else { + int c = uchar(*s); + switch (*p) { + case '.': + /* matches any char */ + return (1); + case L_ESC: + return match_class(c, uchar(*(p + 1))); + case '[': + return matchbracketclass(c, p, ep - 1); + default: + return (uchar(*p) == c); + } + } +} + +static const char * +matchbalance(struct match_state *ms, const char *s, const char *p) +{ + if (p >= ms->p_end - 1) { + match_error(ms, + "malformed pattern (missing arguments to '%%b')"); + return (NULL); + } + if (*s != *p) + return (NULL); + else { + int b = *p; + int e = *(p + 1); + int cont = 1; + while (++s < ms->src_end) { + if (*s == e) { + if (--cont == 0) + return s + 1; + } else if (*s == b) + cont++; + } + } + + /* string ends out of balance */ + return (NULL); +} + +static const char * +max_expand(struct match_state *ms, const char *s, const char *p, const char *ep) +{ + ptrdiff_t i = 0; + /* counts maximum expand for item */ + while (singlematch(ms, s + i, p, ep)) + i++; + /* keeps trying to match with the maximum repetitions */ + while (i >= 0) { + const char *res = match(ms, (s + i), ep + 1); + if (res) + return res; + /* else didn't match; reduce 1 repetition to try again */ + i--; + } + return NULL; +} + +static const char * +min_expand(struct match_state *ms, const char *s, const char *p, const char *ep) +{ + for (;;) { + const char *res = match(ms, s, ep + 1); + if (res != NULL) + return res; + else if 
(singlematch(ms, s, p, ep)) + s++; /* try with one more repetition */ + else + return NULL; + } +} + +static const char * +start_capture(struct match_state *ms, const char *s, const char *p, int what) +{ + const char *res; + + int level = ms->level; + if (level >= ms->maxcaptures) + match_error(ms, "too many captures"); + ms->capture[level].init = s; + ms->capture[level].len = what; + ms->level = level + 1; + /* undo capture if match failed */ + if ((res = match(ms, s, p)) == NULL) + ms->level--; + return res; +} + +static const char * +end_capture(struct match_state *ms, const char *s, const char *p) +{ + int l = capture_to_close(ms); + const char *res; + /* close capture */ + ms->capture[l].len = s - ms->capture[l].init; + /* undo capture if match failed */ + if ((res = match(ms, s, p)) == NULL) + ms->capture[l].len = CAP_UNFINISHED; + return res; +} + +static const char * +match_capture(struct match_state *ms, const char *s, int l) +{ + size_t len; + l = check_capture(ms, l); + len = ms->capture[l].len; + if ((size_t) (ms->src_end - s) >= len && + memcmp(ms->capture[l].init, s, len) == 0) + return s + len; + else + return NULL; +} + +static const char * +match(struct match_state *ms, const char *s, const char *p) +{ + const char *ep, *res; + char previous; + + if (ms->matchdepth-- == 0) + match_error(ms, "pattern too complex"); + + /* using goto's to optimize tail recursion */ + init: + /* end of pattern? */ + if (p != ms->p_end) { + switch (*p) { + case '(': + /* start capture */ + if (*(p + 1) == ')') + /* position capture? */ + s = start_capture(ms, s, p + 2, CAP_POSITION); + else + s = start_capture(ms, s, p + 1, CAP_UNFINISHED); + break; + case ')': + /* end capture */ + s = end_capture(ms, s, p + 1); + break; + case '$': + /* is the '$' the last char in pattern? */ + if ((p + 1) != ms->p_end) { + /* no; go to default */ + goto dflt; + } + /* check end of string */ + s = (s == ms->src_end) ? 
s : NULL; + break; + case L_ESC: + /* escaped sequences not in the format class[*+?-]? */ + switch (*(p + 1)) { + case 'b': + /* balanced string? */ + s = matchbalance(ms, s, p + 2); + if (s != NULL) { + p += 4; + /* return match(ms, s, p + 4); */ + goto init; + } /* else fail (s == NULL) */ + break; + case 'f': + /* frontier? */ + p += 2; + if (*p != '[') + match_error(ms, "missing '['" + " after '%%f' in pattern"); + /* points to what is next */ + ep = classend(ms, p); + previous = + (s == ms->src_init) ? '\0' : *(s - 1); + if (!matchbracketclass(uchar(previous), + p, ep - 1) && + matchbracketclass(uchar(*s), + p, ep - 1)) { + p = ep; + /* return match(ms, s, ep); */ + goto init; + } + /* match failed */ + s = NULL; + break; + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + /* capture results (%0-%9)? */ + s = match_capture(ms, s, uchar(*(p + 1))); + if (s != NULL) { + p += 2; + /* return match(ms, s, p + 2) */ + goto init; + } + break; + default: + goto dflt; + } + break; + default: + + /* pattern class plus optional suffix */ + dflt: + /* points to optional suffix */ + ep = classend(ms, p); + + /* does not match at least once? */ + if (!singlematch(ms, s, p, ep)) { + /* accept empty? */ + if (*ep == '*' || *ep == '?' 
|| *ep == '-') { + p = ep + 1; + /* return match(ms, s, ep + 1); */ + goto init; + } else { + /* '+' or no suffix */ + s = NULL; /* fail */ + } + } else { + /* matched once */ + /* handle optional suffix */ + switch (*ep) { + case '?': + /* optional */ + if ((res = + match(ms, s + 1, ep + 1)) != NULL) + s = res; + else { + /* + * else return + * match(ms, s, ep + 1); + */ + p = ep + 1; + goto init; + } + break; + case '+': + /* 1 or more repetitions */ + s++; /* 1 match already done */ + /* FALLTHROUGH */ + case '*': + /* 0 or more repetitions */ + s = max_expand(ms, s, p, ep); + break; + case '-': + /* 0 or more repetitions (minimum) */ + s = min_expand(ms, s, p, ep); + break; + default: + /* no suffix */ + s++; + p = ep; + /* return match(ms, s + 1, ep); */ + goto init; + } + } + break; + } + } + ms->matchdepth++; + return s; +} + +static const char * +lmemfind(const char *s1, size_t l1, + const char *s2, size_t l2) +{ + const char *init; + + if (l2 == 0) { + /* empty strings are everywhere */ + return (s1); + } else if (l2 > l1) { + /* avoids a negative 'l1' */ + return (NULL); + } else { + /* + * to search for a '*s2' inside 's1' + * - 1st char will be checked by 'memchr' + * - 's2' cannot be found after that + */ + l2--; + l1 = l1 - l2; + while (l1 > 0 && + (init = (const char *)memchr(s1, *s2, l1)) != NULL) { + /* 1st char is already checked */ + init++; + if (memcmp(init, s2 + 1, l2) == 0) + return init - 1; + else { + /* correct 'l1' and 's1' to try again */ + l1 -= init - s1; + s1 = init; + } + } + /* not found */ + return (NULL); + } +} + +static int +push_onecapture(struct match_state *ms, int i, const char *s, + const char *e, struct str_find *sm) +{ + if (i >= ms->level) { + if (i == 0 || ms->level == 0) { + /* add whole match */ + sm->sm_so = (off_t)(s - ms->src_init); + sm->sm_eo = (off_t)(e - s) + sm->sm_so; + } else + return match_error(ms, "invalid capture index"); + } else { + ptrdiff_t l = ms->capture[i].len; + if (l == CAP_UNFINISHED) + 
match_error(ms, "unfinished capture"); + sm->sm_so = ms->capture[i].init - ms->src_init; + sm->sm_eo = sm->sm_so + l; + } + sm->sm_eo = sm->sm_eo < sm->sm_so ? sm->sm_so : sm->sm_eo; + return (0); +} + +static int +push_captures(struct match_state *ms, const char *s, const char *e, + struct str_find *sm, size_t nsm) +{ + unsigned int i; + unsigned int nlevels = (ms->level <= 0 && s) ? 1 : ms->level; + + if (nlevels > nsm) + nlevels = nsm; + for (i = 0; i < nlevels; i++) + if (push_onecapture(ms, i, s, e, sm + i) == -1) + break; + + /* number of strings pushed */ + return (nlevels); +} + +/* check whether pattern has no special characters */ +static int +nospecials(const char *p, size_t l) +{ + size_t upto = 0; + + do { + if (strpbrk(p + upto, SPECIALS)) { + /* pattern has a special character */ + return 0; + } + /* may have more after \0 */ + upto += strlen(p + upto) + 1; + } while (upto <= l); + + /* no special chars found */ + return (1); +} + +static int +str_find_aux(struct match_state *ms, const char *pattern, const char *string, + struct str_find *sm, size_t nsm, off_t init) +{ + size_t ls = strlen(string); + size_t lp = strlen(pattern); + const char *s = string; + const char *p = pattern; + const char *s1, *s2; + int anchor, i; + + if (init < 0) + init = 0; + else if (init > (off_t)ls) + return match_error(ms, "starting after string's end"); + s1 = s + init; + + if (nospecials(p, lp)) { + /* do a plain search */ + s2 = lmemfind(s1, ls - (size_t)init, p, lp); + if (s2 != NULL) { + i = 0; + sm[i].sm_so = 0; + sm[i].sm_eo = ls; + if (nsm > 1) { + i++; + sm[i].sm_so = s2 - s; + sm[i].sm_eo = (s2 - s) + lp; + } + return (i + 1); + } + return (0); + } + + anchor = (*p == '^'); + if (anchor) { + p++; + lp--; /* skip anchor character */ + } + ms->maxcaptures = (nsm > MAXCAPTURES ? 
MAXCAPTURES : nsm) - 1; + ms->matchdepth = MAXCCALLS; + ms->src_init = s; + ms->src_end = s + ls; + ms->p_end = p + lp; + do { + const char *res; + ms->level = 0; + assert(ms->matchdepth == MAXCCALLS); + if ((res = match(ms, s1, p)) != NULL) { + sm->sm_so = 0; + sm->sm_eo = ls; + return push_captures(ms, s1, res, sm + 1, nsm - 1) + 1; + } + } while (s1++ < ms->src_end && !anchor); + + return 0; +} + +int +str_find(const char *string, const char *pattern, struct str_find *sm, + size_t nsm, const char **errstr) +{ + struct match_state ms; + int ret; + + memset(&ms, 0, sizeof(ms)); + memset(sm, 0, nsm * sizeof(*sm)); + + ret = str_find_aux(&ms, pattern, string, sm, nsm, 0); + if (ms.error != NULL) { + /* Return 0 on error and store the error string */ + *errstr = ms.error; + ret = 0; + } else + *errstr = NULL; + + return (ret); +} + +int +str_match(const char *string, const char *pattern, struct str_match *m, + const char **errstr) +{ + struct str_find sm[MAXCAPTURES]; + struct match_state ms; + int ret, i; + size_t len, nsm; + + nsm = MAXCAPTURES; + memset(&ms, 0, sizeof(ms)); + memset(sm, 0, sizeof(sm)); + memset(m, 0, sizeof(*m)); + + ret = str_find_aux(&ms, pattern, string, sm, nsm, 0); + if (ret == 0 || ms.error != NULL) { + /* Return 0 on error and store the error string */ + *errstr = ms.error; + return (-1); + } + + if ((m->sm_match = calloc(ret, sizeof(char *))) == NULL) { + *errstr = strerror(errno); + return (-1); + } + m->sm_nmatch = ret; + + for (i = 0; i < ret; i++) { + if (sm[i].sm_so > sm[i].sm_eo) + continue; + len = sm[i].sm_eo - sm[i].sm_so; + if ((m->sm_match[i] = calloc(1, + len + 1)) == NULL) { + *errstr = strerror(errno); + str_match_free(m); + return (-1); + } + (void)memcpy(m->sm_match[i], + string + sm[i].sm_so, len); + } + + *errstr = NULL; + return (0); +} + +void +str_match_free(struct str_match *m) +{ + unsigned int i = 0; + for (i = 0; i < m->sm_nmatch; i++) + free(m->sm_match[i]); + free(m->sm_match); + m->sm_nmatch = 0; +} diff --git 
httpd/patterns.h httpd/patterns.h new file mode 100644 index 0000000..ddda0dd --- /dev/null +++ httpd/patterns.h @@ -0,0 +1,46 @@ +/* $OpenBSD$ */ + +/* + * Copyright (c) 2015 Reyk Floeter <reyk@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +#include <sys/types.h> +#include <sys/cdefs.h> + +#ifndef PATTERNS_H +#define PATTERNS_H + +#define MAXCAPTURES 32 /* Max no. 
of allowed captures in pattern */ +#define MAXCCALLS 200 /* Max recursion depth in pattern matching */ + +struct str_find { + off_t sm_so; /* start offset of match */ + off_t sm_eo; /* end offset of match */ +}; + +struct str_match { + char **sm_match; /* allocated array of matched strings */ + unsigned int sm_nmatch; /* number of elements in array */ +}; + +__BEGIN_DECLS +int str_find(const char *, const char *, struct str_find *, size_t, + const char **); +int str_match(const char *, const char *, struct str_match *, + const char **); +void str_match_free(struct str_match *); +__END_DECLS + +#endif /* PATTERNS_H */ diff --git httpd/server_http.c httpd/server_http.c index 99d5267..26d5392 100644 --- httpd/server_http.c +++ httpd/server_http.c @@ -29,14 +29,16 @@ #include <string.h> #include <unistd.h> #include <limits.h> +#include <fnmatch.h> #include <stdio.h> #include <time.h> #include <resolv.h> #include <event.h> -#include <fnmatch.h> +#include <ctype.h> #include "httpd.h" #include "http.h" +#include "patterns.h" static int server_httpmethod_cmp(const void *, const void *); static int server_httperror_cmp(const void *, const void *); @@ -633,6 +635,7 @@ server_reset_http(struct client *clt) clt->clt_remote_user = NULL; clt->clt_bev->readcb = server_read_http; clt->clt_srv_conf = &srv->srv_conf; + str_match_free(&clt->clt_srv_match); } ssize_t @@ -738,7 +741,7 @@ server_abort_http(struct client *clt, u_int code, const char *msg) const char *httperr = NULL, *style; char *httpmsg, *body = NULL, *extraheader = NULL; char tmbuf[32], hbuf[128]; - char buf[IBUF_READ_SIZE], *ptr = NULL; + char buf[IBUF_READ_SIZE]; int bodylen; if (code == 0) { @@ -770,16 +773,13 @@ server_abort_http(struct client *clt, u_int code, const char *msg) if (msg == NULL) break; memset(buf, 0, sizeof(buf)); - if ((ptr = server_expand_http(clt, msg, - buf, sizeof(buf))) == NULL) + if (server_expand_http(clt, msg, buf, sizeof(buf)) == NULL) goto done; - if ((ptr = url_encode(ptr)) == NULL) - 
goto done; - if (asprintf(&extraheader, "Location: %s\r\n", ptr) == -1) { + if (asprintf(&extraheader, "Location: %s\r\n", buf) == -1) { code = 500; extraheader = NULL; } - msg = ptr; + msg = buf; break; case 401: if (asprintf(&extraheader, @@ -858,7 +858,6 @@ server_abort_http(struct client *clt, u_int code, const char *msg) server_close(clt, httpmsg); free(httpmsg); } - free(ptr); } void @@ -877,6 +876,8 @@ server_close_http(struct client *clt) clt->clt_descresp = NULL; free(clt->clt_remote_user); clt->clt_remote_user = NULL; + + str_match_free(&clt->clt_srv_match); } char * @@ -885,14 +886,41 @@ server_expand_http(struct client *clt, const char *val, char *buf, { struct http_descriptor *desc = clt->clt_descreq; struct server_config *srv_conf = clt->clt_srv_conf; - char ibuf[128], *str; + char ibuf[128], *str, *path; + const char *errstr = NULL, *p; + size_t size; + int n, ret; if (strlcpy(buf, val, len) >= len) return (NULL); + /* Find previously matched substrings by index */ + for (p = val; clt->clt_srv_match.sm_nmatch && + (p = strstr(p, "%")) != NULL; p++) { + if (!isdigit(*(p + 1))) + continue; + + /* Copy number, leading '%' char and add trailing \0 */ + size = strspn(p + 1, "0123456789") + 2; + if (size >= sizeof(ibuf)) + return (NULL); + (void)strlcpy(ibuf, p, size); + n = strtonum(ibuf + 1, 0, + clt->clt_srv_match.sm_nmatch - 1, &errstr); + if (errstr != NULL) + return (NULL); + + /* Expand variable with matched value */ + if (expand_string(buf, len, ibuf, + clt->clt_srv_match.sm_match[n]) != 0) + return (NULL); + } if (strstr(val, "$DOCUMENT_URI") != NULL) { - if (expand_string(buf, len, "$DOCUMENT_URI", - desc->http_path) != 0) + if ((path = url_encode(desc->http_path)) == NULL) + return (NULL); + ret = expand_string(buf, len, "$DOCUMENT_URI", path); + free(path); + if (ret != 0) return (NULL); } if (strstr(val, "$QUERY_STRING") != NULL) { @@ -929,17 +957,22 @@ server_expand_http(struct client *clt, const char *val, char *buf, } } if (strstr(val, "$REQUEST_URI") != NULL) { + if ((path = url_encode(desc->http_path)) == NULL) + return (NULL); if (desc->http_query == 
NULL) { - if ((str = strdup(desc->http_path)) == NULL) + str = path; + } else { + ret = asprintf(&str, "%s?%s", + path, desc->http_query); + free(path); + if (ret == -1) return (NULL); - } else if (asprintf(&str, "%s?%s", - desc->http_path, desc->http_query) == -1) - return (NULL); - if (expand_string(buf, len, "$REQUEST_URI", str) != 0) { - free(str); - return (NULL); } + + ret = expand_string(buf, len, "$REQUEST_URI", str); free(str); + if (ret != 0) + return (NULL); } if (strstr(val, "$SERVER_") != NULL) { if (strstr(val, "$SERVER_ADDR") != NULL) { @@ -977,8 +1010,10 @@ server_response(struct httpd *httpd, struct client *clt) struct server *srv = clt->clt_srv; struct server_config *srv_conf = &srv->srv_conf; struct kv *kv, key, *host; - int portval = -1; + struct str_find sm; + int portval = -1, ret; char *hostval; + const char *errstr = NULL; /* Canonicalize the request path */ if (desc->http_path == NULL || @@ -1038,9 +1073,17 @@ server_response(struct httpd *httpd, struct client *clt) hostname); } #endif - if ((srv_conf->flags & SRVFLAG_LOCATION) == 0 && - fnmatch(srv_conf->name, hostname, - FNM_CASEFOLD) == 0 && + if (srv_conf->flags & SRVFLAG_LOCATION) + continue; + else if (srv_conf->flags & SRVFLAG_SERVER_MATCH) { + str_find(hostname, srv_conf->name, + &sm, 1, &errstr); + ret = errstr == NULL ? 
0 : -1; + } else { + ret = fnmatch(srv_conf->name, + hostname, FNM_CASEFOLD); + } + if (ret == 0 && (portval == -1 || (portval != -1 && portval == srv_conf->port))) { /* Replace host configuration */ @@ -1110,6 +1153,8 @@ server_getlocation(struct client *clt, const char *path) { struct server *srv = clt->clt_srv; struct server_config *srv_conf = clt->clt_srv_conf, *location; + const char *errstr = NULL; + int ret; /* Now search for the location */ TAILQ_FOREACH(location, &srv->srv_hosts, entry) { @@ -1120,11 +1165,20 @@ server_getlocation(struct client *clt, const char *path) } #endif if ((location->flags & SRVFLAG_LOCATION) && - location->parent_id == srv_conf->parent_id && - fnmatch(location->location, path, FNM_CASEFOLD) == 0) { - /* Replace host configuration */ - clt->clt_srv_conf = srv_conf = location; - break; + location->parent_id == srv_conf->parent_id) { + errstr = NULL; + if (location->flags & SRVFLAG_LOCATION_MATCH) { + ret = str_match(path, location->location, + &clt->clt_srv_match, &errstr); + } else { + ret = fnmatch(location->location, + path, FNM_CASEFOLD); + } + if (ret == 0 && errstr == NULL) { + /* Replace host configuration */ + clt->clt_srv_conf = srv_conf = location; + break; + } } } [prev in list] [next in list] [ prev in thread ] [next in thread]