blob: 9a5c00412915bf34de178ffc6b751965401e3912 [file] [log] [blame]
/*--------------------------------------------------------------------*/
/*--- A simple sequence matching facility. ---*/
/*--- m_seqmatch.c ---*/
/*--------------------------------------------------------------------*/
/*
This file is part of Valgrind, a dynamic binary instrumentation
framework.
Copyright (C) 2008-2011 OpenWorks Ltd
info@open-works.co.uk
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License as
published by the Free Software Foundation; either version 2 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
02111-1307, USA.
The GNU General Public License is contained in the file COPYING.
*/
#include "pub_core_basics.h"
#include "pub_core_libcassert.h"
#include "pub_core_libcbase.h" // VG_(strlen)
#include "pub_core_seqmatch.h" // self
/* ---------------------------------------------------------------------
A simple sequence matching facility
------------------------------------------------------------------ */
/* See detailed comment in include/pub_tool_seqmatch.h about this. */
Bool VG_(generic_match) (
Bool matchAll,
void* patt, SizeT szbPatt, UWord nPatt, UWord ixPatt,
void* input, SizeT szbInput, UWord nInput, UWord ixInput,
Bool (*pIsStar)(void*),
Bool (*pIsQuery)(void*),
Bool (*pattEQinp)(void*,void*)
)
{
/* This is the spec, written in my favourite formal specification
language. It specifies non-greedy matching of '*'s.
ma ('*':ps) (i:is) = ma ps (i:is) || ma ('*':ps) is
ma ('*':ps) [] = ma ps []
ma ('?':ps) (i:is) = ma ps is
ma ('?':ps) [] = False
ma (p:ps) (i:is) = p == i && ma ps is
ma (p:ps) [] = False
ma [] (i:is) = False -- m-all, True for m-prefix
ma [] [] = True
*/
Bool havePatt, haveInput;
void *currPatt, *currInput;
tailcall:
vg_assert(nPatt >= 0 && nPatt < 1000000); /* arbitrary */
vg_assert(nInput >= 0 && nInput < 1000000); /* arbitrary */
vg_assert(ixPatt >= 0 && ixPatt <= nPatt);
vg_assert(ixInput >= 0 && ixInput <= nInput);
havePatt = ixPatt < nPatt;
haveInput = ixInput < nInput;
/* No specific need to set NULL when !have{Patt,Input}, but guards
against inadvertantly dereferencing an out of range pointer to
the pattern or input arrays. */
currPatt = havePatt ? ((Char*)patt) + szbPatt * ixPatt : NULL;
currInput = haveInput ? ((Char*)input) + szbInput * ixInput : NULL;
// Deal with the complex case first: wildcards. Do frugal
// matching. When encountering a '*', first skip no characters
// at all, and see if the rest of the match still works. Only if
// that fails do we then skip a character, and retry at the next
// position.
//
// ma ('*':ps) (i:is) = ma ps (i:is) || ma ('*':ps) is
//
// If we're out of input, check the rest of the pattern matches
// the empty input. This really means can only be be empty or
// composed entirely of '*'s.
//
// ma ('*':ps) [] = ma ps []
//
if (havePatt && pIsStar(currPatt)) {
if (haveInput) {
// ma ('*':ps) (i:is) = ma ps (i:is) || ma ('*':ps) is
// we unavoidably have to make a real recursive call for the
// first half of the OR, since this isn't straight tail-recursion.
if (VG_(generic_match)( matchAll,
patt, szbPatt, nPatt, ixPatt+1,
input,szbInput,nInput, ixInput+0,
pIsStar,pIsQuery,pattEQinp) ) {
return True;
}
// but we can tail-recurse for the second call
ixInput++; goto tailcall;
} else {
// ma ('*':ps) [] = ma ps []
ixPatt++; goto tailcall;
}
}
// simpler cases now. Deal with '?' wildcards.
//
// ma ('?':ps) (i:is) = ma ps is
// ma ('?':ps) [] = False
if (havePatt && pIsQuery(currPatt)) {
if (haveInput) {
ixPatt++; ixInput++; goto tailcall;
} else {
return False;
}
}
// obvious case with literal chars in the pattern
//
// ma (p:ps) (i:is) = p == i && ma ps is
if (havePatt && haveInput) {
if (!pattEQinp(currPatt,currInput)) return False;
ixPatt++; ixInput++; goto tailcall;
}
// if we run out of input before we run out of pattern, we must fail
// ma (p:ps) [] = False
if (havePatt && !haveInput) return False;
// if we run out of pattern before we run out of input, the
// verdict depends on the matching mode. If we are trying to
// match exactly (the pattern must consume the entire input)
// then the outcome is failure. However, if we're merely attempting
// to match some prefix of the input, then we have been successful.
//
// ma [] (i:is) = False -- m-all, True for m-prefix
if (!havePatt && haveInput) {
return matchAll ? False // match-all
: True; // match-prefix
}
// finally, if both sequence and input are both completely
// consumed, then we were successful, regardless of matching mode.
if (!havePatt && !haveInput) return True;
// end of cases
vg_assert(0);
}
/* And a parameterization of the above, to make it do
string matching.
*/
static Bool charIsStar ( void* pV ) { return *(Char*)pV == '*'; }
static Bool charIsQuery ( void* pV ) { return *(Char*)pV == '?'; }
static Bool char_p_EQ_i ( void* pV, void* cV ) {
Char p = *(Char*)pV;
Char c = *(Char*)cV;
vg_assert(p != '*' && p != '?');
return p == c;
}
Bool VG_(string_match) ( const Char* patt, const Char* input )
{
return VG_(generic_match)(
True/* match-all */,
(void*)patt, sizeof(UChar), VG_(strlen)(patt), 0,
(void*)input, sizeof(UChar), VG_(strlen)(input), 0,
charIsStar, charIsQuery, char_p_EQ_i
);
}
// test cases for the matcher (in match-all mode)
// typedef struct { char* patt; char* input; Bool xres; } Test;
//
//static Test tests[] =
// {
// { "" ,"" , True },
// { "a" ,"" , False },
// { "a" ,"b" , False },
// { "a" ,"a" , True },
// { "a" ,"aa" , False },
// { "*" ,"" , True },
// { "**" ,"" , True },
// { "*" ,"abc", True },
// { "*a" ,"abc", False },
// { "*b" ,"abc", False },
// { "*bc" ,"abc", True },
// { "a*b" ,"abc", False },
// { "a*c" ,"abc", True },
// { "*c" ,"abc", True },
// { "c*c" ,"abc", False },
// { "abc*" ,"abc", True },
// { "abc**" ,"abc", True },
// { "**abc" ,"abc", True },
// { "**a*b*c**" ,"abc", True },
// { "**a*b*d**" ,"abc", False },
// { "a?b" ,"abc", False },
// { "a?c" ,"abc", True },
// { "?" ,"" , False },
// { "?" ,"a" , True },
// { "?" ,"ab" , False },
// { "abcd" ,"abc", False },
// { "ab" ,"abc", False },
// { NULL ,NULL , False }
// };
//
//int main ( void )
//{
// Test* t;
// for (t = tests; t->patt; t++) {
// printf("%10s %6s %s\n",
// t->patt, t->input,
// match_string_all((UChar*)t->patt,(UChar*)t->input,True)
// == t->xres
// ? "pass" : "FAIL" );
// }
// return 0;
//}
/*--------------------------------------------------------------------*/
/*--- end m_seqmatch.c ---*/
/*--------------------------------------------------------------------*/