// $Id: RE.h,v 1.3 2005/03/08 15:26:41 vern Exp $
//
// Copyright (c) 1998, 1999, 2001, 2002
//      The Regents of the University of California.  All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that: (1) source code distributions
// retain the above copyright notice and this paragraph in its entirety, (2)
// distributions including binary code include the above copyright notice and
// this paragraph in its entirety in the documentation or other materials
// provided with the distribution, and (3) all advertising materials mentioning
// features or use of this software display the following acknowledgement:
// ``This product includes software developed by the University of California,
// Lawrence Berkeley Laboratory and its contributors.'' Neither the name of
// the University nor the names of its contributors may be used to endorse
// or promote products derived from this software without specific prior
// written permission.
// THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
// WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
// MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.

#ifndef re_h
#define re_h

#include "Obj.h"
#include "Dict.h"
#include "BroString.h"
#include "CCL.h"
#include "EquivClass.h"

#include <ctype.h>
// Pointer to a function that takes an int and returns an int -- the same
// signature as the <ctype.h> character-classification functions.
typedef int (*cce_func)(int);

// Forward declarations of classes that are referred to below.
class CCL;
class NFA_Machine;
class DFA_Machine;
class Specific_RE_Matcher;
class RE_Matcher;

// Declare the macro-generated container types that Specific_RE_Matcher
// uses for its members (PDict(char), PDict(CCL) and PList(CCL)).
declare(PDict,char);
declare(PDict,CCL);
declare(PList,CCL);

// Globals that are defined elsewhere.
// NOTE(review): these look like state shared with the RE scanner/parser
// while a pattern is being compiled -- confirm against the defining files.
extern int case_insensitive;
extern CCL* curr_ccl;
extern NFA_Machine* nfa;
extern Specific_RE_Matcher* rem;
extern const char* RE_parse_input;

// Functions that are defined elsewhere.
extern int re_lex(void);
extern int clower(int);
extern void synerr(char str[]);

// Local names for list types that are declared elsewhere.
typedef int_list AcceptingSet;
typedef name_list string_list;

// The kinds of pattern a Specific_RE_Matcher can be created for.
//
// Note: there is deliberately no comma after the last enumerator.  A
// trailing comma in an enumerator list is only valid from C++11 on and is
// rejected by pedantic C++98/03 compilers.
typedef enum {
	MATCH_ANYWHERE,	// value 0
	MATCH_EXACTLY	// value 1
} match_type;

// A Specific_RE_Matcher handles exactly one kind of pattern, fixed when it
// is constructed: either MATCH_ANYWHERE or MATCH_EXACTLY.

class Specific_RE_Matcher {
public:
	Specific_RE_Matcher(match_type mt, int multiline = 0);
	~Specific_RE_Matcher();

	void AddPat(const char* pat);

	// Replaces the stored pattern text with a copy of 'pat' obtained
	// from copy_string().
	// NOTE(review): the previously stored pointer is overwritten without
	// being released here -- confirm this is never called on a matcher
	// that already holds pattern text.
	void SetPat(const char* pat)
		{
		pattern_text = copy_string(pat);
		}

	int Compile(int lazy = 0);

	// A leftover from flex's "{name}" definitions.  Kept because support
	// for such definitions may be wanted at some point.
	const char* LookupDef(const char* def);

	// Registers 'ccl' in the dictionary under the key 'txt'.
	void InsertCCL(const char* txt, CCL* ccl)
		{
		ccl_dict.Insert(txt, ccl);
		}

	// Appends 'ccl' to the list and returns the list length minus one
	// as measured after the append.
	int InsertCCL(CCL* ccl)
		{
		ccl_list.append(ccl);
		const int new_length = ccl_list.length();
		return new_length - 1;
		}

	// Dictionary lookup by text.
	CCL* LookupCCL(const char* txt)
		{
		return ccl_dict.Lookup(txt);
		}

	// List lookup by position.
	CCL* LookupCCL(int index)
		{
		return ccl_list[index];
		}

	CCL* AnyCCL();

	void ConvertCCLs();

	int MatchAll(const char* s);
	int MatchAll(const BroString* s);

	// Compiles a whole set of regular expressions simultaneously.
	// 'idx' holds the indices associated with the expressions; when
	// matching, the set of indices belonging to the matching expressions
	// is what gets reported.  'idx' must not contain zeros.
	int CompileSet(const string_list& set, const int_list& idx);

	// Yields the position in s immediately after the first match, or 0
	// when s has no such position.  If the pattern can match the empty
	// string, matching keeps going in an attempt to consume at least
	// one character.
	int Match(const char* s);
	int Match(const BroString* s);

	int LongestMatch(const char* s);
	int LongestMatch(const BroString* s);
	int LongestMatch(byte_vec bv, int n);

	// Address of the matcher's own equivalence-class object.
	EquivClass* EC()
		{
		return &equiv_class;
		}

	// The stored pattern text.
	const char* PatternText() const
		{
		return pattern_text;
		}

	// The stored DFA pointer.
	DFA_Machine* DFA() const
		{
		return dfa;
		}

	void Dump(FILE* f);

	unsigned int MemoryAllocation() const;

protected:
	void AddAnywherePat(const char* paT);
	void AddExactPat(const char* pat);

	int MatchAll(byte_vec bv, int n);
	int Match(byte_vec bv, int n);

	match_type mt;
	int multiline;
	char* pattern_text;	// assigned by SetPat()

	PDict(char) defs;
	PDict(CCL) ccl_dict;	// CCLs keyed by text
	PList(CCL) ccl_list;	// CCLs addressed by index
	EquivClass equiv_class;
	int* ecs;
	DFA_Machine* dfa;
	CCL* any_ccl;
	AcceptingSet* accepted;
};

// DFA_State_Handle is the type RE_Match_State uses to point at its current
// DFA state.  When EXPIRE_DFA_STATES is defined it is a class of its own
// (only forward-declared here); otherwise it is just another name for
// DFA_State.
#ifdef EXPIRE_DFA_STATES
	class DFA_State_Handle;
#else
	class DFA_State;
	typedef DFA_State DFA_State_Handle;
#endif

// Matching state tied to the DFA and equivalence classes of a
// Specific_RE_Matcher: the current DFA state and position, plus the
// accepted set and the list of match positions.
class RE_Match_State {
public:
	// Takes the DFA and the equivalence classes from 'matcher' and
	// starts out with no current state and a position of -1.
	RE_Match_State(Specific_RE_Matcher* matcher)
		: dfa(matcher->DFA()),
		  ecs(matcher->EC()->EquivClasses()),
		  current_state(0),
		  current_pos(-1)
		{
		}

	~RE_Match_State();

	const AcceptingSet* Accepted() const
		{
		return &accepted;
		}

	const int_list* MatchPositions() const
		{
		return &match_pos;
		}

	// Number of bytes fed into the matcher so far.  (The underlying
	// counter is set to -1 by the constructor and by Clear().)
	int Length()
		{
		return current_pos;
		}

	// Returns true if this input leads to at least one new match.
	bool Match(const u_char* bv, int n, bool bol, bool eol);

	// Empties the accepted set and the match positions and puts the
	// state and position back to their initial values.
	void Clear()
		{
		accepted.clear();
		match_pos.clear();
		current_state = 0;
		current_pos = -1;
		}

protected:
	DFA_Machine* dfa;
	int* ecs;

	AcceptingSet accepted;
	int_list match_pos;
	DFA_State_Handle* current_state;
	int current_pos;
};

// Front end that pairs two Specific_RE_Matcher objects, 're_exact' and
// 're_anywhere', and forwards each query to the appropriate one.
//
// NOTE(review): SerialObj is inherited privately (the default for
// 'class') -- confirm that this is intended.
class RE_Matcher : SerialObj {
public:
	RE_Matcher();
	RE_Matcher(const char* pat);
	~RE_Matcher();

	void AddDef(const char* defn_name, const char* defn_val);
	void AddPat(const char* pat);

	int Compile(int lazy = 0);

	// Returns true if s exactly matches the pattern, false otherwise.
	int MatchExactly(const char* s)
		{ return re_exact->MatchAll(s); }
	int MatchExactly(const BroString* s)
		{ return re_exact->MatchAll(s); }

	// Returns the position in s just beyond where the first match
	// occurs, or 0 if there is no such position in s.  Note that
	// if the pattern matches empty strings, matching continues
	// in an attempt to match at least one character.
	int MatchAnywhere(const char* s)
		{ return re_anywhere->Match(s); }
	int MatchAnywhere(const BroString* s)
		{ return re_anywhere->Match(s); }

	// Note: it matches the *longest* prefix.
	int MatchPrefix(const char* s)
		{ return re_exact->LongestMatch(s); }
	int MatchPrefix(const BroString* s)
		{ return re_exact->LongestMatch(s); }
	int MatchPrefix(byte_vec s, int n)
		{ return re_exact->LongestMatch(s, n); }

	// Pattern text as held by the exact / anywhere matcher respectively.
	const char* PatternText() const	{ return re_exact->PatternText(); }
	const char* AnywherePatternText() const	{ return re_anywhere->PatternText(); }

	// Serialization interface.  These are declared without a
	// "RE_Matcher::" prefix: qualifying a member with its own class
	// name inside the class body is ill-formed C++ and is rejected by
	// current compilers.
	bool Serialize(SerialInfo* info) const;
	static RE_Matcher* Unserialize(UnserialInfo* info);

	// Padded size of this object plus whatever each sub-matcher that is
	// present reports for itself.
	unsigned int MemoryAllocation() const
		{
		return padded_sizeof(*this)
			+ (re_anywhere ? re_anywhere->MemoryAllocation() : 0)
			+ (re_exact ? re_exact->MemoryAllocation() : 0);
		}

protected:
	DECLARE_SERIAL(RE_Matcher);

	Specific_RE_Matcher* re_anywhere;	// serves MatchAnywhere()
	Specific_RE_Matcher* re_exact;	// serves MatchExactly() / MatchPrefix()
};

// Macro-generated list type for RE_Matcher, available under the name
// re_matcher_list.
declare(PList, RE_Matcher);
typedef PList(RE_Matcher) re_matcher_list;

// Each takes two matchers and returns a pointer to an RE_Matcher; both
// are defined elsewhere.
// NOTE(review): by their names these build the conjunction / disjunction
// of the two patterns; ownership of the returned object is not visible
// from this header -- confirm against the implementation.
extern RE_Matcher* RE_Matcher_conjunction(const RE_Matcher* re1, const RE_Matcher* re2);
extern RE_Matcher* RE_Matcher_disjunction(const RE_Matcher* re1, const RE_Matcher* re2);

#endif
