// $Id: TCP_Contents.cc,v 1.8 2004/11/02 07:24:34 vern Exp $
//
// Copyright (c) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003
//      The Regents of the University of California.  All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that: (1) source code distributions
// retain the above copyright notice and this paragraph in its entirety, (2)
// distributions including binary code include the above copyright notice and
// this paragraph in its entirety in the documentation or other materials
// provided with the distribution, and (3) all advertising materials mentioning
// features or use of this software display the following acknowledgement:
// ``This product includes software developed by the University of California,
// Lawrence Berkeley Laboratory and its contributors.'' Neither the name of
// the University nor the names of its contributors may be used to endorse
// or promote products derived from this software without specific prior
// written permission.
// THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
// WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
// MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.

#include "File.h"
#include "TCP.h"


// Maximum amount of data that might plausibly be sent in an initial
// flight (prior to receiving any acks).  Used to determine whether we
// must not be seeing our peer's acks: TCP_Reassembler::BlockInserted()
// compares the endpoint's Size() against this threshold.
const int max_initial_window = 4096;

// If we're not seeing our peer's acks, maximum volume of data above
// a sequence hole that we'll tolerate before assuming that there's
// been a packet drop and we should give up on tracking a connection.
// TCP_Contents::DataSent() compares the reassembler's
// NumUndeliveredBytes() against this threshold.
const int max_above_hole_without_any_acks = 4096;


TCP_Reassembler::~TCP_Reassembler()
	{
	// Nothing to flush or release unless a contents file is attached.
	if ( ! record_contents_file )
		return;

	// Anything still buffered beyond the delivery point never got
	// delivered; write it to the contents file before going away.
	const bool have_undelivered =
		blocks && last_reassem_seq < last_block->upper;

	if ( have_undelivered )
		RecordToSeq(last_reassem_seq, last_block->upper,
				record_contents_file);

	Unref(record_contents_file);
	}

void TCP_Reassembler::SizeBufferedData(int& waiting_on_hole,
					int& waiting_on_ack) const
	{
	waiting_on_hole = waiting_on_ack = 0;
	for ( DataBlock* b = blocks; b; b = b->next )
		{
		if ( b->seq <= last_reassem_seq )
			// We must have delivered this block, but
			// haven't yet trimmed it.
			waiting_on_ack += b->Size();
		else
			waiting_on_hole += b->Size();
		}
	}

// Starts (or redirects) recording of the reassembled stream to f.
// A file that is not open is rejected with a run-time error and the
// current recording state is left untouched.
// NOTE(review): f is stored without taking a reference here although
// it is Unref()'d later -- presumably the caller hands over a
// reference; confirm.
void TCP_Reassembler::SetContentsFile(BroFile* f)
	{
	if ( ! f->IsOpen() )
		{
		run_time("no such file \"%s\"", f->Name());
		return;
		}

	if ( ! record_contents_file )
		{
		// First file for this stream: catch up by writing what is
		// buffered below the delivery point.
		if ( blocks )
			RecordToSeq(blocks->seq, last_reassem_seq, f);
		}
	else
		// We were already recording, so there is nothing to catch
		// up on; just let go of the previous file.
		Unref(record_contents_file);

	record_contents_file = f;
	}

void TCP_Reassembler::Undelivered(int up_to_seq)
	{
	if ( up_to_seq <= 2 && contents_processor->Conn()->IsPartial() )
		// Since it was a partial connection, we faked up its
		// initial sequence numbers as though we'd seen a SYN.
		// We've now received the first ack and are getting a
		// complaint that either that data is missing (if
		// up_to_seq is 1), or one octet beyond it is missing
		// (if up_to_seq is 2).  The latter can occur when the
		// first packet we saw instantiating the partial connection
		// was a keep-alive.  So, in either case, just ignore it.
		return;

#if 0
	if ( contents_processor->Endpoint()->FIN_cnt > 0 )
		{
		// Make sure we're not worrying about undelivered
		// FIN control octets!
		int FIN_seq = contents_processor->Endpoint()->FIN_seq - contents_processor->Endpoint()->start_seq;
		if ( up_to_seq >= FIN_seq )
			up_to_seq = FIN_seq - 1;
		}
#endif
	if ( contents_processor->Endpoint()->FIN_cnt > 0 ||
	     contents_processor->Endpoint()->RST_cnt > 0 )
		{
		// We could be running on a SYN/FIN/RST-filtered trace - don't
		// complain about data missing at the end of the connection.
		//
		// Note, this check will confuse the EOF checker (and cause a
		// missing FIN in the rewritten trace) when the content gap
		// in the middle is discovered only after the FIN packet.
		return;
		}

	if ( up_to_seq <= last_reassem_seq )
		return;

	if ( record_contents_file )
		RecordToSeq(last_reassem_seq, up_to_seq, record_contents_file);

	// Inform the endpoint about the gap.
	contents_processor->Undelivered(last_reassem_seq,
					up_to_seq - last_reassem_seq);

	if ( tcp_match_undelivered )
		MatchUndelivered(up_to_seq);

	last_reassem_seq = up_to_seq;	// we've done our best ...
	}

void TCP_Reassembler::MatchUndelivered(int up_to_seq)
	{
	if ( ! blocks || ! rule_matcher )
		return;

	if ( up_to_seq == -1 )
		up_to_seq = last_block->upper;

	if ( last_reassem_seq < up_to_seq )
		contents_processor->Conn()->Weird("matching_undelivered_data");

	for ( DataBlock* b = blocks; b && b->upper <= up_to_seq; b = b->next )
		contents_processor->Conn()->Match(Rule::PAYLOAD, b->block,
			b->Size(), false, false,
			contents_processor->IsOrig());
	}

void TCP_Reassembler::RecordToSeq(int start_seq, int stop_seq, BroFile* f)
	{
	DataBlock* b = blocks;
	// Skip over blocks up to the start seq.
	while ( b && b->upper <= start_seq )
		b = b->next;

	if ( ! b )
		return;

	int last_seq = start_seq;
	while ( b && b->upper <= stop_seq )
		{
		if ( b->seq > last_seq )
			RecordGap(last_seq, b->seq, f);

		RecordBlock(b, f);
		last_seq = b->upper;
		b = b->next;
		}

	if ( b )
		// Check for final gap.
		if ( last_seq < stop_seq )
			RecordGap(last_seq, stop_seq, f);
	}

// Writes the payload of block b to f; a failed write is treated as an
// internal error.
void TCP_Reassembler::RecordBlock(DataBlock* b, BroFile* f)
	{
	unsigned int payload_len = b->Size();
	const char* payload = (const char*) b->block;

	if ( f->Write(payload, payload_len) )
		return;

	// ### this should really generate an event
	internal_error("contents write failed");
	}

// Writes a textual marker to f noting a gap of (upper_seq - start_seq)
// in the recorded stream; a failed write is treated as an internal
// error.
void TCP_Reassembler::RecordGap(int start_seq, int upper_seq, BroFile* f)
	{
	const int gap_size = upper_seq - start_seq;

	if ( f->Write(fmt("\n<<gap %d>>\n", gap_size)) )
		return;

	// ### this should really generate an event
	internal_error("contents gap write failed");
	}

// Called with the first block of newly inserted data.  If that block
// covers the current delivery point, delivers every now-contiguous
// block (recording and rule-matching each one as configured), and then
// decides whether the delivered data can be trimmed right away.
void TCP_Reassembler::BlockInserted(DataBlock* start_block)
	{
	// Only proceed when the block straddles the delivery point, i.e.
	// it starts at or before it and extends beyond it.
	if ( ! (start_block->seq <= last_reassem_seq &&
		start_block->upper > last_reassem_seq) )
		return;

	// We've filled a leading hole.  Deliver as much as possible.
	// The new block may include both some old stuff and some new
	// stuff.  AddAndCheck() will have split the new stuff off into
	// its own block(s), so only blocks beginning exactly at the
	// delivery point carry undelivered data.
	for ( DataBlock* blk = start_block;
	      blk && blk->seq <= last_reassem_seq; blk = blk->next )
		{
		if ( blk->seq != last_reassem_seq )
			// Already-delivered data; skip it.
			continue;

		// New stuff.
		int len = blk->Size();

		if ( record_contents_file )
			RecordBlock(blk, record_contents_file);

		contents_processor->DeliverBlock(last_reassem_seq,
						len, blk->block);

		if ( rule_matcher )
			contents_processor->Conn()->Match(Rule::PAYLOAD,
				blk->block, len, false, false,
				contents_processor->IsOrig());

		last_reassem_seq += len;
		}

	TCP_Endpoint* endpoint = contents_processor->Endpoint();

	// Drop the now-delivered data immediately in two situations:
	//
	// (1) Our endpoint's peer doesn't do reassembly and so
	//     (presumably) isn't processing acks.
	//
	// (2) We've sent quite a bit of data, yet none of it has been
	//     acked.  Presume that we're not seeing the peer's acks
	//     (perhaps due to filtering or split routing); hanging onto
	//     the data could mean carrying it all the way until this
	//     connection ends.
	if ( ! endpoint->peer->HasContents() ||
	     (endpoint->Size() > max_initial_window &&
	      endpoint->NoDataAcked()) )
		TrimToSeq(last_reassem_seq);

	// Note: don't make an EOF check here, because then we'd miss it
	// for FIN packets that don't carry any payload (and thus
	// endpoint->DataSent is not called).  Instead, do the check in
	// TCP_Connection::NextPacket.
	}

void TCP_Reassembler::Overlap(const u_char* b1, const u_char* b2, int n)
	{
	if ( rexmit_inconsistency &&
	     memcmp((const void*) b1, (const void*) b2, n) &&
	     // The following weeds out keep-alives for which that's all
	     // we've ever seen for the connection.
	     (n > 1 || contents_processor->Endpoint()->peer->HasDoneSomething()) )
		{
		BroString* b1_s = new BroString((const u_char*) b1, n, 0);
		BroString* b2_s = new BroString((const u_char*) b2, n, 0);
		contents_processor->Conn()->Event(rexmit_inconsistency,
					new StringVal(b1_s), new StringVal(b2_s));
		}
	}

// Serialization boilerplate for TCP_Reassembler, tied to the
// SER_TCP_REASSEMBLER type tag (macro defined elsewhere; presumably it
// registers the class with the serialization framework).
IMPLEMENT_SERIAL(TCP_Reassembler, SER_TCP_REASSEMBLER);

// Serializes this reassembler: first the Reassembler base part (via
// DO_SERIALIZE), then the optional contents file, and finally the
// owning contents processor, whose result is returned.
// NOTE(review): DO_SERIALIZE and SERIALIZE_OPTIONAL are macros defined
// elsewhere; presumably they return false from this function on
// failure -- confirm.
bool TCP_Reassembler::DoSerialize(SerialInfo* info) const
	{
	DO_SERIALIZE(SER_TCP_REASSEMBLER, Reassembler);
	SERIALIZE_OPTIONAL(record_contents_file);
	return contents_processor->Serialize(info);
	}

// Counterpart of DoSerialize(): restores the Reassembler base part,
// the optional contents file, and the contents processor, in the same
// order in which they were written.  Returns false if the contents
// processor could not be unserialized.
// NOTE(review): DO_UNSERIALIZE and UNSERIALIZE_OPTIONAL are macros
// defined elsewhere; presumably they return false from this function
// on failure -- confirm.
bool TCP_Reassembler::DoUnserialize(UnserialInfo* info)
	{
	DO_UNSERIALIZE(Reassembler);
	UNSERIALIZE_OPTIONAL(record_contents_file, BroFile::Unserialize(info));

	contents_processor = TCP_Contents::Unserialize(info);
	return contents_processor != 0;
	}

// Attaches a new contents processor to arg_endp, creates its
// reassembler, and decides whether reassembled data should also be
// raised as tcp_contents events for this direction and port.
TCP_Contents::TCP_Contents(TCP_Endpoint* arg_endp, int arg_stop_on_gap)
	{
	endp = arg_endp;
	endp->AddContentsProcessor(this);
	t_reassem = new TCP_Reassembler(this, endp->src_addr);
	skip_deliveries = 0;
	stop_on_gap = arg_stop_on_gap;
	did_EOF = 0;
	deliver_tcp_contents = 0;

	// Without a handler for tcp_contents there is nothing to look up.
	if ( ! tcp_contents )
		return;

	// Content delivery is enabled per responder port, using separate
	// tables for the originator and responder directions.
	Val dst_port_val(ntohs(Conn()->RespPort()), TYPE_PORT);

	TableVal* ports = 0;
	if ( IsOrig() )
		ports = tcp_content_delivery_ports_orig;
	else
		ports = tcp_content_delivery_ports_resp;

	Val* result = ports->Lookup(&dst_port_val);
	if ( result && result->AsBool() )
		deliver_tcp_contents = 1;
	}

// Drops this object's reference to its reassembler.
TCP_Contents::~TCP_Contents()
	{
	Unref(t_reassem);
	}

// Feeds len bytes of payload starting at (relative) sequence number seq
// into the reassembler.  Returns 0 when the data is ignored (deliveries
// are being skipped, or everything has already been acked) and 1
// otherwise.
int TCP_Contents::DataSent(double t, int seq, int len,
				const u_char* data)
	{
	if ( skip_deliveries || Conn()->Skipping() )
		return 0;

	const int ack = endp->AckSeq() - endp->StartSeq();

	if ( seq < ack )
		{
		if ( seq + len <= ack )
			// We've already delivered this and it's been acked.
			return 0;

		// We've seen an ack for part of this packet, but not the
		// whole thing.  This can happen when, for example, a previous
		// packet held [a, a+b) and this packet holds [a, a+c) for c>b
		// (which some TCP's will do when retransmitting).  Trim the
		// packet to just the unacked data.
		const int already_acked = ack - seq;
		data += already_acked;
		len -= already_acked;
		seq += already_acked;
		}

	t_reassem->NewBlock(t, seq, len, data);

	// Too much data piling up above a hole while none of our data has
	// ever been acked: stop tracking the stream's contents.
	const bool too_much_above_hole =
		t_reassem->NumUndeliveredBytes() >
			max_above_hole_without_any_acks;

	if ( too_much_above_hole && Endpoint()->NoDataAcked() )
		{
		Conn()->Weird("above_hole_data_without_any_acks");
		t_reassem->ClearBlocks();
		SetSkipDeliveries(1);
		}

	return 1;
	}

// Processes an ack for sequence number seq: trims acked data from the
// reassembler, raises ack_above_hole if the trim failed under
// conditions where that is meaningful, and re-checks for end of data.
void TCP_Contents::AckReceived(int seq)
	{
	// An ack at or beyond the FIN is clamped to just before the FIN
	// so that trimming stops there.
	const bool ack_reaches_FIN =
		endp->FIN_cnt > 0 && seq >= endp->FIN_seq;

	if ( ack_reaches_FIN )
		seq = endp->FIN_seq - 1;

	if ( ! t_reassem->TrimToSeq(seq) )
		{
		const bool still_tracking =
			! SkippingDeliveries() && ! Conn()->Skipping();
		const bool neither_side_partial =
			endp->state != TCP_PARTIAL &&
			endp->peer->state != TCP_PARTIAL;

		// Only generate these events when reading live; from
		// a trace file, the hole might exist because the
		// file was trimmed while writing it.
		if ( still_tracking && reading_live && neither_side_partial )
			Conn()->Event(ack_above_hole);
		}

	// Check EOF here because t_reassem->LastReassemSeq() may have
	// changed after calling TrimToSeq().
	CheckEOF();
	}

// Signals end-of-data for this endpoint to the connection, at most
// once, as soon as the endpoint has finished sending and either
// deliveries are being skipped or reassembly has caught up with the
// endpoint's last sequence number.
void TCP_Contents::CheckEOF()
	{
	if ( did_EOF )
		return;

	const bool sender_finished = endp->FIN_cnt > 0 ||
		endp->state == TCP_CLOSED || endp->state == TCP_RESET;

	if ( ! sender_finished )
		return;

	const bool not_delivering = Conn()->Skipping() || skip_deliveries;

	if ( ! not_delivering &&
	     endp->StartSeq() + t_reassem->LastReassemSeq() <
		endp->LastSeq() - 1 )
		// Some data is still outstanding.
		return;

	// We've now delivered all of the data.
	did_EOF = 1;
	Conn()->EndpointEOF(this);
	}

// DeliverBlock is basically a relay to function Deliver. But unlike
// Deliver, DeliverBlock is not virtual, and this allows us to insert
// operations that apply to all connections using TCP_Contents.

void TCP_Contents::DeliverBlock(int seq, int len, u_char* data)
	{
	if ( deliver_tcp_contents )
		{
		val_list* vl = new val_list();
		vl->append(Conn()->BuildConnVal());
		vl->append(new Val(IsOrig(), TYPE_BOOL));
		vl->append(new Val(seq, TYPE_COUNT));
		vl->append(new StringVal(len, (const char*) data));

		Conn()->ConnectionEvent(tcp_contents, vl);
		}

	Deliver(seq, len, data);
	}

// Default delivery: passes the data for this endpoint straight on to
// the connection.  TCP_ContentLine::Deliver() in this file provides a
// line-oriented alternative.
void TCP_Contents::Deliver(int seq, int len, u_char* data)
	{
	Conn()->Deliver(endp, seq, len, data);
	}

// Called by the reassembler when len bytes starting at seq will never
// be delivered.  Honors stop_on_gap and otherwise reports the gap to
// the connection unless deliveries are being skipped.
void TCP_Contents::Undelivered(int seq, int len)
	{
	// With stop_on_gap set, no more data will be sent to the
	// reassembler via DataSent, and thus no data will be delivered.
	// It is safe to stop right here because all the data before seq
	// that exists in the trace must have already appeared.
	if ( stop_on_gap )
		SetSkipDeliveries(1);

	if ( Conn()->Skipping() || skip_deliveries )
		return;

	Conn()->Undelivered(endp, seq, len);
	}

// Forwards to the reassembler's SizeBufferedData(), which fills in both
// output parameters with the amount of data it is currently buffering.
void TCP_Contents::SizeBufferedData(int& waiting_on_hole, int& waiting_on_ack)
	{
	t_reassem->SizeBufferedData(waiting_on_hole, waiting_on_ack);
	}

int TCP_Contents::DataPending() const
	{
	uint32 delivered_seq = Endpoint()->StartSeq() + DataSeq();

	// We've delivered everything if we're up to the penultimate
	// sequence number (since a FIN consumes an octet in the
	// sequence space), or right at it (because a RST does not).
	if ( delivered_seq != Endpoint()->LastSeq() - 1 &&
	     delivered_seq != Endpoint()->LastSeq() )
		return 1;

	// If we've sent RST, then we can't send ACKs any more.
	if ( Endpoint()->state != TCP_RESET &&
	     Endpoint()->peer->HasUndeliveredData() )
		return 1;

	return 0;
	}

// Forwards to the reassembler's SetContentsFile() with the same file.
void TCP_Contents::SetContentsFile(BroFile* f)
	{
	t_reassem->SetContentsFile(f);
	}

// Serialization boilerplate for TCP_Contents, tied to the
// SER_TCP_CONTENTS type tag (macro defined elsewhere; presumably it
// registers the class with the serialization framework).
IMPLEMENT_SERIAL(TCP_Contents, SER_TCP_CONTENTS);

// Public serialization entry point; delegates to SerialObj::Serialize().
bool TCP_Contents::Serialize(SerialInfo* info) const
	{
	return SerialObj::Serialize(info);
	}

// Public unserialization entry point: asks SerialObj::Unserialize() for
// an object of type SER_TCP_CONTENTS and casts the result to
// TCP_Contents*.
// NOTE(review): the cast is unchecked; it relies on SerialObj honoring
// the requested type tag -- confirm.
TCP_Contents* TCP_Contents::Unserialize(UnserialInfo* info)
	{
	return (TCP_Contents*) SerialObj::Unserialize(info, SER_TCP_CONTENTS);
	}

// Serializes this contents processor: the SerialObj base part (via
// DO_SERIALIZE), then the endpoint, the reassembler, and the three
// state flags.  Returns false as soon as one of those steps fails.
bool TCP_Contents::DoSerialize(SerialInfo* info) const
	{
	DO_SERIALIZE(SER_TCP_CONTENTS, SerialObj);

	// FIXME: We don't serialize the reassembler. Would this make sense?
	// NOTE(review): the FIXME above looks stale -- the expression
	// below does call t_reassem->Serialize(info).
	return endp->Serialize(info) &&
		t_reassem->Serialize(info) &&
		SERIALIZE_BIT(skip_deliveries) &&
		SERIALIZE_BIT(stop_on_gap) &&
		SERIALIZE_BIT(did_EOF);
	}

// Counterpart of DoSerialize(): restores the SerialObj base part, the
// endpoint, the reassembler, and the three state flags, in the order
// in which they were written.  Returns false if the endpoint or the
// reassembler cannot be restored.
// NOTE(review): DO_UNSERIALIZE and UNSERIALIZE_BIT are macros defined
// elsewhere; presumably they return false from this function on
// failure -- confirm.
bool TCP_Contents::DoUnserialize(UnserialInfo* info)
	{
	DO_UNSERIALIZE(SerialObj);
	endp = TCP_Endpoint::Unserialize(info);
	if ( ! endp )
		return false;

	// NOTE(review): unchecked downcast; assumes the serialized
	// Reassembler really is a TCP_Reassembler -- confirm.
	t_reassem =(TCP_Reassembler*) Reassembler::Unserialize(info);
	if ( ! t_reassem )
		return false;

	UNSERIALIZE_BIT(skip_deliveries);
	UNSERIALIZE_BIT(stop_on_gap);
	UNSERIALIZE_BIT(did_EOF);

	return true;
	}

// Builds a line-oriented contents processor on top of TCP_Contents.
// is_NUL_sensitive enables NUL checking, skip_partial makes Deliver()
// give up on partial connections, and arg_CRLF_as_EOL selects which
// characters terminate a line.
TCP_ContentLine::TCP_ContentLine(TCP_Endpoint* arg_endp,
			int is_NUL_sensitive, int skip_partial,
			int arg_CRLF_as_EOL)
: TCP_Contents(arg_endp)
	{
	// Line-handling policy supplied by the caller.
	CR_LF_as_EOL = arg_CRLF_as_EOL;
	punt_on_partial = skip_partial;
	flag_NULs = is_NUL_sensitive;

	line_limit = 1024;

	// Clear buf first so that Init() takes its fresh-buffer path
	// (resetting offset and last_char) instead of copying from and
	// deleting an uninitialized pointer.
	buf = 0;
	Init(0);
	}

// (Re)allocates the line buffer with room for size bytes, with a floor
// of 128.  When a buffer already exists, its first offset bytes are
// carried over and the old buffer is freed; otherwise offset and
// last_char are reset for a fresh start.
void TCP_ContentLine::Init(int size)
	{
	const int min_size = 128;
	if ( size < min_size )
		size = min_size;

	char* fresh = new char[size];

	if ( ! buf )
		{
		// First allocation: start with an empty line.
		offset = 0;
		last_char = 0;
		}
	else
		{
		// Growing: keep the partial line gathered so far.
		if ( offset > 0 )
			memcpy(fresh, buf, offset);

		delete [] buf;
		}

	buf = fresh;
	buf_len = size;
	}

// Frees the line buffer allocated by Init(), Deliver() or
// DoUnserialize().
TCP_ContentLine::~TCP_ContentLine()
	{
	delete [] buf;
	}

// Entry point for reassembled data on a line-oriented stream.  Ignores
// empty deliveries and deliveries while skipping, optionally gives up
// on partial connections, makes sure the line buffer can hold the new
// data, and then hands the data to DoDeliver() for line splitting.
//
// seq  - sequence number of the first byte of data.
// len  - number of bytes in data.
// data - the payload to split into lines.
void TCP_ContentLine::Deliver(int seq, int len, u_char* data)
	{
	if ( len <= 0 || skip_deliveries )
		return;

	if ( punt_on_partial &&
	     (endp->state == TCP_PARTIAL || endp->peer->state == TCP_PARTIAL) )
		{
		// We did not see this connection from its start; stop
		// analyzing it and stop recording its packets.
		Conn()->SetSkip(1);
		Conn()->SetRecordPackets(0);	// ###
		return;
		}

	if ( buf && len + offset >= buf_len )
		{ // Make sure we have enough room to accommodate the new stuff.
		int old_buf_len = buf_len;
		int new_buf_len = ((offset + len) * 3) / 2 + 1;

		char* tmp = new char[new_buf_len];

		// Check the allocation before touching tmp or releasing the
		// old buffer; checking afterwards could never catch a
		// failure.
		if ( ! tmp )
			internal_error("out of memory delivering endpoint line");

		// new_buf_len exceeds old_buf_len here, so the whole old
		// buffer fits.
		memcpy(tmp, buf, old_buf_len);

		delete [] buf;
		buf = tmp;
		buf_len = new_buf_len;
		}

	DoDeliver(seq, len, data);
	}

// Scans the first len bytes of data for a CR or LF.  On a hit, returns
// a pointer to that character, stores in next_pos the offset just past
// it (where the next line begins), and remembers the character in
// last_char.  Without a hit, returns 0 and sets next_pos to 0.
u_char* TCP_ContentLine::NextNewLine(int len, u_char* data, int& next_pos)
	{
	for ( int i = 0; i < len; ++i )
		{
		const u_char ch = data[i];

		if ( ch != '\n' && ch != '\r' )
			continue;

		next_pos = i + 1;	// where next line begins
		last_char = ch;
		return data + i;
		}

	next_pos = 0;
	return 0;
	}


// Repeatedly feeds the remaining data to DoDeliverOnce() until it is
// used up or deliveries get switched off.  The sequence number is not
// used.
void TCP_ContentLine::DoDeliver(int /* seq */, int len, u_char* data)
	{
	while ( len > 0 && ! skip_deliveries )
		{
		// When CR already counts as end-of-line, an LF directly
		// after a CR belongs to the same line termination:
		// compress CRLF to just one line termination by dropping
		// the LF.
		const bool LF_completes_CRLF =
			(CR_LF_as_EOL & CR_as_EOL) &&
			last_char == '\r' && *data == '\n';

		if ( LF_completes_CRLF )
			{
			last_char = *data;
			++data;
			--len;
			}

		const int consumed = DoDeliverOnce(len, data);
		data += consumed;
		len -= consumed;
		}
	}

// Consumes bytes from data, appending them to the line buffer, until
// either one complete line has been passed to Conn()->NewLine() or all
// len bytes are used up.  Returns the number of bytes consumed; a
// partial line stays in buf (with offset marking its length) for the
// next call.
int TCP_ContentLine::DoDeliverOnce(int len, u_char* data)
	{
	// Remembered so that the number of consumed bytes can be computed.
	const u_char* data_start = data;

	// ### Note, excessive_line checking no longer done.

	// ### This code should be correspondingly updated in TCP_NVT
	// ### and Rlogin.
	//        fprintf(stderr, "Data delivered (len): '%.*s' (%d)\n", len, data, len);

	if ( len <= 0 )
		return 0;

	for ( ; len > 0; --len, ++data )
		{
		// Double the buffer when it is full; Init() carries over
		// the bytes gathered so far.
		if ( offset >= buf_len )
			Init(buf_len * 2);

		int c = data[0];

// EMIT_LINE NUL-terminates the gathered line, passes it to
// Conn()->NewLine(), resets the buffer, records the current character
// as last_char, and returns from DoDeliverOnce() with the number of
// bytes consumed, including the current one.
#define EMIT_LINE \
	{ \
	buf[offset] = '\0'; \
	Conn()->NewLine(this, offset, buf); \
	offset = 0; \
	last_char = c; \
	return data + 1 - data_start; \
	}

		switch ( c ) {
		case '\r':
			// A CR either ends the line (if enabled) or is
			// stored like ordinary data.
			if ( CR_LF_as_EOL & CR_as_EOL )
				EMIT_LINE
			else
				buf[offset++] = c;
			break;

		case '\n':
			if ( last_char == '\r' )
				{
				// CRLF: drop the CR from the line before
				// emitting it.
				--offset; // remove '\r'
				EMIT_LINE
				}
			else if ( CR_LF_as_EOL & LF_as_EOL )
				EMIT_LINE
			else
				{
				// A bare LF that does not count as a line
				// end: report it and keep it as data.
				if ( Conn()->FlagEvent(SINGULAR_LF) )
					Conn()->Weird("line_terminated_with_single_LF");
				buf[offset++] = c;
				}
			break;

		case '\0':
			// With NUL checking on, the NUL goes to CheckNUL()
			// and is not stored; otherwise it is kept as data.
			if ( flag_NULs )
				CheckNUL();
			else
				buf[offset++] = c;
			break;

		default:
			buf[offset++] = c;
			break;
		}

		// Only reached when the current character did not emit a
		// line: a preceding CR was therefore not followed by LF.
		if ( last_char == '\r' )
			if ( Conn()->FlagEvent(SINGULAR_CR) )
				Conn()->Weird("line_terminated_with_single_CR");

		last_char = c;
		}

	// Input used up without completing a line.
	return data - data_start;
	}

// Handles a NUL byte seen while NUL checking is enabled: reports it as
// a weird (subject to FlagEvent) and switches further NUL checking off,
// unless the NUL looks like a keep-alive on a partial connection.
void TCP_ContentLine::CheckNUL()
	{
	// If this is the first byte seen on this connection, and if the
	// connection's state is PARTIAL, then we've intercepted a
	// keep-alive, and shouldn't complain about it.  For PARTIAL
	// connections the starting sequence number is adjusted as though
	// there had been an initial SYN, so "first byte" means the
	// connection has at most two bytes so far.
	const bool looks_like_keep_alive =
		endp->state == TCP_PARTIAL &&
		endp->LastSeq() - endp->StartSeq() <= 2;

	if ( looks_like_keep_alive )
		return;

	if ( Conn()->FlagEvent(NUL_IN_LINE) )
		Conn()->Weird("NUL_in_line");

	flag_NULs = 0;
	}

// Raises the excessive_line event on the connection and sets line_limit
// to 0.  Neither argument is used.
// NOTE(review): nothing in this file calls this function, and
// DoDeliverOnce() notes that excessive-line checking is no longer
// done -- confirm whether it is still needed.
void TCP_ContentLine::ExcessiveLine(const u_char* /* data */, int /* len */)
	{
	Conn()->Event(excessive_line);
	line_limit = 0;
	}

// Serialization boilerplate for TCP_ContentLine, tied to the
// SER_TCP_CONTENT_LINE type tag (macro defined elsewhere; presumably it
// registers the class with the serialization framework).
IMPLEMENT_SERIAL(TCP_ContentLine, SER_TCP_CONTENT_LINE);

// Serializes the line-processing state on top of the TCP_Contents base
// part: the flags, the line limit, the end-of-line policy, the first
// offset bytes of the line buffer, and then offset, buf_len and
// last_char.  DoUnserialize() reads the fields back in this order.
bool TCP_ContentLine::DoSerialize(SerialInfo* info) const
	{
	DO_SERIALIZE(SER_TCP_CONTENT_LINE, TCP_Contents);

	return SERIALIZE_BIT(flag_NULs) &&
		SERIALIZE_BIT(punt_on_partial) &&
		SERIALIZE_BIT(line_limit) &&
		SERIALIZE((unsigned int)CR_LF_as_EOL) &&
	SERIALIZE_STR(buf, offset) &&
		SERIALIZE(offset) &&
		SERIALIZE(buf_len) &&
		SERIALIZE(last_char);
	}

// Counterpart of DoSerialize(): restores the TCP_Contents base part and
// then the line-processing state, allocating a new line buffer of
// buf_len bytes and copying the serialized partial line into it.
// Returns false if reading fails or the serialized line is longer than
// the buffer it is supposed to fit in.
bool TCP_ContentLine::DoUnserialize(UnserialInfo* info)
	{
	DO_UNSERIALIZE(TCP_Contents);

	UNSERIALIZE_BIT(flag_NULs);
	UNSERIALIZE_BIT(punt_on_partial);
	UNSERIALIZE_BIT(line_limit);

	// The end-of-line policy was written as an unsigned int.
	unsigned int tmp;
	if ( ! UNSERIALIZE(&tmp) )
		return false;

	CR_LF_as_EOL = tmp;

	const char* tmpbuf;
	int tmplen;

	if ( ! (UNSERIALIZE_STR(&tmpbuf, &tmplen) &&
		UNSERIALIZE(&offset) &&
		UNSERIALIZE(&buf_len) &&
		UNSERIALIZE(&last_char)) )
		return false;

	// If our input is correct, tmplen is <= buf_len. Check this.
	// NOTE(review): offset itself is not validated against buf_len
	// or tmplen here -- confirm whether that can be relied upon.
	if ( tmplen > buf_len )
		{
		info->s->Error("incorrect buffer size in TCP_ContentLine");
		return false;
		}

	// NOTE(review): tmpbuf is not released in this function; if
	// UNSERIALIZE_STR hands over ownership of the buffer this leaks --
	// confirm against the macro's definition.
	buf = new char[buf_len];
	memcpy(buf, tmpbuf, tmplen);

	return true;
	}
