# $Id: scan.bro,v 1.10 2005/06/19 06:35:41 vern Exp $

@load notice
@load port-name
@load hot

# Notice types raised by this script (via NOTICE() in drop_address,
# scan_summary, check_scan and account_tried).
redef enum Notice += {
	AddressDropped,		# connectivity w/ given address has been dropped
	AddressDropIgnored,	# a request to drop connectivity has been ignored
	PortScan,		# the source has scanned a number of ports
	AddressScan,		# the source has scanned a number of addrs
	BackscatterSeen,	# apparent flooding backscatter seen from source
	ScanSummary,		# a summary of scanning activity
	PasswordGuessing,	# source tried many user/password combinations
};

# Functions to be used with automatic expiration for some of the tables below.
# They are attached to those tables with &expire_func, but only run once
# you also add &expire_* timeouts to the tables.  These are forward
# declarations; the definitions are at the end of this file.
global decr_num_distinct_peers:
	function(t: table[addr] of count, idx: any): interval;
global decr_num_distinct_ports:
	function(t: table[addr] of count, idx: any): interval;
global decr_num_distinct_low_ports:
	function(t: table[addr] of count, idx: any): interval;
global remove_possible_source: function(s: set[addr], idx: addr): interval;

# If true, we suppress scan checking (we still do account-tried accounting).
# This is provided because scan checking can consume a lot of memory.
global suppress_scan_checks = F &redef;

# If true, check_scan() drops a source that reaches landmine_thresh_trigger
# distinct peers while contacting an address covered by landmine_address.
global activate_landmine_check = F &redef;

# If true, check_scan() drops a non-local source once it has tried
# priv_scan_trigger distinct ports below 1024.
global activate_priv_port_check = T &redef;

# If true, in addition to using the regular algorithm, also use the
# Threshold Random Walk algorithm developed by Jung et al.
global use_TRW_algorithm = F &redef;

# Function that is called to perform TRW analysis, if above is T.  You
# need to "@load trw" to pull in the definition for it.
global check_TRW_scan:
	function(c: connection, state: string, reverse: bool): bool;


# Per-source count of distinct peers, and the [source, peer] pairs that
# have been counted.  check_scan() updates both together.
global num_distinct_peers: table[addr] of count &default=0;
global distinct_peers: set[addr, addr] &expire_func=decr_num_distinct_peers;

# Addresses used by the landmine check in check_scan(), which tests the
# responder address masked to 24 bits against this set.
global landmine_address: set[addr] &redef;

# Services that are exempt from the low-port ("trolling") check.
global troll_skip_service = { smtp, ftp, ssh, 20/tcp, http, } &redef;

# If expiration is activated for the tables above, we can get notices
# for the same threshold more than once. This table is used to suppress
# these duplicates. You should also define an &read_expire on it, but
# set it to a considerably larger value.
global reported_address_scans: set[addr, count];

# Report a scan of peers at each of these points.

const report_peer_scan = {
	20, 100, 1000, 10000, 50000, 100000, 250000, 500000, 1000000,
} &redef;
# Same, but applied when the scanning source is a local address.
const report_outbound_peer_scan = { 100, 1000, 10000, } &redef;

# Per-source counts of distinct ports (all ports / ports below 1024) and
# the [source, port] pairs that have been counted.
global num_distinct_ports: table[addr] of count &default=0;
global num_distinct_low_ports: table[addr] of count &default=0;
global distinct_ports: set[addr, port] &expire_func=decr_num_distinct_ports;
global distinct_low_ports: set[addr, port]
	&expire_func=decr_num_distinct_low_ports;

# Same as reported_address_scans but for port scans.
global reported_port_scans: set[addr, addr, count];

# Sources that have reached possible_port_scan_thresh distinct ports.  For
# these, scan_triples[src] holds the [destination, port] pairs seen and
# num_scan_triples[src][dst] the number of distinct ports per destination.
global possible_scan_sources: set[addr] &expire_func=remove_possible_source;
global num_scan_triples: table[addr] of table[addr] of count;
global scan_triples: table[addr] of set[addr, port];

# Report a scan of ports at each of these points.
const report_port_scan = { 50, 250, 1000, 5000, 10000, 25000, 65000, } &redef;

# Once a source has scanned this many different ports (to however many
# different remote hosts), start tracking its per-destination access.
const possible_port_scan_thresh = 20 &redef;

# Indexed by source address, user name tried, password tried.
global accounts_tried: set[addr, string, string];
# Number of distinct user name/password combinations tried per source.
global num_accounts_tried: table[addr] of count &default=0;

# Report password guessing at each of these points.  account_tried() uses
# report_remote_accounts_tried when the source is a local address and
# report_accounts_tried otherwise; sources in skip_accounts_tried are
# still counted but never reported.
const report_accounts_tried = { 20, 100, 1000, 10000, 100000, 1000000, } &redef;
const report_remote_accounts_tried = { 100, 500, } &redef;
const skip_accounts_tried: set[addr] &redef;

# Services ignored by check_scan(): skip_services applies to non-local
# sources, skip_outbound_services to local sources.
const addl_web = { 81/tcp, 443/tcp, 8000/tcp, 8001/tcp, 8080/tcp, } &redef;
const skip_services = { ident, } &redef;
const skip_outbound_services = { allow_services, ftp, addl_web, } &redef;

# Sources that check_scan() never treats as scanners.
const skip_scan_sources = {
	255.255.255.255,	# who knows why we see these, but we do

	j5004.inktomisearch.com, j5005.inktomisearch.com,
	j5006.inktomisearch.com, j100.inktomi.com, j101.inktomi.com,
	j3002.inktomi.com, si3000.inktomi.com, si3001.inktomi.com,
	si3002.inktomi.com, si3003.inktomi.com, si4000.inktomi.com,
	si4001.inktomi.com, si4002.inktomi.com, wm3018.inktomi.com,

	# AltaVista:
	test-scooter.av.pa-x.dec.com,
	scooter2.sv.av.com,
} &redef;

# Same as skip_scan_sources, but expressed as subnets.
const skip_scan_nets: set[subnet] = {} &redef;

# Reverse (SYN-ack) scans seen from these ports are considered to reflect
# possible SYN flooding backscatter and not true (stealth) scans.
const backscatter_ports = {
	http, 53/tcp, 53/udp, bgp, 6666/tcp, 6667/tcp,
} &redef;

# How many different hosts connected to with a possible backscatter
# signature.
global num_backscatter_peers: table[addr] of count &default=0;
# Number of check_scan() calls seen per [source, peer] pair that matched
# the backscatter signature.
global distinct_backscatter_peers: table[addr, addr] of count &default=0;

# Report backscatter when the number of backscatter peers hits these points.
const report_backscatter = { 20, } &redef;

### move connectivity-dropping into conn.bro say, or drop.bro; not here

# If false, drop_address() only reports the request as ignored.
global can_drop_connectivity = F &redef;
# Command run by drop_address() with the address as its single argument.
global drop_connectivity_script = "drop-connectivity" &redef;
# Sources for which check_scan() has requested a drop.
global connectivity_dropped: set[addr];

# check_scan() only does its distinct-peer accounting (and the drops
# based on it) for services in shut_down_scans, or for every service if
# shut_down_all_scans is true.
const shut_down_scans: set[port] &redef;
const shut_down_all_scans = F &redef;

# Number of distinct peers at which a non-local source is dropped.
const shut_down_thresh = 100 &redef;
# Number of distinct peers at which the landmine check is applied.
const landmine_thresh_trigger = 5 &redef;
# Number of distinct low ports at which a source is dropped.
const priv_scan_trigger = 5 &redef;

const root_servers = {
	a.root-servers.net, b.root-servers.net, c.root-servers.net,
	d.root-servers.net, e.root-servers.net, f.root-servers.net,
	g.root-servers.net, h.root-servers.net, i.root-servers.net,
	j.root-servers.net, k.root-servers.net, l.root-servers.net,
	m.root-servers.net,
} &redef;

const gtld_servers = {
	a.gtld-servers.net, b.gtld-servers.net, c.gtld-servers.net,
	d.gtld-servers.net, e.gtld-servers.net, f.gtld-servers.net,
	g.gtld-servers.net, h.gtld-servers.net, i.gtld-servers.net,
	j.gtld-servers.net, k.gtld-servers.net, l.gtld-servers.net,
	m.gtld-servers.net,
} &redef;

# Addresses that drop_address() refuses to drop.
global never_shut_down = {
	root_servers, gtld_servers,
} &redef;

# Subnets that drop_address() refuses to drop.
global never_drop_nets: set[subnet] &redef;
# Number of permitted drop requests seen per address; only the first one
# actually runs the drop script.
global did_drop_address: table[addr] of count &default=0;

# Request that connectivity with address a be dropped.
#
#   a:   the address to drop.
#   msg: optional explanation; if non-empty it is appended, in
#        parentheses, to the notice text.
#
# The request is only reported (AddressDropIgnored) if dropping is
# disabled, if a is in never_shut_down or never_drop_nets, or if a has
# already been dropped.  Otherwise the drop script is run and an
# AddressDropped notice is generated.
function drop_address(a: addr, msg: string)
	{
	# Text appended to every notice message: empty, or " (<msg>)".
	local detail = "";
	if ( msg != "" )
		detail = fmt(" (%s)", msg);

	local exempt = ! can_drop_connectivity || a in never_shut_down ||
			a in never_drop_nets;

	if ( exempt )
		{
		NOTICE([$note=AddressDropIgnored, $src=a,
			$msg=fmt("ignoring request to drop %s%s", a, detail)]);
		return;
		}

	# Count every permitted request; only the first one takes action.
	if ( ++did_drop_address[a] != 1 )
		{
		NOTICE([$note=AddressDropIgnored, $src=a,
			$msg=fmt("ignoring request to drop address %s again%s", a, detail)]);
		return;
		}

	system(fmt("%s %s", drop_connectivity_script, a));
	NOTICE([$note=AddressDropped, $src=a,
		$msg=fmt("dropping address %s%s", a, detail)]);
	}

# Generates a ScanSummary notice giving the number of distinct peers
# currently recorded for orig.  check_scan() schedules this event one day
# after it requests that a source be dropped.
event scan_summary(orig: addr)
	{
	local total = num_distinct_peers[orig];

	NOTICE([$note=ScanSummary, $src=orig, $n=total,
		$msg=fmt("%s scanned a total of %d hosts", orig, total)]);
	}

# Updates the scan-detection state for connection c and generates the
# corresponding notices (AddressScan, PortScan, BackscatterSeen) and
# drop requests.
#
#   c:           the connection being examined.
#   established: whether the connection was established.  Established
#                connections are not considered for port scanning.
#   reverse:     if true, the responder is treated as the scan source
#                and the originator as its target.
#
# Returns F if scan checking is suppressed, if the source or service is
# on one of the skip lists, or if the connection is established; T
# otherwise.
function check_scan(c: connection, established: bool, reverse: bool): bool
	{
	if ( suppress_scan_checks )
		return F;

	local id = c$id;
	local service = (c$service == port_names[20/tcp]) ? 20/tcp : id$resp_p;
	local orig = reverse ? id$resp_h : id$orig_h;
	local resp = reverse ? id$orig_h : id$resp_h;
	local outbound = is_local_addr(orig);

	# if ( orig in connectivity_dropped )
		# return F;

	if ( service in skip_services && ! outbound )
		return F;

	if ( outbound && service in skip_outbound_services )
		return F;

	if ( orig in skip_scan_sources )
		return F;

	if ( orig in skip_scan_nets )
		return F;

	if ( (! established || service !in allow_services) &&
			# not established, service not expressly allowed

	     [orig, resp] !in distinct_peers &&
			# not known peer set, can shut down scan/service

	     (shut_down_all_scans || service in shut_down_scans) )
		{
		local svc = service in port_names ?
				port_names[service] : fmt("%s", service);

		if ( reverse && service in backscatter_ports &&
		     id$orig_p >= 1024/tcp && # reverse, non-priv backscatter port

		     # The test is <= 2 because we get two check_scan()
		     # calls, once on connection attempt and once on
		     # tear-down.
		     ++distinct_backscatter_peers[orig, resp] <= 2 )
			{
			# Looks like backscatter, and it's not scanning
			# a privileged port.
			if ( distinct_backscatter_peers[orig, resp] == 1 &&
			     ++num_backscatter_peers[orig] in report_backscatter )
				{
				NOTICE([$note=BackscatterSeen, $src=orig,
					$conn=c,
					$msg=fmt("backscatter seen from %s (%d hosts; %s)",
						orig, num_backscatter_peers[orig], svc)]);
				}
			}

		else
			{ # done with backscatter check
			add distinct_peers[orig, resp];

			local n = ++num_distinct_peers[orig];

			if ( activate_landmine_check &&
			     n == landmine_thresh_trigger &&
			     mask_addr(resp, 24) in landmine_address )
				{
				drop_address(orig, svc);
				add connectivity_dropped[orig];
				schedule 1 day { scan_summary(orig) };

				NOTICE([$note=AddressDropped, $src=orig,
					$msg=fmt("landmine address trigger %s%s", orig, svc)]);
				}

			# Check for threshold if not outbound.
			if ( n == shut_down_thresh &&
			     ! outbound && orig !in neighbor_nets )
				{
				drop_address(orig, svc);
				add connectivity_dropped[orig];
				schedule 1 day { scan_summary(orig) };
				}

			else
				{ # inside host scanning out?
				# The two direction-specific threshold tests are
				# grouped explicitly: && binds tighter than ||, so
				# without the outer parentheses the dropped/already-
				# reported checks would apply only to the inbound
				# case and outbound scans could be reported twice.
				if ( ((outbound &&
				       n in report_outbound_peer_scan) ||
				      (! outbound && n in report_peer_scan)) &&
				     orig !in connectivity_dropped &&
				     [orig, n] !in reported_address_scans )
					{
					NOTICE([$note=AddressScan, $src=orig,
						$conn=c,
						$msg=fmt("%s has scanned %d hosts (%s)",
							orig, num_distinct_peers[orig], svc)]);
					add reported_address_scans[orig, n];
					}
				}
			}
		}

	if ( established )
		# Don't consider established connections for port scanning,
		# it's too easy to be misled by FTP-like applications that
		# legitimately gobble their way through the port space.
		return F;

	# Coarse search for port-scanning candidates: those that have made
	# connections (attempts) to possible_port_scan_thresh or more
	# distinct ports.
	if ( [orig, service] !in distinct_ports )
		{
		add distinct_ports[orig, service];

		if ( ++num_distinct_ports[orig] == possible_port_scan_thresh )
			{
			# Start per-destination tracking for this source
			# with empty state.
			local empty_table: table[addr] of count;
			local empty_set: set[addr, port];

			num_scan_triples[orig] = empty_table;
			scan_triples[orig] = empty_set;

			add possible_scan_sources[orig];
			}
		}

	# Check for low ports.
	if ( activate_priv_port_check && ! outbound && service < 1024/tcp &&
	     service !in troll_skip_service )
		{
		if ( [orig, service] !in distinct_low_ports )
			{
			add distinct_low_ports[orig, service];

			if ( ++num_distinct_low_ports[orig] == priv_scan_trigger &&
			     orig !in connectivity_dropped && # dropped already
			     orig !in neighbor_nets )
				{
				local svrc = service in port_names ?
					port_names[service] :
					fmt("%s", service);

				drop_address(orig, svrc);
				add connectivity_dropped[orig];
				schedule 1 day { scan_summary(orig) };

				NOTICE([$note=AddressDropped, $src=orig,
					$conn=c,
					$msg=fmt("low port trolling %s %s",
						orig, svrc)]);
				}
			}
		}

	# For sources that have been identified as possible scan sources,
	# keep track of per-host scanning.
	if ( orig in possible_scan_sources )
		{
		if ( resp in num_scan_triples[orig] )
			{
			if ( [resp, service] !in scan_triples[orig] )
				{
				add scan_triples[orig][resp, service];
				if ( ++num_scan_triples[orig][resp] in
				     report_port_scan &&
				     [orig, resp, num_scan_triples[orig][resp]] !in reported_port_scans )
					{
					NOTICE([$note=PortScan, $src=orig,
						$conn=c,
						$msg=fmt("%s has scanned %d ports of %s",
							orig, num_scan_triples[orig][resp], resp)]);
					add reported_port_scans[orig, resp, num_scan_triples[orig][resp]];
					}
				}
			}

		else
			{
			# First port seen for this destination.
			num_scan_triples[orig][resp] = 1;
			add scan_triples[orig][resp, service];
			}
		}

	return T;
	}


# Records that the originator of c tried the given user name/password
# combination, and generates a PasswordGuessing notice whenever the number
# of distinct combinations tried by that source hits a reporting
# threshold.  Combinations already seen from the source are ignored.
event account_tried(c: connection, user: string, passwd: string)
	{
	local guesser = c$id$orig_h;

	# Each [source, user, password] combination is counted only once.
	if ( [guesser, user, passwd] in accounts_tried )
		return;

	local from_local = is_local_addr(guesser);

	# The count is bumped even for sources in skip_accounts_tried; those
	# sources are only exempt from the notice.
	local attempts = ++num_accounts_tried[guesser];

	if ( attempts in
		(from_local ? report_remote_accounts_tried :
			      report_accounts_tried) &&
	     guesser !in skip_accounts_tried )
		{
		local text = fmt("%s has tried %d username/password combinations (latest: %s@%s)",
				guesser, attempts, user, c$id$resp_h);

		NOTICE([$note=PasswordGuessing, $src=guesser,
			$user=user, $sub=passwd, $conn=c,
			$msg=text]);
		}

	add accounts_tried[guesser, user, passwd];
	}


# Note, these three functions decrement a *different* table on expiration,
# not the table t that's passed in.
# Expiration function for distinct_peers: accounts for the removal of the
# expiring [source, peer] pair in num_distinct_peers.
#
#   t:   the table the entry is expiring from (not used).
#   idx: the expiring index, a [source, peer] pair.
#
# Returns 0 secs.
function decr_num_distinct_peers(t: table[addr] of count, idx: any): interval
	{
	local orig: addr;
	local resp: addr;

	[orig, resp] = idx;

	# Guard the decrement the same way decr_num_distinct_ports does:
	# num_distinct_peers has &default=0, so decrementing a missing entry
	# would take an unsigned count below zero.
	if ( orig in num_distinct_peers )
		if ( num_distinct_peers[orig] <= 1 )
			delete num_distinct_peers[orig];
		else
			--num_distinct_peers[orig];

	return 0 secs;
	}

# Expiration function for distinct_ports: accounts for the removal of the
# expiring [source, port] pair in num_distinct_ports.
#
#   t:   the table the entry is expiring from (not used).
#   idx: the expiring index, a [source, port] pair.
#
# Returns 0 secs.
function decr_num_distinct_ports(t: table[addr] of count, idx: any): interval
	{
	local h: addr;
	# distinct_ports is indexed by [addr, port], so the second component
	# is a port, not an address.
	local p: port;

	[h, p] = idx;

	# Drop the counter entirely rather than leaving a zero behind.
	if ( h in num_distinct_ports )
		if ( num_distinct_ports[h] <= 1 )
			delete num_distinct_ports[h];
		else
			--num_distinct_ports[h];

	return 0 secs;
	}

# Expiration function for distinct_low_ports: accounts for the removal of
# the expiring [source, port] pair in num_distinct_low_ports.
#
#   t:   the table the entry is expiring from (not used).
#   idx: the expiring index, a [source, port] pair.
#
# Returns 0 secs.
function decr_num_distinct_low_ports(t: table[addr] of count, idx: any): interval
	{
	local h: addr;
	# distinct_low_ports is indexed by [addr, port], so the second
	# component is a port, not an address.
	local p: port;

	[h, p] = idx;

	# Drop the counter entirely rather than leaving a zero behind.
	if ( h in num_distinct_low_ports )
		if ( num_distinct_low_ports[h] <= 1 )
			delete num_distinct_low_ports[h];
		else
			--num_distinct_low_ports[h];

	return 0 secs;
	}

# Expiration function for possible_scan_sources: discards the
# per-destination port-scan state kept for the expiring source.
#
#   s:   the set the entry is expiring from (not used).
#   idx: the expiring source address.
#
# Returns 0 secs.
function remove_possible_source(s: set[addr], idx: addr): interval
	{
	# The two tables are independent of each other; both are keyed by
	# the source address.
	delete num_scan_triples[idx];
	delete scan_triples[idx];

	return 0 secs;
	}
