#! /usr/bin/env bash

# Print the command-line help text on standard output and terminate the
# script with exit status 1. Invoked when option parsing hits an option it
# does not recognise.
function usage
{
    # Resolve the program name once; quoting "$0" keeps paths that contain
    # whitespace intact.
    local prog
    prog=$(basename -- "$0")

    cat <<EOF

$prog [options] <columns>

Extracts the given columns from an ASCII Bro log on standard input. By
default, bro-cut does not include format header blocks into the output.

Example: cat conn.log | $prog -d ts id.orig_h id.orig_p

    -c       Include the first format header block into the output.
    -C       Include all format header blocks into the output.
    -d       Convert time values into human-readable format (needs gawk).
    -D <fmt> Like -d, but specify format for time (see strftime(3) for syntax).

For time conversion, the default format string can be changed by setting the
environment variable BRO_CUT_TIMEFMT.

EOF
    exit 1
}

# Time format used by -d. The environment variable BRO_CUT_TIMEFMT replaces
# the built-in default when it is set and non-empty; -D replaces either.
timefmt=${BRO_CUT_TIMEFMT:-"%Y-%m-%dT%H:%M:%S%z"}

headers=0      # 0: print no header blocks, 1: first block only, 2: all blocks
substtime=0    # 1: convert time columns using $timefmt
awk=awk

# Prefer gawk whenever it is installed; -d/-D are refused without it below.
command -v gawk >/dev/null 2>&1 && awk=gawk

while getopts "cCdD:" opt; do
    case "$opt" in
        c) headers=1;;
        C) headers=2;;
        d) substtime=1;;
        D) substtime=1; timefmt=$OPTARG;;
        *) usage;;
    esac
done

if [[ "$substtime" == 1 && "$awk" != "gawk" ]]; then
    echo "option -d only supported with gawk" >&2
    exit 1
fi

shift $((OPTIND - 1))

# Join the requested column names into one colon-separated list. Names may be
# passed as separate arguments and/or separated by commas (optionally followed
# by blanks) inside a single argument. The arguments are quoted so that the
# shell never glob-expands or re-splits a column name.
function join_fields
{
    local IFS=' '
    printf '%s\n' "$*" | sed 's/[ ,] */:/g'
}

fields=$(join_fields "$@")

# Run the column-extraction awk program over standard input.
#
# Reads these shell variables:
#   awk       - awk interpreter to run
#   fields    - colon-separated list of requested column names
#   headers   - 0: no header blocks, 1: first block only, 2: all blocks
#   substtime - 1 to convert columns of type "time" with strftime
#   timefmt   - format string handed to strftime
#
# The return status is the exit status of awk (1 after a bro-cut error).
# NOTE: the awk program is a single-quoted shell string, so it must not
# contain any single-quote character; \047 is used where one is needed.
function bro_cut_filter
{
    "$awk" -v "fields=$fields" -v "headers=$headers" -v "substtime=$substtime" -v "timefmt=$timefmt" '

# Report a fatal error on stderr and stop with exit status 1.
function error(msg) {
    print "bro-cut error: " msg >"/dev/stderr";
    exit(1);
}

# Return the character whose numeric code is c.
function asc(c) {
    return sprintf("%c", c);
}

# Return the numeric value of hex digit d, or -1 if d is not a hex digit.
function hexdigit(d) {
    return index("0123456789abcdef", tolower(d)) - 1;
}

# Decode a sequence of "\xXX" escapes into the characters it denotes.
# hi and lo are local variables (extra parameters), so the recursion
# cannot clobber state.
function parseSep(s,    hi, lo) {
    if ( s == "" )
        return "";

    hi = substr(s, 3, 1);
    lo = substr(s, 4, 1);

    # Each escape is four characters long; the rest starts at position 5.
    return asc(hexdigit(hi) * 16 + hexdigit(lo)) parseSep(substr(s, 5));
}

# True if the header line currently being processed should be printed.
function printHeader() {
    return headers == 2 || (headers == 1 && first_header);
}

BEGIN {
    first_header = 1;

    # f[1..n] holds the requested column names in output order.
    n = split(fields, f, /:/);

    # Number of output columns; stays 0 until a #fields line has been seen.
    ncols = 0;
}

# The separator line announces the field separator as "\xXX" escapes.
/^#separator/ {
    split($0, parts, / /);
    FS = OFS = parseSep(parts[2]);
    next;
}

# Map every requested column to its position in the input records.
/^#fields/ {
    # Start from a clean slate so that a later header block cannot inherit
    # positions from an earlier one.
    split("", pos);
    split("", columns);

    for ( i = 2; i <= NF; ++i )
        pos[$i] = i - 1;

    # columns[k] is the input position of the k-th requested column. Looking
    # each name up separately keeps columns requested more than once working.
    for ( i = 1; i <= n; ++i ) {
        if ( ! (f[i] in pos) )
            error("unknown field \047" f[i] "\047");

        columns[i] = pos[f[i]];
    }

    ncols = n;
}

# Remember which input columns hold values of type "time".
/^#types/ {
    split("", times);

    for ( i = 2; i <= NF; ++i )
        times[i-1] = ($i == "time");
}

# Print #fields/#types restricted to the selected columns.
/^#(fields|types)/ && printHeader() {
    printf("%s", $1);

    for ( i = 1; i <= ncols; ++i ) {
        j = columns[i];
        val = $(j + 1);

        # times is keyed by input position, hence j and not i. Converted
        # time values are no longer of type "time", so advertise "string".
        if ( $1 == "#types" && substtime && times[j] )
            val = "string";

        printf("\t%s", val);
    }

    print "";
    next;
}

# Every other header line is passed through unchanged when requested.
/^#/ {
    if ( printHeader() )
        print;

    next;
}

# Data record: print the selected columns separated by tabs.
{
    first_header = 0;

    for ( i = 1; i <= ncols; ++i ) {
        j = columns[i];
        val = $j;

        if ( substtime && times[j] )
            val = strftime(timefmt, val);

        printf("%s%s", (i > 1 ? "\t" : ""), val);
    }

    print "";
}
'
}

bro_cut_filter
