#!/usr/bin/perl

##########################################################
#
#  v1.0, bgp.pl
#
#     Used for interpreting routeviews data, both ribs 
#       and updates
#
#     Usage: (assumes that using zebra_dump_parser.pl)
#       ...Maybe add the zebra_dump_parser directly into the code?
#       bunzip2 -c RIB/UPDATE FILE | zebra_dump_parser | bgp.pl -option
#
#     Command format: ./bgp.pl -OPTION -r BGP.PL_RIB_FILE
#
#     Options: 
#       -o|O: Produces a list of prefixes with originating ASNs
#       -t|T: Takes in an ASN and produces a list of all prefixes
#             that traverse that ASN.  Need RIB Table dumped in.
#	-c|C: Compare a RIB Table File (from -o) with an update file
#		FILENAME is for the RIB Table file with -c
#	-b|B FILENAME FILENAME: compare two RIB Table Files from -o
#		and display the diff only.  If an ASN is added, it will
#		only display the diffs with that ASN involved
#	-p|P PREFIX_FILE: This searches the RIB_TABLE (dumped in from zebra) for all
#		of the prefixes listed in the PREFIX_FILE.  It will 
#		list the prefix and every known AS_PATH associated with it.
#		*****The prefix must be in A.B.C.D/CIDR format
#	-d|D PREFIX_FILE: This searches an UPDATE_FILE (dumped in from
#		zebra)for any update associated with the prefixes in PREFIX_FILE
#       -a|A NEI_LIST PRE_LIST RIB_TABLE-p ASN_to_TEST.  Takes in a RIB_Table dumped from -p,
#       	and uses it to simulate updating BGP table to see if the NEI
#		was affected by the updates.  The NEI and PRE list are used to build
#		the initial hash table, then RIB_TABLE-p used to build initial baseline.
#               Zebra dumps in the update file.
#	-n|N PRE_LIST.  This takes in a prefix list and reports the number of IPs it
#		contains as well as the percentage of the Internet's total IPs.
#	-f|F PRE_LIST UPDATES_c.  Take in an update list from -c and a prefix list and
#               add them to the prefix list.
#       -r|R NEI_IP Takes in a RIB table and outputs the routing table for the
#               neighbor IP address
#       -u|U NEI_IP Takes in an update file and outputs all of the updates from the neighbor.
#
#
##########################################################


use warnings;
use strict;
use Switch;

##########################################################
#constants
# Labels that start each "LABEL: value" line of the dump read from STDIN.
# The parsing loops compare the trimmed text before the first ':' of every
# input line against these constants.
use constant {
    time_const => "TIME",
    type_const => "TYPE",
    prefix_const => "PREFIX",
    originated_const => "ORIGINATED",
    from_const => "FROM",
    as_path_const => "AS_PATH",
    next_hop_const => "NEXT_HOP",
    med_const => "MULTI_EXIT_DISC",
    comm_const => "COMMUNITIES",
    seq_const => "SEQUENCE",
    # The next five labels are only tested by the update-file loop.
    to_const => "TO",
    bgp_pt_const => "BGP PACKET TYPE",
    origin_const => "ORIGIN",
    announced_const => "ANNOUNCED",
    withdrawn_const => "WITHDRAWN",
    # NOTE(review): index_table_const is not referenced in the code visible here.
    index_table_const => "MSG_TABLE_DUMP_V2/INDEX_TABLE",
    atomic_agg_const => "ATOMIC_AGGREGATE",
    aggr_const => "AGGREGATOR",
};

##########################################################
#declarations for every option
my $argnum = 0;               # index into @ARGV used when reporting a bad option
my $userinput = "";           # line most recently read from STDIN
my $current_prefix = "";
my @split_userinput = ();     # current STDIN line split on ':' (starts empty)
my $counter = -1;             # shared loop index / accumulator
my $eof_counter = -1;

# Forward declarations of the subroutines defined at the end of the file.
# The names must match the definitions exactly; trim/ltrim/rtrim carry the
# same ($) prototype as their definitions.
sub trim($);
sub ltrim($);
sub rtrim($);
sub initialize;
sub uniq2;

# Declarations for option o/O
my $current_seq_num = -1;
my $current_position = -1;
my $current_originator = -1;
# The originator list should only contain two entries per row (the prefix
# and its originating ASN), but will contain more if a conflict arises.
my @originator_list;
my @split_as_path;
my $temp_asn = -1;

# Generic variables common to both RIB and UPDATE files
my $time = "";
my $type = "";
my $from = "";
my $as_path = "";
my $next_hop = "";
my $multi_exit_disc = "";
my $communities = "";
my $origin = "";
my $atomic_agg = "";

# Variables used only by RIB files.  For options: o,p
my $sequence = "";
my $prefix = "";
my $originated = "";
my $aggr = "";

# Variables used only by UPDATE files
my $to = "";
my $bgp_type = "";
my @announced;
my @withdrawn;

# Variables for the -c with -r option, -b also
my @rib_data = ();
my @grep_results = ();
my %match;
my @split_rib_data = ();
my @split_rib_data2 = ();

# Variables for the -a options
my @nei_list = ();
my @prefixes = ();
my $test_asn = 0;
my %neighbor_stats;


# Variable for -r options
my $nei = "";

##########################################################
#begin code

# Load the option-specific input files named on the command line.  Every
# option here only fills the file-level lookup structures (%match, @rib_data,
# @prefixes, @nei_list, ...); the dump piped in on STDIN is handled by the
# statements that follow.  Files are opened with the three-argument form so
# that a name beginning with a mode character such as > or | is always
# treated as a plain file name.
switch($ARGV[0]){
    case(/c|C/){
        # -c RIB_FILE: keep every left-trimmed line in @rib_data and index
        # the raw line in %match by its first space-separated field.
        open(RIB, '<', $ARGV[1]) or die "Cannot open $ARGV[1]: $!";
        while (<RIB>){
            chomp;
            push(@rib_data,ltrim($_));
            @split_rib_data = split(/ /,ltrim($_));
            $match{ltrim($split_rib_data[0])} = $_;
        }
        close(RIB);
        # Report the element count; the last index is one less than that.
        print ("The number of lines in $ARGV[1] is " . scalar(@rib_data) . "\n");
    }
    case (/b|B/){
        # -b RIB1 RIB2: RIB2 lines go to @rib_data, RIB1 lines are indexed
        # in %match by their first space-separated field.
        open(RIB1, '<', $ARGV[1]) or die "Cannot open $ARGV[1]: $!";
        open(RIB2, '<', $ARGV[2]) or die "Cannot open $ARGV[2]: $!";
        while(<RIB2>){
            chomp;
            push(@rib_data,ltrim($_));
        }
        while (<RIB1>){
            chomp;
            @split_rib_data = split(/ /,ltrim($_));
            $match{$split_rib_data[0]} = ltrim($_);
        }
        close(RIB1);
        close(RIB2);
    }
    case (/p|P|d|D/){
        # -p / -d PREFIX_FILE: every line of the file becomes a key of %match.
        open(PRE, '<', $ARGV[1]) or die "Cannot open $ARGV[1]: $!";
        while(<PRE>){
            chomp;
            $match{$_} = 1;
        }
        close(PRE);
    }
    case (/a|A/){
        # -a NEI_LIST PRE_LIST RIB_TABLE ASN
        open(NEI, '<', $ARGV[1]) or die "Cannot open $ARGV[1]: $!";
        open(PRE, '<', $ARGV[2]) or die "Cannot open $ARGV[2]: $!";
        open(RIB, '<', $ARGV[3]) or die "Cannot open $ARGV[3]: $!";
        $test_asn = $ARGV[4];
        while(<NEI>){
            chomp;
            push(@nei_list,$_);
            $neighbor_stats{$_} = 0;
        }
        while(<PRE>){
            chomp;
            push(@prefixes,$_);
        }
        # Seed every neighbor/prefix pair with the marker value 1.  The key
        # order is neighbor first, then prefix, because that is how the RIB
        # baseline below and the later update processing address %match.
        foreach my $neighbor (@nei_list){
            foreach my $listed_prefix (@prefixes){
                $match{$neighbor}{$listed_prefix} = 1;
            }
        }
        while(<RIB>){
            chomp;
            # Pull out, in order of appearance, A.B.C.D/LEN, A.B.C.D and the
            # trailing run of digits, spaces and braces.
            @split_rib_data = ($_ =~ m/(\d+\.\d+\.\d+\.\d+\/\d+)|(\d+\.\d+\.\d+\.\d+)|([\d\s\{\}]+$)/g);
            # The alternation leaves undefined slots; keep the real captures.
            foreach my $element ( @split_rib_data ) {
                push(@split_rib_data2,$element)
                    if defined $element and $element ne '';
            }
            # Lines that do not provide all three fields are skipped.
            if (scalar(@split_rib_data2) >= 3){
                $prefix = $split_rib_data2[0];
                $next_hop = $split_rib_data2[1];
                $as_path = ltrim($split_rib_data2[2]);
                # NOTE(review): the ASN is applied as an unanchored regex, so
                # a short ASN also matches inside a longer one.
                if ($as_path =~ $ARGV[4]) {
                    # Store the whole path when it matches the test ASN ...
                    $match{$next_hop}{$prefix} = $as_path;
                }
                else{
                    # ... otherwise only the last ASN of the path.
                    @split_rib_data = split(/ /,$as_path);
                    $match{$next_hop}{$prefix} = pop(@split_rib_data);
                }
            }
            @split_rib_data2 = ();
        }
        close(RIB);
        close(PRE);
        close(NEI);
        $prefix = -1;
    }
    case (/n|N/){
        # -n PRE_LIST: load the prefix list into @prefixes.
        open(PRE, '<', $ARGV[1]) or die "Cannot open $ARGV[1]: $!";
        while (<PRE>){
            chomp;
            push(@prefixes,$_);
        }
        close(PRE);
    }
    case (/f|F/){
        # -f PRE_LIST UPDATES: merge the prefix list with the prefixes found
        # on matching lines of the update listing, then print the sorted set.
        open(PRE, '<', $ARGV[1]) or die "Cannot open $ARGV[1]: $!";
        open(UPD, '<', $ARGV[2]) or die "Cannot open $ARGV[2]: $!";
        while (<PRE>){
            chomp;
            $match{$_} = $_;
        }
        close(PRE);
        while (<UPD>){
            chomp;
            # NOTE(review): the ASN 23724 is hard-coded here.
            if ($_ =~ /New originator: 23724/){
                @split_rib_data = ($_ =~ m/\d+\.\d+\.\d+\.\d+\/\d+/g);
                # Only record the line when it really contains a prefix.
                if (@split_rib_data){
                    $match{$split_rib_data[0]} = $split_rib_data[0];
                }
            }
        }
        close(UPD);
        foreach $prefix (keys %match){
            push(@prefixes,$prefix);
        }
        # Pad every number to width 3 so a plain string sort orders the
        # addresses numerically, then strip the padding again.
        @prefixes = map {s/\s+//g; $_} sort map {s/(\d+)/sprintf "%3s", $1/eg; $_} @prefixes;
        print join "\n",@prefixes;
    }
    case (/r|R/){
        # -r NEI_IP: remember the neighbor address.
        $nei = trim($ARGV[1]);
    }
    case (/t|T/){
        # -t ASN: remember the ASN to look for.
        $temp_asn = $ARGV[1];
    }
}

switch ($ARGV[0]) {
    # RIB-table options (-o, -p, -r, -t): read the dump from STDIN one
    # "LABEL: value" line at a time, collect the fields of the current entry
    # and act on it when a "-" separator line or a blank line is reached.
    case (/o|O|p|P|r|R|t|T/) {
        # -t compares the ASN as a whole number: it may not be embedded in a
        # longer run of digits, so 701 does not match 7018 or 1701.  The
        # value is quoted so that it is never interpreted as a regex.
        my $asn_pattern = '(?<![0-9])' . quotemeta($temp_asn) . '(?![0-9])';

        $userinput = <STDIN>;
        while(defined($userinput)){
            chomp($userinput);
            @split_userinput = split(/:/,$userinput);
            # Trim the label unless the line was empty.
            switch($split_userinput[0]){
                case (''){}
                else{
                    $split_userinput[0]=trim($split_userinput[0]);
                }
            }
            switch($split_userinput[0]){
                case (time_const) {
                    # The value was split on ':' as well, so rejoin three pieces.
                    $time = $split_userinput[1] . ":" . $split_userinput[2] . ":" . $split_userinput[3];
                }
                case (type_const) {
                    $type = $split_userinput[1];
                }
                case (seq_const) {
                    $sequence = $split_userinput[1];
                }
                case (prefix_const) {
                    $prefix = ltrim($split_userinput[1]);
                }
                case (originated_const) {
                    $originated = $split_userinput[1];
                }
                case (from_const) {
                    $from = $split_userinput[1];
                }
                case (as_path_const) {
                    $as_path = $split_userinput[1];
                    @split_as_path = split(/ /,$split_userinput[1]);
                }
                case (next_hop_const) {
                    $next_hop = $split_userinput[1];
                }
                case (med_const) {
                    $multi_exit_disc = $split_userinput[1];
                }
                case (comm_const) {
                    $communities = $split_userinput[1];
                }
                case (origin_const) {
                    $origin = $split_userinput[1];
                }
                case (atomic_agg_const) {
                    $atomic_agg = $split_userinput[1];
                }
                case (aggr_const){
                    $aggr  =$split_userinput[1];
                }
                case ("-") {
                    # A "-" line ends one entry: act on the collected fields.
                    switch($ARGV[0]){
                        case(/o|O/){
                            switch ($current_seq_num){
                                case($sequence){
                                    # Same sequence number: add another originator.
                                    push(@originator_list,pop(@split_as_path));
                                }
                                case (-1){
                                    # First entry of a row: prefix, then originator.
                                    $current_seq_num = $sequence;
                                    push(@originator_list, $prefix);
                                    push(@originator_list,pop(@split_as_path));
                                }
                            }
                        }
                        case(/p|P/){
                            if(exists $match{$prefix}){
                                print ("Prefix: $prefix; Next Hop: $next_hop; AS Path: $as_path\n");
                            }
                        }
                        case (/r|R/){
                            # Exact comparison: the old regex match let the dots
                            # act as wildcards and matched partial addresses.
                            if ($nei ne "" and trim($next_hop) eq $nei){
                                print "Prefix: $prefix, Next Hop: $next_hop, AS Path: $as_path\n";
                            }
                        }
                        case(/t|T/){
                            if ($as_path =~ $asn_pattern){
                                print "Found $prefix traversed $temp_asn: $as_path\n";
                                $match{$prefix} = $prefix;
                            }
                        }
                    }
                    initialize($time,$type,$from,$as_path,$next_hop,$multi_exit_disc,$communities,$sequence,$prefix,$originated);
                }
                case (''){
                    # A blank line means that the current prefix or sequence
                    # number has finished and the next entry will contain the
                    # new sequence number and prefix.  -1 is the marker the
                    # "-" handler uses to recognise the start of a new row.
                    switch($ARGV[0]){
                        case(/o|O/){
                            if ($current_seq_num != -1){
                                # Reduce the current row to its unique elements.
                                @originator_list = uniq2(@originator_list);
                                foreach (@originator_list){
                                    print $_;
                                    print " ";
                                }
                                print "\n";
                            }
                            $current_seq_num = -1;
                            @originator_list = ();
                        }
                        case(/p|P/){
                            if(exists $match{$prefix}){
                                print ("Prefix: $prefix; Next Hop: $next_hop; AS Path: $as_path\n");
                            }
                        }
                        case (/r|R/){
                            if ($nei ne "" and trim($next_hop) eq $nei){
                                print "Prefix: $prefix, Next Hop: $next_hop, AS Path: $as_path\n";
                            }
                        }
                        case(/t|T/){
                            if ($as_path =~ $asn_pattern){
                                $match{$prefix} = $prefix;
                            }
                        }
                    }

                    initialize($time,$type,$from,$as_path,$next_hop,$multi_exit_disc,$communities,$sequence,$prefix,$originated);
                }
                case (m/peer|id|view_name/i){
                    # Do nothing for now, this is just at the beginning of each RIB table.
                }
                else{
                    print ("Found an undefined line type:$split_userinput[0].\n");
                }
            }
            # Get the next line for processing.
            $userinput = <STDIN>;
        }
    }
    case (/u|U|c|C|d|D|a|A/){
	#print "$ARGV[$argnum] was entered and u/U/c/C/d/D was selected.\n";
	# Update-file options (-u, -c, -d, -a): read the dump from STDIN one
	# "LABEL: value" line at a time, collect the fields of the current
	# update and act on it when a blank line is reached.  The per-update
	# state, including the split AS path, is cleared after every update so
	# that nothing leaks into the next one.
	$userinput = <STDIN>;
	while(defined($userinput)){
	    chomp($userinput);
	    @split_userinput = split(/:/,$userinput);
	    # Trim the label unless the line was empty.
	    switch($split_userinput[0]){
	        case (''){}
	        else{
	            $split_userinput[0]=trim($split_userinput[0]);
	        }
	    }
	    switch($split_userinput[0]){
	        case (time_const) {
	            # The value was split on ':' as well, so rejoin three pieces.
	            $time = $split_userinput[1] . ":" . $split_userinput[2] . ":" . $split_userinput[3];
	        }
	        case (type_const) {
	            $type = $split_userinput[1];
	        }
	        case (from_const) {
	            $from = ltrim($split_userinput[1]);
	        }
	        case (to_const) {
	            $to = $split_userinput[1];
	        }
	        case (bgp_pt_const) {
	            $bgp_type = $split_userinput[1];
	        }
	        case (as_path_const) {
	            $as_path = $split_userinput[1];
	            @split_as_path = split(/ /,$split_userinput[1]);
	        }
	        case (next_hop_const) {
	            $next_hop = ltrim($split_userinput[1]);
	        }
	        case (med_const) {
	            $multi_exit_disc = $split_userinput[1];
	        }
	        case (comm_const) {
	            $communities = $split_userinput[1];
	        }
	        case (origin_const) {
	            $origin = $split_userinput[1];
	        }
	        case (atomic_agg_const){
	            $atomic_agg = $split_userinput[1];
	        }
	        case (aggr_const){
	            $aggr = $split_userinput[1];
	        }
	        case (announced_const) {
	            push(@announced,ltrim($split_userinput[1]));
	        }
	        case (withdrawn_const) {
	            push(@withdrawn,ltrim($split_userinput[1]));
	        }
	        case (''){
	            # Blank line: the current update is complete.
	            switch($ARGV[0]){
	                case(/u|U/){
	                    switch($type){
	                        case (/UNKNOWN/){}
	                        case (''){}
	                        else{
	                            print ("Type: $type");
	                            if ($#announced >= 0){
	                                print (", Announced: ");
	                                foreach (@announced){
	                                    print $_;
	                                    print (" ");
	                                }
	                            }
	                            if ($#withdrawn >= 0){
	                                print(", Withdrawn: ");
	                                foreach (@withdrawn){
	                                    print $_;
	                                    print (" ");
	                                }
	                            }
	                            print (", Originator: ");
	                            # Only an update that carried an AS path has an
	                            # originator; otherwise the field stays empty.
	                            print pop(@split_as_path) if @split_as_path;
	                            print("\n");
	                        }
	                    }
	                    initialize($time,$type,$from,$to,$bgp_type,$as_path,$next_hop,$multi_exit_disc,$communities,$origin);
	                    @announced = ();
	                    @withdrawn = ();
	                    @split_as_path = ();
	                }
	                case(/c|C/){
	                    switch($type){
	                        case (/UNKNOWN/){}
	                        case (''){}
	                        else{
	                            if ($#split_as_path >= 0){
	                                # Last ASN of the path, without AS-set braces.
	                                $temp_asn = pop(@split_as_path);
	                                $temp_asn =~ s/(\{|\})//g;
	                            }
	                            for ($counter=0;$counter<= $#announced; $counter++){
	                                if(exists $match{$announced[$counter]}){
	                                    # Compare against the originator fields of the
	                                    # stored RIB line only.  Matching the ASN as a
	                                    # regex against the whole line also hit digits
	                                    # inside the prefix and inside longer ASNs.
	                                    @split_rib_data = split(/ /,ltrim($match{$announced[$counter]}));
	                                    shift(@split_rib_data);
	                                    my $known_originator = grep { my $asn = $_; $asn =~ s/(\{|\})//g; $asn eq $temp_asn } @split_rib_data;
	                                    if (!$known_originator){print "New originator: $temp_asn, was $match{$announced[$counter]}, $time\n"};
	                                    if ($known_originator){print "Same originator: $temp_asn, was $match{$announced[$counter]}, $time\n"};
	                                }
	                                else{
	                                    print ("Found a new prefix: $announced[$counter] and owned by $temp_asn, $time\n");
	                                }
	                            }
	                        }
	                    }
	                    initialize($time,$type,$from,$to,$bgp_type,$as_path,$next_hop,$multi_exit_disc,$communities,$origin);
	                    @announced = ();
	                    @withdrawn = ();
	                    @split_as_path = ();
	                }
	                case(/d|D/){
	                    switch($type){
	                        case (/UNKNOWN/){}
	                        case (''){}
	                        else{
	                            # Print the update once if any announced prefix is listed.
	                            for ($counter=0;$counter<= $#announced; $counter++){
	                                if(exists $match{$announced[$counter]}){
	                                    print ("Time: $time, From: $from, Next Hop: $next_hop, AS Path: $as_path, Prefixes: @announced\n");
	                                    last;
	                                }
	                            }
	                        }
	                    }
	                    initialize($time,$type,$from,$to,$bgp_type,$as_path,$next_hop,$multi_exit_disc,$communities,$origin);
	                    @announced = ();
	                    @withdrawn = ();
	                    @split_as_path = ();
	                }
	                case(/a|A/){
	                    switch($type){
	                        case (/UNKNOWN/){}
	                        case (''){}
	                        else{
	                            # Record every announcement for a tracked
	                            # neighbor/prefix pair: the marker value 1 is
	                            # replaced, anything else is appended to.
	                            for ($counter=0;$counter<= $#announced; $counter++){
	                                if(exists $match{$from}{$announced[$counter]}){
	                                    switch($match{$from}{$announced[$counter]}){
	                                        case ('1'){
	                                            $match{$from}{$announced[$counter]} = $time . " " . $as_path . "; ";
	                                        }
	                                        else{
	                                            $match{$from}{$announced[$counter]} = $match{$from}{$announced[$counter]} . $time . " " . $as_path . "; ";
	                                        }
	                                    }
	                                }
	                            }
	                            # Same bookkeeping for withdrawals.
	                            for ($counter=0;$counter<= $#withdrawn; $counter++){
	                                if (exists $match{$from}{$withdrawn[$counter]}){
	                                    switch($match{$from}{$withdrawn[$counter]}){
	                                        case ('1'){
	                                            $match{$from}{$withdrawn[$counter]} = "$time WITHDRAWN; ";
	                                        }
	                                        else{
	                                            $match{$from}{$withdrawn[$counter]} = $match{$from}{$withdrawn[$counter]} . "$time WITHDRAWN; ";
	                                        }
	                                    }
	                                }
	                            }
	                        }
	                    }
	                    initialize($time,$type,$from,$to,$bgp_type,$as_path,$next_hop,$multi_exit_disc,$communities,$origin);
	                    @announced = ();
	                    @withdrawn = ();
	                    @split_as_path = ();
	                }
	            }
	        }
	        case (/[0-9A-F]/){
	            # Any other label containing a digit or an uppercase A-F is
	            # ignored silently (lines from unknown BGP types).
	        }
	        else{
	            print ("Found an undefined line type:$split_userinput[0].\n");
	        }
	    }
	    $userinput = <STDIN>;
	}
	# -a epilogue: for every neighbor list each prefix with the history
	# recorded for it, then print how many prefixes were affected.
	switch($ARGV[0]){
	    case(/a|A/){
	        foreach (@nei_list){
	            $from = $_;
	            print ("$from\n");
	            for $counter (@prefixes){
	                # NOTE(review): the ASN is applied as an unanchored regex
	                # to a value that can also hold timestamps.
	                if (exists $match{$from}{$counter} and $match{$from}{$counter} =~ $ARGV[4]){
	                    print "     $counter: " . $match{$from}{$counter} . "\n";
	                    $neighbor_stats{$from}++;
	                }
	                else{
	                    print "     $counter: \n";
	                }
	            }
	        }
	        print"\n\n\n";
	        # The share is taken over the number of prefixes (not the last
	        # index) and scaled to a real percentage; an empty prefix list
	        # reports 0 instead of dividing by zero.
	        my $prefix_count = scalar(@prefixes);
	        foreach(@nei_list){
	            my $percent_affected = 0;
	            if ($prefix_count > 0){
	                $percent_affected = 100 * $neighbor_stats{$_} / $prefix_count;
	            }
	            print "$_ had " . $neighbor_stats{$_} . " affected prefixes.  This is " . $percent_affected . "%\n";
	        }
	    }
	}
    }
    case (/b|B/){
        # Expected invocation: ./bgp.pl -b RIBOLD RIBNEW
        #   %match    holds the RIBOLD lines, keyed by their first field
        #   @rib_data holds the RIBNEW lines
        # A RIBNEW line is printed when its prefix is unknown to RIBOLD or
        # when the remaining fields, compared as sorted lists, differ.
        print("##########################################################\n");
        print("# $ARGV[1]                  # $ARGV[2]                   \n");
        foreach (@rib_data){
            @split_rib_data = split(/ /,$_);
            $prefix = $split_rib_data[0];
            if (!exists $match{$prefix}){
                print("                          # $_\n");
            }
            else{
                @split_rib_data2 = split(/ /,$match{$split_rib_data[0]});
                # Drop the leading prefix field from both sides.
                shift(@split_rib_data2);
                shift(@split_rib_data);
                @split_rib_data = sort(@split_rib_data);
                @split_rib_data2 = sort(@split_rib_data2);
                # Different lengths always differ; equal lengths are
                # compared position by position.
                my $differs = ($#split_rib_data2 != $#split_rib_data);
                if (!$differs){
                    foreach my $position (0 .. $#split_rib_data2){
                        if ($split_rib_data2[$position] ne $split_rib_data[$position]){
                            $differs = 1;
                            last;
                        }
                    }
                }
                if ($differs){
                    print ("$prefix @split_rib_data2    # $_\n");
                }
            }
        }
    }
    case (/n|N/){
        # -n PRE_LIST: add up the number of addresses covered by the
        # A.B.C.D/LEN entries loaded into @prefixes.
        $counter = 0;
        foreach(@prefixes){
            @split_rib_data = split(/\//,$_);
            if ($_ =~ m/^\s*$/){
                # Blank line: nothing to count.
            }
            elsif (defined $split_rib_data[1] and $split_rib_data[1] =~ m/^\s*([0-9]+)\s*$/ and $1 <= 32){
                $counter = $counter + (2**(32 - $1));
            }
            else{
                # Without a usable length the entry used to be counted as a
                # /0, which added the whole address space to the total.
                warn "Skipping malformed prefix entry: $_\n";
            }
        }
        # Both ratios below are printed as plain fractions.
        # NOTE(review): 2**29 + 3*(2**24) is presumably the size of the
        # reserved ranges that are excluded; confirm which ranges are meant.
        print "The total number of blocked IPs was: $counter\nPercent of total Internet IP Space: " . $counter/2**32 . "\n";
        print "After removing non-routable IPs: " . $counter/((2**32) - 2**29 - 3*(2**24)) . ", " . ((2**32) - 2**29 - 3*(2**24)) , ", " . 2**32 . "\n";
    }
    case (/f|F/){
        # Intentionally empty: the -f work is done while the option files are
        # loaded.  The case only exists so that -f does not reach the else
        # branch below and get reported as an unknown option.
    }
    else {
	# No earlier case matched the first argument: report it.
	print "Entered $ARGV[$argnum] as argument but that is not an option.\n";
    }
}
# -t epilogue: print every prefix collected in %match, ordered by address.
if (defined $ARGV[0] and $ARGV[0] =~ /t|T/){
    push(@prefixes, keys %match);
    # Pad every number to width 3 so that a plain string sort orders the
    # addresses numerically ...
    my @padded = map { my $entry = $_; $entry =~ s/(\d+)/sprintf "%3s", $1/eg; $entry } @prefixes;
    my @ordered = sort @padded;
    # ... then strip the padding again before printing.
    s/\s+//g foreach @ordered;
    @prefixes = @ordered;
    print join "\n",@prefixes;
}


##########################################################
# Subroutines

# Reset every variable passed in to the empty string.  The elements of @_
# are aliases of the caller's variables, so assigning through them changes
# the caller's variables directly.
sub initialize{
    $_ = "" foreach @_;
}

#########################################################
#
#  Got the trim code from http://www.somacon.com/p114.php
#
##########################################################

# Return a copy of the argument with the whitespace at both ends removed.
# The caller's value is left untouched.
sub trim($)
{
    my ($text) = @_;
    $text =~ s/^\s+|\s+$//g;
    return $text;
}
# Return a copy of the argument without its leading whitespace.
sub ltrim($)
{
    (my $text = $_[0]) =~ s/^\s+//;
    return $text;
}
# Return a copy of the argument without its trailing whitespace.
sub rtrim($)
{
    (my $text = $_[0]) =~ s/\s+$//;
    return $text;
}

# Return the arguments with repeated values removed, keeping the first
# occurrence of each value and the original order.
sub uniq2 {
    my %already_seen;
    return grep { !$already_seen{$_}++ } @_;
}
