#!/usr/bin/perl -w

# Check namespace cleanness of a library.
# Allowed symbols are passed as arguments.
# They may have trailing * = wildcard.
# Symbols are listed as full function unmangled names without arguments,
# e.g. 'foo bar* nspace::*' allows foo(), foo(int), bar(), barbar()
# and all symbols in namespace/class nspace.
# If an argument has a dot in it, it's the name of a file containing
# allowed symbols, one per line.


$thisProg   = "$0";     # This program's name

# Command line state, filled in by the loop below.
$la_file = "";          # first positional argument: the libtool .la file
$allowed_symbols = "";  # remaining positional arguments, space separated
$debug = 0;             # set by -v / --verbose
$allowed_weak = "";     # accumulated values of all --allowweak= options
$weak_specified = 0;    # true once any --allowweak= option was seen

# Consume @ARGV one argument at a time; options and positional arguments
# may be mixed freely.
while( defined( $ARGV[ 0 ] ))
{
    $_ = shift @ARGV;
    if( /^--verbose$|^-v$/ )  # -v is advertised by --help, so accept it too
    {
        $debug = 1;
    }
    elsif( /^--help$|^-h$/ )
    {
        print STDOUT "Usage $thisProg [OPTION] ... [.la file] [allowed symbols] ...\n",
                "\n",
                "Check if the given library has only allowed public symbols.\n",
                "\n",
                "  --allowweak=[symbol] allow only these weak symbols\n",
                "  -v, --verbose        verbosely list files processed\n",
                "  -h, --help           print this help, then exit\n";
        exit 0;
    }
    elsif( /^--allowweak=(.*)$/ )
    {
        $allowed_weak .= " " . $1;
        $weak_specified = 1;
    }
    elsif( /^-/ )  # anything else starting with a dash is an unknown option
    {
        die "Invalid argument!\n";
    }
    else
    {
        # The first positional argument is the .la file, every further one
        # is an allowed symbol (or a file listing allowed symbols).
        if( ! $la_file )
        {
            $la_file = $_;
        }
        else
        {
            $allowed_symbols .= " " . $_;
        }
    }
}

# Without any --allowweak option every weak symbol is accepted.
# Weak symbols are instances of templates and similar stuff - unfortunately
# they also include things from other libraries, so restricting them cannot
# be on by default.
$allowed_weak = "*" unless $weak_specified;

if( $debug )
{
    print STDERR ".la file:", $la_file, "\n";
    print STDERR "allowed_symbols:", $allowed_symbols, "\n";
    print STDERR "allowed_weak:", $allowed_weak, "\n";
}

# System symbols that are always allowed.
# On the original author's system, every .so has :
#   A _DYNAMIC
#   A _GLOBAL_OFFSET_TABLE_
#   A __bss_start
#   A _edata
#   A _end
#   T _fini
#   T _init
# Symbols of type A are skipped by the checking loop anyway, so only the
# T symbols have to be listed here.
$default_symbols = "_fini _init";

print STDERR "default_symbols: ", $default_symbols, "\n" if $debug;

# Without a .la file there is nothing to check; say so instead of failing
# with a confusing open() error on an empty name.
if( ! $la_file )
{
    print STDERR "No .la file specified!\n";
    exit 1;
}

# Three-argument open so that special characters in the name are never
# interpreted as an open mode; the message names the file that failed.
open( my $la_handle, "<", $la_file )
    || die "Couldn't open " . $la_file . ": " . $! . "\n";

# get the real library file from .la
$lib_file = "";
while( my $la_line = <$la_handle> )
{
    # The entry looks like
    #   library_names='libfoo.so.1.0.0 libfoo.so.1 libfoo.so'
    # Take the first name. The character class also stops at the closing
    # quote, so a single-name entry does not end up with a trailing "'" and
    # an empty entry (library_names='') yields an empty name.
    if( $la_line =~ /^\s*library_names='([^ ']*)/ )
    {
        $lib_file = $1;
    }
}
close( $la_handle );

print STDERR "lib file:" . $lib_file . "\n" if $debug;

if( ! $lib_file )
{
    print STDERR "Library file not found in .la file!\n";
    exit 1;
}

# The library is looked up in the .libs/ directory next to the .la file:
# strip the file name from the .la path, keeping the directory part
# (including its trailing slash, or nothing at all for a bare file name).
$libpath = $la_file;
$libpath =~ s%[^/]*$%%;
$lib_file = $libpath . ".libs/" . $lib_file;

print STDERR "libfile: ". $lib_file . "\n" if $debug;

# Forward declaration: the prototype has to be known before the calls below
# so that the array and the hash are passed by reference.
sub process_symbols($\@\%);

# The system symbols are always permitted in addition to the user's list.
$allowed_symbols = $allowed_symbols . " " . $default_symbols;

# Allow-list for all symbols: prefixes of wildcard entries and exact names.
( @wildcards, %exacts ) = ();
process_symbols( $allowed_symbols, @wildcards, %exacts );

# Allow-list that is consulted for weak symbols only.
( @weak_wildcards, %weak_exacts ) = ();
process_symbols( $allowed_weak, @weak_wildcards, %weak_exacts );

# Returns 1 when $symbol is allowed by the given lists, 0 otherwise.
#   $symbol        - symbol name to look up
#   $exacts_ref    - hash reference; a true value for the key $symbol allows it
#   $wildcards_ref - array reference of prefixes; $symbol is allowed when it
#                    starts with any of them
sub symbol_is_allowed
{
    my ( $symbol, $exacts_ref, $wildcards_ref ) = @_;

    return 1 if $exacts_ref->{ $symbol };
    for my $wild ( @{ $wildcards_ref } )
    {
        return 1 if index( $symbol, $wild ) == 0;
    }
    return 0;
}

# List the dynamic, external symbols of the library with demangled names.
# nm is started directly (list form of open) instead of through a shell
# pipeline, so unusual characters in the library path cannot be interpreted
# by the shell and the exit status seen at close() is the one of nm itself.
my @nm_command = ( "nm", "-BDCg", $lib_file );

# TODO how portable is this nmcheck stuff?

print STDERR "nm command:" . join( " ", @nm_command ) . "\n" if $debug;

open( my $nm_output, "-|", @nm_command ) || die "nm command failed: " . $! . "\n";

# Prefixes of decorated symbol names. Each one is replaced by the name that
# follows it plus "::", which makes "CLASS::*" work also for e.g.
# "vtable for CLASS". They are applied one after another, in this order.
my @decoration_prefixes = (
    qr/^typeinfo for /,
    qr/^typeinfo fn for /,
    qr/^typeinfo name for /,
    qr/^vtable for /,
    qr/^guard variable for /,
    qr/^reference temporary for /,
    qr/^VTT for /,
    qr/^virtual thunk \[[^\]]*\] to /,
    qr/^non-virtual thunk \[[^\]]*\] to /,
    qr/^covariant return thunk \[[^\]]*\] to /,
    qr/^construction vtable thunk for /,
);

my $exit_code = 0;

while( my $line = <$nm_output> )
{
    # Strip not exported symbols, which don't have an address (= first
    # column); this replaces the former "grep -v '^ '" stage of the pipeline.
    next if $line =~ /^ /;

    my $type;
    my $symbol;
    if( $line =~ /^[^ ]* (.) (.*)$/ )
    {
        $type = $1;
        $symbol = $2;
    }
    else
    {
        die "Invalid line: " . $line . "\n";
    }

    print STDERR "Type: " . $type . " , symbol: " . $symbol . "\n" if $debug;
    if( $type eq "A" )
    { # these should be system symbols, so ignore them
        next;
    }

    # The untouched name is kept for the error message.
    my $orig_symbol = $symbol;

    if( $symbol =~ /\(anonymous namespace\)/ )
    { # TODO tell to prefer named namespaces? (shorter symbols)
        next;
    }

    # strip prefixes
    for my $prefix ( @decoration_prefixes )
    {
        $symbol =~ s/$prefix([^ ]*)/$1::/;
    }

    $symbol =~ s/\(.*//;  # strip () and everything after it

    # print STDERR "Processed symbol: " . $symbol . "\n" if $debug;

    my $found = symbol_is_allowed( $symbol, \%exacts, \@wildcards );
    if( ( ! $found ) && ( $type eq "W" || $type eq "V" ))
    {
        # Weak symbols get a second chance against the weak allow-list.
        $found = symbol_is_allowed( $symbol, \%weak_exacts, \@weak_wildcards );
    }

    if( ! $found )
    {
        print STDERR "Public symbol " . $orig_symbol . " is not allowed!\n";
        $exit_code = 1;
    }
}

# close() on a pipe is false when the command could not be run or exited
# with a non-zero status. Without this check a missing or unreadable library
# would look like a library without any symbols and pass silently.
if( ! close( $nm_output ))
{
    my $reason = $! ? ": " . $! : " (exit status " . ( $? >> 8 ) . ")";
    print STDERR "nm failed for " . $lib_file . $reason . "\n";
    $exit_code = 1;
}

exit $exit_code;

# Split a list of allowed symbols into wildcard prefixes and exact names.
#
# Arguments (because of the prototype, callers pass the array and the hash
# themselves and they arrive here as references):
#   $allowed_symbols - whitespace separated entries; the single word "NONE"
#                      stands for an empty list. An entry containing a dot
#                      is the name of a file with one entry per line.
#   @wildcards       - the text before the trailing '*' of every wildcard
#                      entry is appended to this array
#   %exacts          - every other entry is added as a key with value 1
#
# Dies when a symbol file cannot be opened.
sub process_symbols($\@\%)
{
    my ( $allowed_symbols, $wildcards_ref, $exacts_ref ) = @_;

    $allowed_symbols =~ s/^ *//;  # strip whitespace
    $allowed_symbols =~ s/ *$//;

    if( $allowed_symbols eq "NONE" )
    {
        $allowed_symbols = "";
    }

    # First pass: expand file entries into the symbols they list.
    my @symbols = ();
    for my $entry ( split( ' ', $allowed_symbols ))
    {
        if( $entry =~ /\./ )  # dot in name -> file
        {
            open( my $symbol_file, "<", $entry )
                || die ( "Cannot open file " . $entry . "!" );
            while( my $file_line = <$symbol_file> )
            {
                # Remove the line terminator and surrounding whitespace.
                # With the newline left in place an exact name would be
                # stored as "name\n" and never match a symbol from nm.
                $file_line =~ s/^\s+//;
                $file_line =~ s/\s+$//;
                next if $file_line eq "";  # ignore blank lines
                push( @symbols, $file_line );
            }
            close( $symbol_file );
        }
        else
        {
            push( @symbols, $entry );
        }
    }

    # Second pass: sort the symbols into wildcards and exact names.
    # TODO check whether _NMCHECK doesn't list some weird symbols, like main ?
    for my $symbol ( @symbols )
    {
        if( $symbol =~ /^(.*)\*$/ )  # trailing *
        {
            push( @{ $wildcards_ref }, $1 );
        }
        else
        {
            $exacts_ref->{ $symbol } = 1;
        }
    }
    return;
}
