#! /usr/bin/perl
#
# -f file   the text after which to model the statistics
# -i 		to init the file.stats table
# -e -k key -m message -s startword  to encode message in a
#									 text starting with startword
# -d -k key to decode a text given on stdin
#

# use strict;
use warnings;
use Storable qw(nstore retrieve);
use Getopt::Std;
use Digest::SHA1 qw(sha1);

# use MLDBM qw(DB_File);

my %stats;
my $file;

my ( $word, $i, $window, $wsize, $mess, $text, $key );

# We're using strict and must declare the $opts
our ( $opt_s, $opt_f, $opt_m, $opt_h, $opt_w, $opt_i, $opt_d, $opt_e, $opt_l, $opt_k, $opt_D );

$window = "";

# forward declarations
sub getweightrand;
sub mkcoin;
sub has_eq;
sub usage;

getopts('deif:k:m:s:w:l:Dh');

if ($opt_h) {
	usage;
	exit;
	# notreached
}

# windowsize
$wsize = $opt_w;
$wsize = 7 unless $wsize;

# output length for $opt_D
$outl = 1000;

$file = $opt_f;
usage unless $file;

# $tie (%stats, 'MLDBM', "$file.stats") or die "could not tie to $file.stats";

# Init the tables
if ($opt_i) {
    open I, "<$file" or die "Could not open $file: $@";
    while (<I>) {
        chomp;
        $_ .= " ";
        my @chars = split //, $_;

        # 	print "Chars: " . (join  "|", @chars) ."\n";
        while ( length($window) < $wsize ) {
            $window .= shift @chars;
        }
        if ( length($window) == $wsize ) {
            while ( scalar @chars ) {
                my $next = shift @chars;
                $stats{$window}{$next}++;
                $stats{$window}{'num'}++;
                $window .= $next;

                #			print "$window\n";
                $window = substr $window, -($wsize), $wsize;
            }
        }
        else {
            print "window wrong size?\n";
        }
    }
    close I;
	print STDERR "Finished parsing $file\n";
	print STDERR "Calling nstore\n";
    nstore( \%stats, "$file.stats" ) or die "Could not store statistics";
#     untie %stats;
    exit;

    # notreached
}

# encoding/decoding requires a key to seed the PRNG
if ( $opt_e or $opt_d ) {
    $key = $opt_k;
    usage unless $key;
}

if ($opt_D ) {
	$key = $opt_k;
    usage unless $key;
}

# Encoding
if ($opt_e or $opt_D) {
	my $count = 0;

    # The word to start with
    $word = $opt_s;
    usage unless $word;
	if (length $word  < $wlen) {
		die "Startword should be $wlen chars at least";
	}

	if ($opt_e) {
	    $mess = $opt_m;
  		usage unless $mess;
	}

    my $sref = retrieve("$file.stats")
      or die "Could not open statsfile $file.stats";

    %stats = %$sref;

    # $word should not be longer than $wsize
    if ( length($word) > $wsize ) {
        $word = substr $word, -$wsize, $wsize;
    }

    $text = $word;

    if ( not exists( $stats{$word} ) ) {
        die "$word is not in $file";
    }

    $window = $word;
    my $alt;
    my $coin = mkcoin($key);

	if ($opt_e){ 
   	 # convert the message to bits
   	 my @messbit = split //, ( unpack "b*", ( pack "a*", $mess ) );
	}

    while ( exists( $stats{$window} ) and ( scalar @messbit or ($opt_D and $count < $outl ) ) ) {
        my $rand = &$coin;
        my $next = getweightrand( $window, $rand );

		if ($opt_e) {
        # If there's another character equally likely
        # embed a bit here
        if ( defined( $alt = has_eq( $window, $next ) ) ) {
            print STDERR "Alternatives: $next and $alt\n";
            my $b = shift @messbit;
            if ( defined $b and $b == 1 ) {
                $next = $alt;
            }
        }
		}
        $window .= $next;

        # shift the window
        $window = substr $window, 1, $wsize;
		$count++;
        $text .= $next;
    }

    print $text;
    exit;

    #notreached
}

if ($opt_d) {
    $key = $opt_k;
    usage unless $key;

    my $sref = retrieve("$file.stats")
      or die "Could not open statsfile $file.stats";

    %stats = %$sref;

    local $/;
    undef $/;

    my $text = <STDIN>;
    $word = substr $text, 0, $wsize;
    $text = substr $text, $wsize;

    if ( not exists( $stats{$word} ) ) {
        die "$word is not in $file";
    }

    $window = $word;
    my $alt;
    my ( $n, $r, $check );
    my $coin = mkcoin($key);
    my @message;

    while ( $n = substr( $text, 0, 1 )
        and ( $text = substr $text, 1, length($text) ) )
    {
        $r = &$coin;
        my $next = getweightrand( $window, $r );

        # possible encoded bit?
        if ( defined( $check = has_eq( $window, $next ) ) ) {

            # 0bit ?
            if ( $next eq $n ) {
                push @message, 0;
            }
            elsif ( $check eq $n ) {
                push @message, 1;
            }
            else {
                print "Unclear ecoding\n";
            }
        }

        # shift window
        $window .= $n;
        $window = substr $window, 1, $wsize;
    }

    my $clearbits = join "", @message;
    my $cleartext = unpack "a*", ( pack "b*", $clearbits );
    print "$cleartext\n";
    exit;

    # notreached
}

# notreached

# returns a CODE ref to a seeded PRNG with internal counter
sub mkcoin {
    my $ks = shift;
    my $c  = 0;
    return sub {
        my $r = sha1( $ks . $c );
        my $z = unpack "L", $r;
        $z = $z / 4294967295;
        $c++;
        return $z;
    };
}

# looks in global %stats for the possible characters
# following a given window. Chooses a character true
# to the distribution and a random value \in {0,1}.
sub getweightrand {
    my ( $w, $r ) = @_;

    # time/memory tradeoff. May not be usefull for
    # bigger $wsize or certain texts.
    if ( not exists( $stats{$w}{table} ) ) {
        my %t;
        my $sum;
        foreach my $i ( sort keys %{ $stats{$w} } ) {
            next if $i =~ m/^num$/;
            $sum += ( $stats{$w}{$i} / $stats{$w}{num} );
            $t{$sum} = $i;
        }
        $stats{$w}{table} = \%t;
    }
    my $min = 1;
    foreach my $j ( keys %{ $stats{$w}{table} } ) {
        next if ( $j < $r );
        $min = $j if $j < $min;
    }
    return $stats{$w}{table}->{$min};
}

# Checks if there is another character with equal
# likelihood in $stats{$w} for a given character $n
# and window $w. Returns that character or undef
# if no such exists
sub has_eq {
    my ( $w, $n ) = @_;
    foreach my $k ( sort keys %{ $stats{$w} } ) {
        next if $k =~ m/^(num|table|\Q$n\E)$/;
        if ( $stats{$w}{$k} == $stats{$w}{$n} ) {
            return $k;
        }
    }
    return undef;
}

sub usage {
print <<OFF;
Usage: $0
    -i -f <textfile>  : init the statistics for <textfile>
    -e -m <message> -f <textfile> -k <key> -s <start>  :
                        create output after <textfile> with <message>
                        embedded using <key> starting with <start>.
    -d -f <textfile> -k <key>   :
                        read from stdin and extract embedded message
                        using <key> after statistics modelled after
                        <textfile>.
    -D -l <len> -k <seed> -f <textfile> -s <start> :
                        create output after <textfile> of <len> bytes
                        starting with <start> with randomness generated
                        from <seed>.
    [-w <wlen>]     :   set window size to <wlen>, default is 7.
OFF
}
                        

