=head1 NAME

iPE::Smoother::Kernel - A kernel-Gaussian smoother

=head1 DESCRIPTION

This is a kernel smoother using a normal (Gaussian) density function.  Two parameters are optionally passed into the instantiation of the smoother relating to the bandwidth of the smoothing.  For each element of the smoothed distribution, the bandwidth is found as the following function:

sigma(x) = max((a/n^(1/5))*x, min_neighbor_distance(x))

These can be adjusted with the parameter a (errorConstant), and the parameter neighborRadius, inputted through the smoothing_data tag.  The errorConstant modifies the first case of the bandwidth function (where n is the total number of counts), proportionally increasing the likelihood of the first one to be chosen.  The neighborRadius adjusts how far from the current sample in the histogram distribution a neighbor is taken.

If nothing is passed, the default values of 0.5 and 8 are used, which were found sufficient for Augustus and iscan.  For distributions with fewer possible values, it is recommended that you lower the neighborRadius.

For more information on this method see the Augustus thesis, _Gene Prediction with a Hidden Markov Model_, Mario Stanke 2003, or its reference, _Applied Smoothing Techniques for Data Analysis_, Bowman and Azzalini, 1997.

=head1 FUNCTIONS

=over 8

=cut

package iPE::Smoother::Kernel;
# Strictures come first so they cover everything that follows in this file.
use strict;
use warnings;

use iPE;
use iPE::Globals;
use base qw(iPE::Smoother);

# Pi, used by smoothAref for the normalizing constant sqrt(2*pi) of the
# Gaussian density.
our $PI = 3.1415926535897932384626433832795028841971693993751058;

=item init ()

Initialize the constants used in this smoother from the smoothing_data
attribute (read through the data() accessor).

The attribute, when present, must have the form
"<errorConstant> <neighborRadius>": a non-negative decimal number followed by
a non-negative integer, separated by whitespace.  When the attribute is
undefined, empty or all whitespace, the defaults 0.5 and 8 are used.  Any
other content is a fatal error.

=cut
sub init {
    my ($this) = @_;

    # Read the attribute once; an undefined value is treated like an empty one.
    my $data = $this->data;
    $data = '' unless defined $data;

    # The errorConstant may be fractional (its own default is 0.5), so accept
    # "1", "1.", "0.5" and ".5"; the neighborRadius is a count of occupied
    # histogram bins and therefore has to be an integer.
    if($data =~ m/^\s*(\d+(?:\.\d*)?|\.\d+)\s+(\d+)\s*$/) {
        ($this->{errorConstant_}, $this->{neighborRadius_}) = ($1, $2);
    }
    elsif($data =~ m/\S/) {
        die "Invalidly formatted attribute smoothing_data $data\n ".
            "Kernel smoother.  Expecting format ".
            "\"<errorConstant> <neighborRadius>\"\n";
    }
    else {
        $this->{errorConstant_} = 0.5;
        $this->{neighborRadius_} = 8;
    }
}

=item smoothAref (aref, oldN)

Smooths the histogram referenced by aref, in place, with the kernel-Gaussian
smoother.  Every defined, positive entry of aref is an occupied bin: its index
is the length and its value the count.  Each occupied bin contributes a
Gaussian centred on its length whose bandwidth is the larger of

  - the smaller of the distances to the occupied bins neighborRadius places
    to the left and to the right (whichever of the two exist), and
  - errorConstant * length / n**0.2.

n is the sum of the positive counts unless oldN is passed, in which case oldN
is used both for the bandwidth and for the final renormalization.  A bin whose
bandwidth comes out as zero is kept as a point mass on its own index.  A
warning is issued once per call when a bin has no neighbor at the requested
radius on either side.

The result of renormalizeAref(aref, n) is returned.

=cut
sub smoothAref {
    my ($this, $aref, $oldN) = @_;

    # Create an array of lengths (indices of occupied bins) and a parallel
    # array of counts.  $n stays undefined when no bin is occupied.
    my @L;
    my @counts;
    my $n;
    for my $i (0 .. $#$aref) {
        my $count = $aref->[$i];
        next unless defined($count) && $count > 0;
        push @L, $i;
        push @counts, $count;
        $n += $count;
    }

    # A total supplied by the caller takes precedence over the computed one.
    if(defined($oldN)) { $n = $oldN }

    my $nr = $this->{neighborRadius_};
    # Deliberately not called $a: that name belongs to sort's package globals.
    my $errorConstant = $this->{errorConstant_};
    my $warned = 0;
    my @sigmas;
    my @sigmasquareds;

    # Calculate the bandwidth of the kernel belonging to each occupied bin.
    for my $i (0 .. $#L) {
        my $sigma;

        # Distance to the occupied bin $nr places to the left.  Index 0 is a
        # valid neighbor, hence >= rather than >.
        if($i - $nr >= 0) { $sigma = $L[$i] - $L[$i-$nr] }

        # Distance to the occupied bin $nr places to the right; keep the
        # smaller of the two distances.
        if($i + $nr <= $#L) {
            my $right = $L[$i+$nr] - $L[$i];
            $sigma = $right if(!defined($sigma) || $right < $sigma);
        }

        # No neighbor on either side: use a sentinel that always loses the
        # comparison below, and warn once per call.
        if(!defined($sigma)) {
            $sigma = -1;
            Warn(__PACKAGE__." WARNING: very sparse data\n") if(!$warned);
            $warned = 1;
        }

        # Bandwidth is the max of the neighbor distance and the mean
        # integrated squared error term.
        my $mise = $errorConstant*$L[$i]/($n**0.2);
        $sigma = $sigma > $mise ? $sigma : $mise;

        push @sigmas, $sigma;
        # precompute the sigma^2 values.
        push @sigmasquareds, $sigma**2;
    }

    # precompute the denominator constant in the gaussian density function.
    my $sqrt2pi = sqrt(2*$PI);
    for my $i (0 .. $#$aref) {
        my $tot = 0;
        for my $j (0 .. $#L) {
            if($sigmas[$j] > 0) {
                # Gaussian density at the distance from this point to the
                # length we're examining, weighted by that length's count.
                my $gauss = exp(-0.5*($i-$L[$j])**2/$sigmasquareds[$j])/
                    ($sigmas[$j]*$sqrt2pi);
                $tot += $counts[$j]*$gauss;
            }
            elsif($i == $L[$j]) {
                # Zero bandwidth (e.g. sparse data with a count at length 0,
                # or an errorConstant of 0): the Gaussian degenerates, so keep
                # the count as a point mass instead of dividing by zero.
                $tot += $counts[$j];
            }
        }
        $aref->[$i] = $tot;
    }

    # renormalize the density of this distribution to be the same as originally.
    return $this->renormalizeAref($aref, $n);
}

=back

=head1 SEE ALSO

L<iPE::Smoother>

=head1 AUTHOR

Bob Zimmermann (rpz@cse.wustl.edu)

=cut


1;
