=head1 NAME

iPE::Model::Emission::CDS - Coding Sequence model.

=head1 DESCRIPTION

=head1 FUNCTIONS

=cut

package iPE::Model::Emission::CDS;
use iPE;
use iPE::Globals;
use iPE::Model;
use File::Temp;
use base ("iPE::Model::Emission");
use strict;

sub init {
    my ($this) = @_;
    $this->{posCounts_}  = [];
    $this->{nullCounts_} = {};
    $this->{scores_}     = [];

    unless($this->hasSettings()) {
        #XXX deprecated.  remove when moving to settings format
        my ($orderstr) = split(' ', $this->{data_});
        if(defined($orderstr)) {
            if($orderstr =~ m/\//) {
                ($this->settings->{order}, $this->settings->{targetOrder}) = 
                    split(/\//, $orderstr);
            }
            else {
                ($this->settings->{order}, $this->settings->{targetOrder}) = 
                    ($orderstr, $orderstr);
            }
        }
    }

    die "CDS requires an order setting in the data attribute.\n"
        if(!defined($this->order));

    $this->settings->{targetOrder} ||= $this->order;
    if($this->order =~ m/[^\d]/ || $this->targetOrder =~ m/[^\d]/) {
        die(__PACKAGE__.": The order and targetOrder settings for CDS must\n".
            "only contain digits.\n");
    }
}

sub clear {
    my ($this) = @_;

    my @nmers = 
        $this->seqClass->getAllSequences($this->order+1, $this->ambiguate);
    for my $cpos (0 .. 2) {
        for my $seq (@nmers) {
            $this->{posCounts_}->[$cpos]->{$seq} = 0.;
            $this->{nullCounts_}->{$seq} = 0.;
            $this->{scores_}->[$cpos]->{$seq} = 0.;
        }
    }

}

sub posCounts   { shift->{posCounts_}               }
sub nullCounts  { shift->{nullCounts_}              }
sub scores      { shift->{scores_}                  }
sub order       { shift->settings->{order}          }
sub targetOrder { shift->settings->{targetOrder}    }

sub countRegion     { 
    my ($this, $region) = @_;
    if($region->seq->loaded) { _count(@_, 0)          }
    else                     { _countUnloaded(@_, 0)  }
}
sub countNullRegion { 
    my ($this, $region) = @_;
    if($region->seq->loaded) { _count(@_, 1)            }
    else                     { _countUnloaded(@_, 1)    }
}


our $lastInframeStop = -1;

sub _checkInframeStop { 
    my ($pos, $cpos, $str) = @_;

    return if ($pos-$lastInframeStop < 3);
    if(($cpos == 0 && ($str =~ m/^(TAG|TGA|TAA)/ || $str =~ m/(TAG|TGA|TAA)$/)) 
        || ($cpos == 1 && $str =~ m/^\w\w(TAG|TGA|TAA)/) 
        || ($cpos == 2 && $str =~ m/^\w(TAG|TGA|TAA)/)) {
        Warn("Warning: Inframe stop codon found.\n");
        $lastInframeStop = $pos;
    }
}

sub _count {
    my ($this, $region, $null) = @_;

    #optimization
    my $end = $region->end;
    my $strRef = $region->strRef;
    my $order = $this->order;
    my $weight = $region->weight;

    # this is the first codon position of the counts.
    # 3-$region->frame takes the 5' overhang and converts it to the 
    #  first codon position of the underlying feature.
    # $region->context adds the current context (distance from the 5' end of the
    #  feature where we start counting) to to the frame calculation.
    # $order+1 is how far in we start from the beginning of the region.
    my $cpos;
    $cpos = (3-$region->frame+$region->context+$order+1)%3 if(!$null);

    my $str;
    for my $pos ($region->start + $order .. $end) {
        $str = substr($$strRef, $pos-$order, $order+1);

        if($null) { $this->{nullCounts_}->{$str} += $weight }
        else      { 
            _checkInframeStop($pos, $cpos, $str);
            $this->{posCounts_}->[$cpos]->{$str} += $weight;
        }

        $cpos = ($cpos+1)%3 if (!$null);
    }
}

sub _countUnloaded {
    my ($this, $region, $null) = @_;

    #optimization
    my $end = $region->end;
    my $strRef = $region->strRef;
    my $order = $this->order;
    my $targetOrder = $this->targetOrder;
    my $weight = $region->weight;

    # this is the first codon position of the counts.
    # 3-$region->frame takes the 5' overhang and converts it to the 
    #  first codon position of the underlying feature.
    # $region->context adds the current context (distance from the 5' end of the
    #  feature where we start counting) to to the frame calculation.
    # $order+1 is how far in we start from the beginning of the region.
    my $cpos;
    $cpos = (3-$region->frame+$region->context+$order+1)%3 if(!$null);

    my $strand = $region->strand;
    my $seq = $region->seq;
    my $str;
    my ($print_start, $print_end);
    if($strand eq "-") {
        $print_end = $seq->length-$region->start;
        $print_start = $seq->length-$region->end;
    }
    else { ($print_start, $print_end) = ($region->start+1, $region->end+1); }
    msg("Counting from $print_start to $print_end.\n");
    for my $pos ($region->start + $order .. $end) {
        $str = $seq->getContext($strand, $pos, $order, $targetOrder);

        if($null) { $this->{nullCounts_}->{$str} += $weight          }
        else      { $this->{posCounts_}->[$cpos]->{$str} += $weight; }

        $cpos = ($cpos+1)%3 if (!$null);
    }
}

sub smooth { 
    my ($this) = @_;
    
    for my $cpos(0 .. 2) {
        if($this->ambiguate && 
                $this->wildcard == iPE::Model::Emission::LEXICAL()) { 
            $this->lexicalAmbiguateMarkovChain($this->posCounts->[$cpos],
                $this->order);
        }
        $this->pseudocountSmoother->smoothHref($this->posCounts->[$cpos]);
        $this->smoother->smoothHref($this->posCounts->[$cpos]);
    }
    if($this->nullModel) {
        if($this->ambiguate && 
                $this->wildcard == iPE::Model::Emission::LEXICAL()) { 
            $this->lexicalAmbiguateMarkovChain($this->nullCounts, $this->order);
        }
        $this->pseudocountSmoother->smoothHref($this->nullCounts);
        $this->smoother->smoothHref($this->nullCounts);
    }
}

sub normalize {
    my ($this) = @_;
    
    my @nmers = $this->seqClass->getAllSequences($this->order,$this->ambiguate);
    #whether or not we have a null model.
    my $nullModel = $this->nullModel;
    my $wildCard = $this->seqClass->getWildCard();
    my @alphabet = @{$this->seqClass->getAlphabet};
    push @alphabet, $this->seqClass->getWildCard 
        if($this->ambiguate && $this->wildcard == iPE::Model::Emission::LITERAL());
    for my $cpos (0 .. 2) {
        for my $nmer (@nmers) {
            my $totCounts = 0;
            my $totNullCounts = 0;
            for my $l (@alphabet) {
                $totCounts += $this->posCounts->[$cpos]->{$nmer.$l};
                $totNullCounts += $this->nullCounts->{$nmer.$l} 
                    if(!$cpos && $nullModel);
            }
            for my $l (@alphabet) {
                $this->posCounts->[$cpos]->{$nmer.$l} /= $totCounts
                    if($totCounts);
                $this->nullCounts->{$nmer.$l} /= $totNullCounts 
                    if($totNullCounts && !$cpos && $nullModel);
            }
            if($this->ambiguate && 
                        $this->wildcard == iPE::Model::Emission::LEXICAL()) { 
                $this->posCounts->[$cpos]->{$nmer.$wildCard} = 1;
                $this->nullCounts->{$nmer.$wildCard} = 1;
            }
        }
    }
}

sub score {
    my ($this) = @_;

    my $g = new iPE::Globals();
    my @nmers = 
        $this->seqClass->getAllSequences($this->order+1, $this->ambiguate);
    my $scale = $g->options->scaleFactor;
    my $negInf = $g->options->sequenceNegInf;
    my $nullModel = $this->nullModel;

    for my $cpos (0 .. 2) {
        for my $nmer (@nmers) {
            if($nullModel) {
                $this->{scores_}->[$cpos]->{$nmer} = 
                    $this->logScore($this->{posCounts_}->[$cpos]->{$nmer},
                        $this->{nullCounts_}->{$nmer});
            }
            else {
                $this->{scores_}->[$cpos]->{$nmer} = 
                    $this->logScore($this->{posCounts_}->[$cpos]->{$nmer});
            }
        }
        if($this->ambiguate && 
                $this->wildcard == iPE::Model::Emission::PENALTY()) {
            $this->penalizeAmbiguousNmers($this->{scores_}->[$cpos], 
                $this->order);
        }
    }
}

sub outputPrepare {
    my ($this, $out, $mode) = @_;
    my $pstring = "";

    my @nmers = $this->seqClass->getAllSequences($this->order,$this->ambiguate);
    my @alphabet = @{$this->seqClass->getAlphabet};
    push @alphabet, $this->seqClass->getWildCard if($this->ambiguate);
    for my $cpos (0 .. 2) {
        for my $nmer(@nmers) {
            $pstring .= $nmer.$out->tab if($mode ne "score"); 
            for my $l (@alphabet) {
                if($mode eq "count" || $mode eq "prob") {
                    $pstring .= 
                        $out->floatf($this->posCounts->[$cpos]->{$nmer.$l}); 
                        
                    if($this->nullModel) {
                        $pstring .= " | ". 
                            $out->floatf($this->nullCounts->{$nmer.$l}); 
                    }

                    $pstring .= $out->tab; 
                }
                elsif($mode eq "score") {
                    $pstring .= 
                        $out->intf($this->scores->[$cpos]->{$nmer.$l}).
                        $out->tab;
                }
            }
            $pstring .= "\n";
        }
    }

    $this->setParamString($pstring);
}

sub numZoeSubmodels { 3 }

sub getZoeHeader {
    my ($this) = @_;

    my ($model, $name);
    if(defined $this->zModel)   { $model = $this->zModel    }
    else                        { $model = $this->model     }
    if(defined $this->zName)    { $name = $this->zName     }
    else                        { $name = $this->name       }

    my $g = new iPE::Globals();
    my $head = $name." ".$model." ".$g->zoe_seqtype($this->source)." ".
        $this->zLength." ".$this->zFocus." ".$this->zSymbols." ".
        $this->zNSubmodels." ".$this->getZoeHeaderEnd."\n";

    return $head;
}

sub outputZoe {
    my ($this, $out, $mode) = @_;
    my @lines = split "\n", $this->getParamString();
    my $alphSize = scalar(@{$this->seqClass->getAlphabet});
    my $linesPerFrame = $alphSize**$this->order;

    $out->print($out->indent.$this->getZoeHeader);
    $out->increaseIndent;
    my $g = new iPE::Globals();
    for my $frame (0 .. 2) {
        #print the hackish LUT header
        $out->print($out->indent."frame$frame LUT ".
            $g->zoe_seqtype($this->source)." ".($this->order+1)." ".
            $this->order." ".$alphSize." 0\n");
        $out->increaseIndent;
        for (my $i = 0; $i <  $linesPerFrame; $i++) {
            my $line = shift @lines;
            while(defined($line) && $line !~ /\S/) { $line = shift @lines }
            next unless(defined($line));
            $out->print($out->indent.$line."\n");
        }
        $out->decreaseIndent;
    }
    $out->decreaseIndent;
}

=head1 SEE ALSO

L<iPE::Model::Emission>

=head1 AUTHOR

Bob Zimmermann (rpz@cse.wustl.edu)

=cut

1;
