=head1 NAME

iPE::Annotation::SegmentedAnnotation - A multilayered gene annotation segmented on boundaries where the layers are exactly the same.

=head1 DESCRIPTION

A SegmentedAnnotation is a data structure which represents a gene annotation of one or more splices of the gene in a single structure.  

The data structure contains 2 major components: transcripts and segments.  The transcripts array identifies the transcript for each layer on each segment.  The segments array contains all of the segments (see L<iPE::Annotation::Segment>).  These contain information about the states in a region for all transcripts.

=head1 FUNCTIONS

=over 8

=cut

package iPE::Annotation::SegmentedAnnotation;
use iPE;
use iPE::Globals;
use iPE::Util::Interval;
use iPE::Annotation::Segment;
use strict;

=item new(memberHash)

Create a new iPE::Annotation::SegmentedAnnotation.  Segments all transcripts into iPE::Annotation::Segments.  It is assumed that the transcripts all overlap.  To use this function, pass an reference to an array of transcripts to the transcripts key of the memberHash.

=cut

sub new {
    my ($class, $m) = @_;
    my $this = bless {}, $class;

    die "transcripts is expected to be defined in ".__PACKAGE__.
        " instantiation.\n" if (!defined $m->{transcripts});

    $this->{transcripts_} = 
        [ sort { $a->min <=> $b->min }  @{$m->{transcripts}} ];
    $this->{segments_}    = [];
    $this->{min_} = -1;
    $this->{max_} = -1;
    

    for my $transcript (@{$this->{transcripts_}}) {
        $this->{min_} = $transcript->min 
            if($this->{min_} == -1 || $transcript->min < $this->{min_});
        $this->{max_} = $transcript->max
            if($this->{max_} == -1 || $transcript->max > $this->{max_});
    }


    $this->_segment;
    $this->_set_gene_ids;

    return $this;
}

=item transcripts ()

Return the transcripts in this segmented annotation.

=cut
sub transcripts { shift->{transcripts_} }

=item segments ()

Return the segments in this segmented annotation.

=cut
sub segments    { shift->{segments_}    }

=item min ()

Return the minimum coordinate of the overlapping transcripts

=cut
sub min         { shift->{min_} }

=item max ()

Return the maximum coordinate of the overlapping transcripts

=cut
sub max         { shift->{max_} }

=item numTranscriptsAt(x)

Returns how many transcripts exist at the point x.

=cut
sub numTranscriptsAt {
    my ($this, $x) = @_;
    my $ntxs = 0;

    for my $tx (@{$this->transcripts}) {
        if(includes($tx->min, $tx->max, $x)) {
            $ntxs++;
        }
    }

    return $ntxs;
}

=item numTranscriptsInRange(start, end)

Returns the number of transcripts that overlap the range [start, end].

=cut
sub numTranscriptsInRange {
    my ($this, $start, $end) = @_;
    my $ntxs = 0;

    for my $tx(@{$this->transcripts}) {
        if(includes($tx->min, $tx->max, $start) || 
                includes($tx->min, $tx->max, $end)) {
            $ntxs ++;
        }
    }

    return $ntxs;
}

# ---private functions
# perform the creation of segments based on the transcripts
sub _segment {
    my ($this) = @_;

    my @pending_feats;
    for my $transcript (@{$this->transcripts}) {
        push @pending_feats, [ @{$transcript->features} ];
    }

    # create locuses from our temporary transcript array.
    my @cur_feats;
    for (my $i = 0; $i < scalar(@pending_feats); $i++) {
        $cur_feats[$i] = undef;
    }

    my ($last_coord, $cur_coord);
    _init_frontier(\$last_coord,\$cur_coord,\@cur_feats,\@pending_feats);
    while ($cur_coord > 0) {

       _check_for_nonoverlapping_features(\@cur_feats);

       my @segment_features = @cur_feats;

        # add a segment 
        push @{$this->{segments_}}, 
            new iPE::Annotation::Segment(
                {start          => $last_coord,
                 end            => $cur_coord,
                 features       => \@segment_features, 
                 transcripts    => $this->{transcripts_},
                 seqLen         => $this->{seqLen_} });

        _shift_frontier(\$last_coord,\$cur_coord,\@cur_feats,\@pending_feats);
    }

}

sub _has_features {
    my ($features) = @_;
    for my $feature (@$features) { return 1 if defined $feature; }
    return 0;
}

sub _init_frontier {
    my ($last, $cur, $cur_feats, $pending_feats) = @_;
    
    # This routine is responsible for initializing the frontier for the first
    # time.  That means that the lowest coordinate of any first feature in 
    # all the transcripts will be included in the current features array.

    # Find the lowest coordinate.
    $$last = -1;
    for my $pending_feat (@$pending_feats) {
        $$last = $pending_feat->[0]->start 
            if($pending_feat->[0]->start < $$last || $$last == -1);
    }

    # Put all features beginning at the lowest coordinate in the current
    # features array.
    for (my $i = 0; $i < scalar(@$pending_feats); $i++) {
        $cur_feats->[$i] = shift @{$pending_feats->[$i]}
            if($pending_feats->[$i]->[0]->start == $$last) ;
    }

    # Find the next coordinate of any feature in all transcripts.
    $$cur = _find_frontier_coord($cur_feats, $pending_feats);
}

sub _shift_frontier {
    my ($last, $cur, $cur_feats, $pending_feats) = @_;

    # We have five situations we are might have to deal with, shown here
    # current feature is A, next feature is B, undef is -
    #                 
    # 1. AAAAAAAAAAAABBBBBBBBBBBBBB
    # 2. ------------BBBBBBBBBBBBBB
    # 3. AAAAAAAAAAAA--------------
    # 4. AAAAAAAAAAAAAAAAAAAAAAAAAA
    # 5. --------------------------
    #               ^
    #               cur pos

    # In cases 1 and 2, we have a new feature coming up and this is part of
    # the new frontier, and so the feature at this layer has to be changed.
    # In case 3, there is no new feature to look at, so we must change
    # that layer to be undefined.
    # In cases 4 and 5, the layer can remain unchanged since there is no
    # change at the current position
    
    for (my $i = 0; $i < scalar(@$pending_feats); $i++) {
        # cases 1 and 2
        if(defined $pending_feats->[$i]->[0] &&
                $pending_feats->[$i]->[0]->start == $$cur+1) {
            $cur_feats->[$i] = shift @{$pending_feats->[$i]};
        }
        # case 3 (since previous if was false, no defined feature coming up.
        elsif(defined $cur_feats->[$i] && 
                $cur_feats->[$i]->end == $$cur) {
            $cur_feats->[$i] = undef;
        }
        # cases 4 and 5 require no change.
    }
    $$last = $$cur+1;
    $$cur = _find_frontier_coord($cur_feats, $pending_feats);
}

sub _find_frontier_coord {
    my ($cur_feats, $pending_feats) = @_;

    # This function finds the lowest coordinate of all possible next features
    # in the transcript array.  Since the transcripts are shifted as they go
    # along, only first features in the transcript are considered and the
    # endings of the current features.

    my $min = -1;
    for my $feature (@$cur_feats) {
        next if (!defined $feature);
        $min = $feature->end
            if($feature->end < $min || $min == -1);
    }
    for my $pending_feat (@$pending_feats) {
        next if (!defined $pending_feat->[0]);
        $min = $pending_feat->[0]->start-1 
            if($pending_feat->[0]->start-1 < $min || $min == -1);
    }
    return $min;
}

sub _check_for_nonoverlapping_features {
    my ($features) = @_;

    my $errmsg = 
        "A feature in transcript %s overlapped another feature\n".
        "but the feature's state, %s is defined as a non-overlapping state.\n".
        "That feature will be removed.\n";

    # This routine is a feature filtering routine.  Since this is the first
    # time in the program where the overlapping features are shown overlapping,
    # the filtration is done here, rather than in the creation of the 
    # transcripts.  If the feature overlaps with any feature whose state
    # is not the same or undef, the feature must be removed.

    if(scalar(@$features) > 1) {
        for (my $i = 0; $i < scalar(@$features); $i++) {
            my $feature = $features->[$i];
            next if(!defined($feature));
            if($feature->state->canOverlap == 0) {
                for (my $j = 0; $j < scalar(@$features); $j++) {
                    if(defined $features->[$j]  &&
                        $features->[$j]->state->name ne $feature->state->name) {

                        Warn(sprintf($errmsg, 
                                    $feature->transcript->id, 
                                    $feature->state->name));
                        $feature->transcript->removeFeature($feature);
                        $features->[$i] = undef;
                        last;

                    }
                }
            }
        }
    }
                #Msg("A feature in transcript ".$feature->transcript->id.
                    #" overlapped another feature\nbut the feature's state, "
                    #.$feature->state->name." is designated as a ".
                    #"non-overlapping state.\n");
}

sub _set_gene_ids {
    my ($this) = @_;

    $this->{genes_} = {};
    for(my $i = 0; $i < scalar(@{$this->transcripts}); $i++) {
        my $geneID = $this->transcripts->[$i]->geneID;
        if(defined $geneID) {
            if(!defined $this->{genes_}->{$geneID}) {
                $this->{genes_}->{$geneID} = [];
            }
            push @{$this->{genes_}->{$geneID}}, $i;
        }
    }
}

=item format ()

Format the segmented annotation for output.

=cut
sub format {
    my ($this) = @_;

    my $str = " --- (";
    for my $tx (@{$this->transcripts}) { 
        $str .= ($tx->geneID)."::" if defined $tx->geneID;
        $str .= ($tx->id).", "; 
    }
    $str .= ")\n";
    for my $segment (@{$this->segments}) {
        $str .= sprintf "  ".$segment->start."\t".$segment->end." (";
        for my $feature (@{$segment->features}) {
            $str .= sprintf $feature->state->name
                if defined $feature;
            $str .= ", ";
        }
        $str .= sprintf ")\n";
    }
    return $str;
}

=back

=head1 SEE ALSO

L<iPE::Annotation::Segment>, L<iPE::Annotation>

=head1 AUTHOR

Bob Zimmermann (rpz@cse.wustl.edu)

=cut
1;
