=head1 NAME

iPE::Annotation - object which contains the state annotation of a sequence and corresponding conservation sequence.

=head1 DESCRIPTION

An annotation is a collection of L<iPE::Annotation::SegmentedAnnotation>s.  These are built in several intermediate steps: first the L<iPE::AnnotationPlugin> creates L<iPE::Annotation::Transcript>s, which are then layered into L<iPE::Annotation::SegmentedAnnotation>s.

Each SegmentedAnnotation represents a region in which one or more Transcripts occur, flanked by intergenic/unannotated regions.  Note that there is no one-to-one relationship between a locus and a SegmentedAnnotation.  Every locus is contained in a single SegmentedAnnotation, but a SegmentedAnnotation may also include transcripts which are not in the same locus.

=over 8

=head1 FUNCTIONS

=cut

package iPE::Annotation;
use iPE;
use iPE::Globals;
use iPE::AnnotationPlugin::GTFPlugin;
use iPE::Annotation::SegmentedAnnotation;
use iPE::Util::Overlap;
use iPE::Util::Overlap::Node;
use strict;

=item new (memberHash)

Create a new iPE::Annotation.  The memberHash hash reference is expected to have the following keys defined:

=cut

=over 8

=item filename

The name of the file which contains the annotation

=item featureMaps

A reference to an array of feature maps which relate specific features in an annotation format to a feature in the gHMM.

=item nullDefinitions

Definitions of null regions relative to features in transcripts.  This is of the iPE::NullDefinition class.

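=item seqLength

The length of the underlying sequence.  This is used to remove any transcripts which step over the end.

=item altspliceStates

Required.  A reference to an array of alternative splicing state objects.  For every cluster of more than one overlapping transcript, the detect method of each of these objects is called with the cluster and the sequence length.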

=item levels

Optional.  Pass to set each feature with a gc level.  This is of the iPE::GCLevels class. DEPRECATED.

=back

=cut
sub new
{
    # Constructor.  Loads the annotation file through the plugin that matches
    # its extension, groups the resulting transcripts into clusters of
    # overlapping transcripts, and builds one
    # iPE::Annotation::SegmentedAnnotation per non-empty cluster.
    #
    # Parameters:
    #   $class - the class to bless the new object into.
    #   $m     - hash reference.  filename, featureMaps, seqLength,
    #            nullDefinitions and altspliceStates must be defined.  levels
    #            is optional and is only recorded as a flag here; the levels
    #            handed to the plugin and to the transcripts are taken from
    #            iPE::Globals.
    #
    # Returns the new object.
    #
    # Dies if a required key is missing, if the filename has no extension,
    # if no feature map (or more than one) matches the extension, or if no
    # plugin is available for the extension.
    my ($class, $m) = @_;
    my $this = bless {}, $class;

    die "Incomplete instantiation of $class.  ".
         "Required keys are filename, featureMaps, seqLength,".
            "altspliceStates and nullDefinitions.\n"
        if( !defined $m->{filename} ||
            !defined $m->{featureMaps} ||
            !defined $m->{seqLength} ||
            !defined $m->{nullDefinitions} ||
            !defined $m->{altspliceStates});

    $this->{seqLength_}             = $m->{seqLength};
    $this->{filename_}              = $m->{filename};
    $this->{featureMaps_}           = $m->{featureMaps};
    $this->{nullDefinitions_}       = $m->{nullDefinitions};
    $this->{altspliceStates_}       = $m->{altspliceStates};
    $this->{hasLevels_}             = defined($m->{levels});
    $this->{segmentedAnnotations_}  = [];
    $this->{blacklist_}             = [];

    # Find the extension of the filename.  The capture is taken from the
    # match itself: $1 keeps its previous value when a match fails, so
    # reading it unconditionally could pick up a stale extension.
    my ($extension) = $this->{filename_} =~ m/\.([^.]+)$/;
    die "Cannot determine the extension of annotation file ".
        "$this->{filename_}\n"
        if not defined $extension;

    #TODO: Consider taking this feature map extracting out of the main
    # Annotation code...it's sort of monolithic.

    # Find the appropriate feature map for this extension; exactly one
    # feature map may claim it.
    for my $featureMap (@{$this->{featureMaps_}}) {
        if($extension eq $featureMap->extension) {
            die "Multiple feature maps with the same extension.\n"
                if defined $this->{featureMap_};
            $this->{featureMap_} = $featureMap;
        }
    }

    die "No defined feature map for the extension $extension\n"
        if not defined $this->{featureMap_};

    my $g = iPE::Globals->new();

    # Find the annotation plugin.
    my $init = { filename => $this->{filename_},
                 featureMap => $this->{featureMap_},
                 levels     => $g->levels };
    if($extension =~ /gtf/) {
        $this->{annotationPlugin_} =
            iPE::AnnotationPlugin::GTFPlugin->new($init);
    }
    else {
        die "No plugin in ".__PACKAGE__." for extension $extension.\n";
    }

    $this->{annotationPlugin_}->populate($this->{seqLength_});

    # We now have a collection of transcripts.  Keep only the transcript
    # list and drop the reference to the plugin.
    my $transcripts = $this->{annotationPlugin_}->transcripts;
    $this->{annotationPlugin_} = undef;

    # Work on a copy sorted by start position; transcripts_ keeps the full
    # sorted list while $transcripts is consumed by the clustering below.
    $transcripts = [ sort { $a->min <=> $b->min } @$transcripts ];
    $this->{transcripts_} = [ @$transcripts ];

    # Builds the overlap node covering the extent of one transcript.
    my $makeNode = sub {
        my ($tx) = @_;
        return iPE::Util::Overlap::Node->new(
            {low => $tx->min,
             high => $tx->max,
             letter => "L"});
    };

    while(scalar(@$transcripts)) {
        my @txCluster = ();

        my $overlap = iPE::Util::Overlap->new;
        my $node = $makeNode->($transcripts->[0]);

        # Keep adding transcripts to the txCluster until we find one which
        # does not overlap anything in the set we have.
        while(1) {
            $overlap->insert($node);
            push @txCluster, (shift @$transcripts);

            last if !scalar(@$transcripts);
            $node = $makeNode->($transcripts->[0]);
            last if !defined $overlap->find_overlap($node);
        }

        for my $tx (@txCluster) {
            $tx->finalize($this->{seqLength_});
        }

        # Alternative splicing can only occur among several transcripts.
        if(scalar(@txCluster) > 1) {
            for my $altspliceState (@{$this->{altspliceStates_}}) {
                $altspliceState->detect(\@txCluster, $this->{seqLength_});
            }
        }

        # Remove blacklisted transcripts.  A removed entry is overwritten
        # with the last element of the cluster, and the same index is then
        # re-examined because it now holds an unchecked transcript.
        for (my $i = 0; $i < scalar(@txCluster); $i++) {
            if($txCluster[$i]->blacklisted) {
                push @{$this->{blacklist_}}, $txCluster[$i]->id;
                my $last_tx = pop @txCluster;
                # The blacklisted transcript was itself the last element.
                last if ($i == scalar(@txCluster));
                $txCluster[$i] = $last_tx;
                $i--;
            }
        }

        # Put in the null regions.
        for my $transcript (@txCluster) {
            for my $nullDefinition (@{$this->{nullDefinitions_}}) {
                $nullDefinition->addToTranscript($transcript);
            }
        }

        # Create the actual segmented annotation, unless every transcript
        # of the cluster was blacklisted.  The sequence length is stored
        # under seqLength_ (there is no seqLen_ member).
        push @{$this->{segmentedAnnotations_}},
            iPE::Annotation::SegmentedAnnotation->new(
                { transcripts => \@txCluster,
                  seqLen      => $this->{seqLength_} }
            ) if(scalar(@txCluster));
    }

    # Levels come from the globals and are applied to every transcript,
    # including those that were blacklisted above.
    my $levels = $g->levels;
    if(defined $levels) {
        for my $transcript (@{$this->{transcripts_}}) {
            $transcript->setLevels($levels);
        }
    }

    $this->{segmentedAnnotations_} =
        [ sort { $a->min <=> $b->min } @{$this->{segmentedAnnotations_}} ];

    return $this;
}

=item format ()

Format the annotation for output.

=cut

sub format {
    # Format the annotation for output.
    #
    # Returns the concatenation of the formatted segmented annotations, in
    # the order they are stored.  An annotation without any segmented
    # annotation formats as the empty string rather than undef, so the
    # result can always be printed or concatenated safely.
    my ($this) = @_;

    # scalar() keeps each nested format call in scalar context, exactly as
    # string concatenation would.
    return join '',
        map { scalar $_->format } @{$this->{segmentedAnnotations_}};
}


# Read-only accessors.  Each one returns the member of the same name
# (stored with a trailing underscore) from the object hash.

# Name of the annotation file given to the constructor.
sub filename {
    my ($this) = @_;
    return $this->{filename_};
}

# The feature map whose extension matched the annotation file.
sub featureMap {
    my ($this) = @_;
    return $this->{featureMap_};
}

# The annotation plugin member (reset to undef by the constructor once the
# transcripts have been extracted).
sub annotationPlugin {
    my ($this) = @_;
    return $this->{annotationPlugin_};
}

# Reference to the array of segmented annotations.
sub segmentedAnnotations {
    my ($this) = @_;
    return $this->{segmentedAnnotations_};
}

# Reference to the array of all transcripts.
sub transcripts {
    my ($this) = @_;
    return $this->{transcripts_};
}

# Reference to the array of ids of blacklisted transcripts.
sub blacklist {
    my ($this) = @_;
    return $this->{blacklist_};
}
#sub hasLevels               { shift->{hasLevels_}               }
=back

=head1 SEE ALSO

L<iPE::Annotation::SegmentedAnnotation>, L<iPE::Annotation::Transcript>, L<iPE::Annotation::Feature>, L<iPE::FeatureMap>, L<iPE::AnnotationPlugin>

=head1 AUTHOR

Bob Zimmermann (rpz@cse.wustl.edu).

=cut

1;
