=head1 NAME

iPE::FeatureMapping - a mapping of a feature name in an annotation to a state in an HMM.

=head1 DESCRIPTION

Data structure for mapping a state to a relative position in a format-specific feature.  All indexing here is 0-based, and all values passed to and retrieved from this package should be 0-based.

=head1 FUNCTIONS

=over 8

=cut

package iPE::FeatureMapping;
use iPE;
use iPE::Globals;
use iPE::Annotation::Transcript;
use iPE::Util::DNATools;
use iPE::Util::Interval;
use base("iPE::XML::Object");
use strict;

=item new (tag, members)

This object is automatically constructed by the containing FeatureMap object directly from the XML file.

=cut

sub new {
    # Constructor.  Builds the base iPE::XML::Object from the raw arguments
    # and then fills in the mapping from the attribute hash ($m).
    #
    # Dies with a descriptive message when:
    #   * the named state is not among the states known to iPE::Globals,
    #   * either interval cannot be constructed, or
    #   * number_in_transcript is not of the form <num> or <num>+.
    my $class = shift;
    # Arguments are (tag, members, ...); only the members hash is read
    # here, everything is forwarded untouched to the base class.
    my (undef, $m) = @_;
    my $this = $class->SUPER::new(@_);

    my $g = iPE::Globals->new();
    die ("\n".__PACKAGE__.": No state $m->{state_name}\n".
         "One of the feature mappings you provided is not\n".
         "compatible with the gHMM you provided.  Please check to see that\n".
         "all states in the feature map have corresponding states in this\n".
         "gHMM (e.g. Did you pick a UTR feature map for a non-UTR gHMM?).\n")
        if(not defined($g->states->{$m->{state_name}}));
    $this->{stateName_}     = $m->{state_name};
    $this->{feature_}       = $m->{feature};

    my $err = "feature_mapping error for state ".$this->{stateName_}.":\n";

    # Interval selecting which features (by ordinal within the transcript)
    # this mapping applies to.
    $this->{featureInterval_} =
        iPE::Util::Interval->new({low    => $m->{first_feature},
                                  high   => $m->{last_feature},
                                  letter => "N"});
    die $err."check first_feature and last_feature.\n"
        if not defined $this->{featureInterval_};

    # Interval translating the feature coordinates into the state region.
    $this->{regionInterval_} =
        iPE::Util::Interval->new({low    => $m->{state_region_start},
                                  high   => $m->{state_region_end},
                                  letter => "L"});
    die $err."check state_region_start and state_region_end\n"
        if not defined $this->{regionInterval_};

    #If number_in_transcript says "1+", that means that the number feature
    #may appear one or more times in the transcript and the state it maps to
    #will be relevant.  Otherwise, if the number_in_transcript is "1",
    #the feature must appear exactly 1 time in the transcript in order for it
    #to be relevant.
    #
    #The raw value is validated as a whole before the "+" is removed, so
    #that malformed values such as "1+2", "+" or "" are rejected instead of
    #being silently reinterpreted.  An undefined value is left undefined.
    my $count_spec = $m->{number_in_transcript};
    die $err."number_in_transcript value must be of the form <num>+ or <num>\n"
        if(defined($count_spec) && $count_spec !~ m/\A\d+\+?\z/);
    $this->{mayHaveMore_} =
        (defined($count_spec) && $count_spec =~ m/\+\z/) ? 1 : 0;
    $count_spec =~ s/\+\z// if(defined($count_spec));
    $this->{nInTranscript_} = $count_spec;

    # Strand restriction from the annotation side; "N" when none is given.
    $this->{annStrand_} = $m->{strand};
    $this->{annStrand_} = "N" if(!defined($this->{annStrand_}));

    #optimization: cache the state object and the properties consulted on
    #every addToTranscript call.
    $this->{state_}         = $g->state($this->{stateName_});
    $this->{startFrame_}    = $this->{state_}->startFrame;
    $this->{endFrame_}      = $this->{state_}->endFrame;
    $this->{strand_}        = $this->{state_}->strand;

    return $this;
}

sub num {
    # Hands back the argument unchanged when it contains at least one
    # digit; otherwise yields 0.
    my ($value) = @_;
    return $value =~ /\d/ ? $value : 0;
}

# Read-only accessors for the fields stored on the object by new().

# The state object and the name it was looked up under.
sub state           { my ($this) = @_; return $this->{state_};           }
sub stateName       { my ($this) = @_; return $this->{stateName_};       }

# The annotation feature name and the required count of such features.
sub feature         { my ($this) = @_; return $this->{feature_};         }
sub nInTranscript   { my ($this) = @_; return $this->{nInTranscript_};   }

# True when the count was given with a trailing "+".
sub mayHaveMore     { my ($this) = @_; return $this->{mayHaveMore_};     }

# Frame and strand values cached from the state object.
sub startFrame      { my ($this) = @_; return $this->{startFrame_};      }
sub endFrame        { my ($this) = @_; return $this->{endFrame_};        }
sub strand          { my ($this) = @_; return $this->{strand_};          }

# The two Interval objects built in new().
sub featureInterval { my ($this) = @_; return $this->{featureInterval_}; }
sub regionInterval  { my ($this) = @_; return $this->{regionInterval_};  }

=item addToTranscript(transcript, featureName, featureStart, featureEnd, featureStartFrame, featureEndFrame, strand, featureNo, nInTranscript, length)

This function evaluates the feature against the calling FeatureMapping object and decides whether the feature is relevant to this feature mapping.  If it is, it adds a state region to the passed Transcript.  If it is not, it does nothing.

Arguments:

=over 8

=item transcript

The Transcript that will be added to if this FeatureMapping qualifies.

=item featureName 

The name of the feature as it is defined in format (for example, a GTF feature name would be "CDS").  

=item featureStart, featureEnd

The start and stop position in the annotation of the feature.  

=item featureStartFrame, featureEndFrame

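Pass either the starting frame or the ending frame in here to determine if it matches with the state.  Whichever one you do not pass in as a valid frame you may pass in as -1.

=item strand

The strand of the feature.  The feature is only considered when this matches both the strand of the mapped state and the strand given in the feature mapping; either of those may be "N", in which case any strand is accepted.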

=item featureNo 

Which feature of its type it is in the transcript, e.g. if it is the first CDS feature, the second, and so on.  (Note that this is not which feature of all the features, it is which feature of this specific type of feature.)  

=item nInTranscript 

The total number of this type of feature in the transcript, e.g. how many total CDS features there are in the transcript, if this feature is a CDS feature.

=item length

The total length of the underlying sequence.  This is used to check to see if the feature will step out of bounds and cause a problem with counting.

=back

=cut
sub addToTranscript {
    # Decides whether the described feature is relevant to this feature
    # mapping and, if it is, adds the corresponding state region to $tx.
    # Returns early (adding nothing) as soon as any eligibility test fails.
    #
    # Positional arguments after the invocant:
    #   $tx            - transcript object that receives the state region
    #   $name          - feature name as given in the annotation format
    #   $start, $end   - feature coordinates in the annotation
    #   $start_frame,
    #   $end_frame     - frames 0..2; anything outside that range means
    #                    "not supplied"
    #   $strand        - strand of the feature
    #   $featureNo     - ordinal of this feature among features of its type
    #   $nInTranscript - total number of features of this type
    #   $length        - length of the underlying sequence
    my ($this, $tx, $name, $start, $end, $start_frame, $end_frame, $strand,
        $featureNo, $nInTranscript, $length) = @_;

    #first we check if the feature is eligible for state annotation with
    #this particular feature mapping.  If it is, it will pass all these
    #tests and will be used to create a state sequence.

    #check if this feature is on the same strand as the state ...
    return if($strand ne $this->{strand_} && $this->{strand_} ne 'N');

    #... and on the strand requested by the feature mapping itself.
    return if($strand ne $this->{annStrand_} && $this->{annStrand_} ne 'N');

    #work out the frames.  A frame outside 0..2 was not supplied by the
    #caller.  When neither frame was supplied both default to 0; when only
    #one is missing it is derived from the other.
    my $start_frame_unknown = ($start_frame < 0 || $start_frame > 2);
    my $end_frame_unknown   = ($end_frame   < 0 || $end_frame   > 2);

    if($start_frame_unknown && $end_frame_unknown) {
        $start_frame = 0;
        $end_frame   = 0;
    }
    elsif($start_frame_unknown) {
        $start_frame = getOpposingFrame($start, $end, $end_frame);
    }
    elsif($end_frame_unknown) {
        $end_frame = getOpposingFrame($start, $end, $start_frame);
    }

    #check that the frame is correct for this feature.
    if($start_frame ne $this->{startFrame_} && $this->{startFrame_} ne "N")
        { return }
    if($end_frame   ne $this->{endFrame_}   && $this->{endFrame_}   ne "N")
        { return }

    #check if the number of such features in the transcript matches the
    #range given in the feature map.
    if($this->{mayHaveMore_}) {
        if($nInTranscript < $this->{nInTranscript_}) { return }
    } else {
        if($nInTranscript != $this->{nInTranscript_}) { return }
    }

    #check that this feature is in the right position in the transcript
    #for this feature mapping.
    if(!includes($this->{featureInterval_}->translate(0, $nInTranscript-1),
            $featureNo)) {
        return;
    }

    #check that the name of the feature is the same as this feature
    if($name ne $this->{feature_}) { return }

    #now we finally have a match of a feature mapping to a feature.
    #create a state.
    my ($state_start, $state_end) =
        $this->{regionInterval_}->translate($start, $end);

    #check that the length of the sequence does not prevent this feature
    #from being created.
    if($state_start < 0 || $state_end < 0 ||
            $state_start > $length-1 || $state_end > $length-1) {
        Warn(__PACKAGE__.": discarding feature for ".$this->stateName.
            " ($state_start, $state_end)\n steps out of the bounds ".
            "of the underlying sequence (0, ".($length-1).").\n");
        return;
    }

    msg( "Creating feature: state:".$this->stateName." start:$state_start ".
        "end:$state_end from coordinates ($start, $end).\n");

    $tx->addFeature($this->state, $state_start, $state_end,
        $start_frame, $end_frame);
}

=head1 SEE ALSO

L<iPE::FeatureMap>, L<iPE::AnnotationPlugin>

=head1 AUTHOR

Bob Zimmermann (rpz@cse.wustl.edu)

=cut
1;
