# Source code for hylite.Fingerprint
#!/usr/bin/env python3
#    (c) Copyright 2013-2018 Murray Cox, Wandrille Duchemin, Pierre-Yves Dupont.
#
#
#    This file is part of HyLiTE.
#
#    HyLiTE is a free software: you can redistribute it and/or modify
#    it under the terms of the GNU General Public License version 3 as published by
#    the Free Software Foundation.
#
#    HyLiTE is distributed in the hope that it will be useful,
#    but WITHOUT ANY WARRANTY; without even the implied warranty of
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#    GNU General Public License for more details.
#
#    You should have received a copy of the GNU General Public License
#    along with HyLiTE.  If not, see <http://www.gnu.org/licenses/>
#===============================#
# author: Wandrille Duchemin    #
#         Murray Cox            #
# last modified: 9 January 2018 #
#===============================#
class Fingerprint:
    '''Class used to determine the possible snp distribution of an organism in a gene between two given positions (called fingerprint)
    
    Attributes:
        - genotype (list): one dictionary per allele (<ploidy> dictionaries in total).
          Keys are gene names; each value is the list of the tuples
          (position, snp_index, presence) recorded for that gene on that allele,
          in the order they were added.
          presence is -1 if bad coverage, 0 if absent, 1 if present.
    '''
    def __init__(self, ploidy):
        '''Create an empty fingerprint holding one snp dictionary per allele
        
        Args:
            - ploidy (int): the ploidy of the organism, i.e. the number of alleles to track
        '''
        # One independent dictionary per allele (never the same dict object shared between alleles).
        # Please note that the snps are naturally discovered in ascending position,
        # so each per-gene list ends up sorted by snp position.
        self.genotype = [dict() for _ in range(ploidy)]
    
    def add_snp(self, gene, position, id, presence, allele):
        '''Add the given snp index (and presence -1/0/1) at given position of gene on the given allele (total number of allele = ploidy of the organism)
        
        The snp is appended at the end of the list kept for this gene; no sorting is done here.
        
        Args:
             - gene (str): name of the gene containing the snp
             - position (int): position of the snp on the gene
             - id (int): index of the snp in the list of snp of Hylite
             - presence (int): can take the values -1 (bad coverage) or 0 (absence) or 1 (presence)
             - allele (int): index of the allele to which we should add the snp
        '''
        # setdefault creates the per-gene list the first time a gene is seen on this allele
        self.genotype[allele].setdefault(gene, []).append((position, id, presence))
    
    def get_fingerprint(self, gene, start, stop, allele):
        '''Return the list of the tuple (snps_index,presence) present between start and stop on the given gene on the given allele
        
        Both bounds are inclusive. A gene for which no snp has been added yet gives an empty list.
        
        Args:
             - gene (str): name of the gene containing the snp
             - start (int): starting position for the fingerprint
             - stop (int): stopping position for the fingerprint
             - allele (int): index of the concerned allele
            
        Returns:
            list. actually a list of tuple (snp_index,presence)
        '''
        fprint = []
        # an undefined key means that no snp has been found for this gene yet, thus: nothing to scan
        for position, snp_index, presence in self.genotype[allele].get(gene, ()):
            if position < start:
                continue
            if position > stop:
                # the snps of a gene are stored by ascending position,
                # so nothing after this one can still be inside the window
                break
            fprint.append((snp_index, presence))
        # NOTE: this is a linear scan; since the positions are sorted, a bisection search
        # could replace it if this ever becomes a bottleneck.
        return fprint