Source code for hylite.Fingerprint

#!/usr/bin/env python3

#    (c) Copyright 2013-2018 Murray Cox, Wandrille Duchemin, Pierre-Yves Dupont.
#
#
#    This file is part of HyLiTE.
#
#    HyLiTE is a free software: you can redistribute it and/or modify
#    it under the terms of the GNU General Public License version 3 as published by
#    the Free Software Foundation.
#
#    HyLiTE is distributed in the hope that it will be useful,
#    but WITHOUT ANY WARRANTY; without even the implied warranty of
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#    GNU General Public License for more details.
#
#    You should have received a copy of the GNU General Public License
#    along with HyLiTE.  If not, see <http://www.gnu.org/licenses/>


#===============================#
# author: Wandrille Duchemin    #
#         Murray Cox            #
# last modified: 9 January 2018 #
#===============================#


class Fingerprint:
    '''Per-allele record of the snps observed in each gene of an organism.

    A "fingerprint" is the snp distribution of one allele in a gene between
    two given positions; see get_fingerprint.

    Attributes:
        - genotype (list): one dict per allele; each dict maps a gene name to the list of (position, snp_index, presence) tuples added for that gene
    '''

    def __init__(self, ploidy):
        '''Create an empty genotype holding one snp table per allele.

        Args:
            - ploidy (int): the ploidy of the organism, i.e. how many allele tables to create
        '''
        # Each allele gets its own independent dict:
        #   gene -> [(position, snp_index, presence), ...]
        # presence is -1 if bad coverage, 0 if absent, 1 if present.
        # Snps are expected to be added in ascending position, so every
        # per-gene list stays sorted by position without explicit sorting.
        self.genotype = [{} for _ in range(ploidy)]

    def add_snp(self, gene, position, id, presence, allele):
        '''Append a snp observation to the given gene of the given allele.

        The record is appended as is; nothing is re-sorted, so callers are
        expected to add the snps of a gene in ascending position.

        Args:
            - gene (str): name of the gene containing the snp
            - position (int): position of the snp on the gene
            - id (int): index of the snp in the list of snp of Hylite
            - presence (int): can take the values -1 (bad coverage) or 0 (absence) or 1 (presence)
            - allele (int): index of the allele to which the snp is added

        Raises:
            IndexError: if allele is not a valid index of the genotype list
        '''
        # setdefault creates the per-gene list the first time a gene is seen
        records = self.genotype[allele].setdefault(gene, [])
        records.append((position, id, presence))

    def get_fingerprint(self, gene, start, stop, allele):
        '''Return the (snp_index, presence) pairs found between start and stop.

        Both bounds are inclusive. A gene with no recorded snp on this allele
        yields an empty list.

        Args:
            - gene (str): name of the gene containing the snps
            - start (int): starting position for the fingerprint
            - stop (int): stopping position for the fingerprint
            - allele (int): index of the concerned allele

        Returns:
            list. a list of tuple (snp_index, presence), in the order the snps were added
        '''
        # A gene missing from the table means no snp has been found for it yet.
        records = self.genotype[allele].get(gene, [])

        selected = []
        for record in records:
            position = record[0]
            if position < start:
                # before the window: skip and keep scanning
                continue
            if position > stop:
                # The scan ends at the first record past the window, which
                # relies on the records being in ascending position.
                break
            selected.append((record[1], record[2]))
        # NOTE: this is a linear scan from the beginning of the gene; a
        # dichotomic (binary) search on position would be faster.
        return selected