# Source code for hylite.Fingerprint
#!/usr/bin/env python3
# (c) Copyright 2013-2018 Murray Cox, Wandrille Duchemin, Pierre-Yves Dupont.
#
#
# This file is part of HyLiTE.
#
# HyLiTE is a free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 3 as published by
# the Free Software Foundation.
#
# HyLiTE is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with HyLiTE. If not, see <http://www.gnu.org/licenses/>
#===============================#
# author: Wandrille Duchemin #
# Murray Cox #
# last modified: 9 January 2018 #
#===============================#
class Fingerprint:
    '''Class used to determine the possible snp distribution of an organism in a gene between two given positions (called fingerprint)

    Attributes:
        - genotype (list): one dictionary per allele (<ploidy> dictionaries in total).
          In each dictionary, keys are gene names and values are lists of tuples
          (position, snp_index, presence), where presence is -1 if bad coverage,
          0 if absent, 1 if present.
    '''

    def __init__(self, ploidy):
        '''Create an empty fingerprint store with one SNP dictionary per allele.

        Args:
            - ploidy (int): the ploidy of the organism, i.e. the number of alleles to track
        '''
        # One independent dict per allele (never the same dict object repeated).
        # Please note that the snps are expected to be added in ascending
        # position, so each per-gene list stays sorted by position.
        self.genotype = [dict() for _ in range(ploidy)]

    def add_snp(self, gene, position, id, presence, allele):
        '''Add the given snp index (and presence -1/0/1) at given position of gene on the given allele (total number of allele = ploidy of the organism)

        Args:
            - gene (str): name of the gene containing the snp
            - position (int): position of the snp on the gene
            - id (int): index of the snp in the list of snp of Hylite
            - presence (int): can take the values -1 (bad coverage) or 0 (absence) or 1 (presence)
            - allele (int): index of the allele to which we should add the snp

        Raises:
            IndexError: if allele is not a valid index into self.genotype
        '''
        # NOTE: the parameter name `id` shadows the builtin; it is kept so that
        # existing keyword-argument callers continue to work.
        # setdefault creates the per-gene list on first use, then we append to it.
        self.genotype[allele].setdefault(gene, []).append((position, id, presence))

    def get_fingerprint(self, gene, start, stop, allele):
        '''Return the list of the tuple (snps_index,presence) present between start and stop on the given gene on the given allele

        Both bounds are inclusive: a snp located exactly at start or at stop is returned.

        Args:
            - gene (str): name of the gene containing the snp
            - start (int): starting position for the fingerprint
            - stop (int): stopping position for the fingerprint
            - allele (int): index of the concerned allele

        Returns:
            list. actually a list of tuple (snp_index,presence); empty if no snp
            has been recorded for this gene on this allele

        Raises:
            IndexError: if allele is not a valid index into self.genotype
        '''
        fprint = []
        # An undefined key means that no snp has been found for this gene yet,
        # thus we iterate over an empty sequence.
        for position, snp_index, presence in self.genotype[allele].get(gene, ()):
            if position < start:
                continue
            if position > stop:
                # The per-gene list is assumed sorted by ascending position,
                # so nothing after this snp can fall inside the window.
                break
            fprint.append((snp_index, presence))
        # NOTE: a binary search on position could replace this linear scan, but
        # it would behave differently if snps were ever added out of order, so
        # the original scan semantics are kept.
        return fprint