Source code for hylite.SNP
#!/usr/bin/env python3
# (c) Copyright 2013-2018 Murray Cox, Wandrille Duchemin, Pierre-Yves Dupont.
#
#
# This file is part of HyLiTE.
#
# HyLiTE is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 3 as published by
# the Free Software Foundation.
#
# HyLiTE is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with HyLiTE. If not, see <http://www.gnu.org/licenses/>
#===============================#
# author: Wandrille Duchemin #
# Murray Cox #
# last modified: 9 January 2018 #
#===============================#
class SNP:
    '''
    This class is used to model a SNP, including its position on the reference genome and
    the presence/absence/coverage of the SNP in multiple organisms.

    Attributes:
        - gene (str): the gene containing the snp
        - position (int): the position of the snp on the gene
        - ref (str): the reference allele
        - alt (str): the alternative allele
        - masked (bool): True once no_coverage() has been called for at least one organism
        - presence (dict): key is organism name, value is a list of flags:
          1 if the SNP is present on a gene copy, 0 if absent, [-1] if the coverage is bad
    '''

    def __init__(self, gene, position, ref, alt):
        '''
        Args:
            - gene (str): name of a gene
            - position (int): position on the gene
            - ref (str): the reference allele
            - alt (str): the alternative allele
        '''
        self.gene = gene
        self.position = position
        self.ref = ref
        self.alt = alt
        # becomes True as soon as one organism is recorded with bad coverage (see no_coverage)
        self.masked = False
        # organism name -> list of flags (1 present, 0 absent, -1 bad coverage)
        self.presence = {}

    def _copy_flags(self, orgname, i):
        '''Return the flag list of an organism, created and 0-padded so that index i exists

        Args:
            - orgname (str): name of an organism
            - i (int): index of the gene copy that is about to be written

        Returns:
            - list. the (possibly new) list stored in self.presence[orgname]
        '''
        flags = self.presence.setdefault(orgname, [])
        missing = i + 1 - len(flags)
        if missing > 0:
            # copies that have not been reported yet default to "absent"
            flags.extend([0] * missing)
        return flags

    def has_SNP(self, orgname, i):
        '''Update the SNP with its presence in an organism

        Args:
            - orgname (str): name of an organism
            - i (int): gene copy the snp is present on, OR -1 if it is the child
        '''
        if i == -1:
            # the child: any previous record is replaced by a single "present" flag
            self.presence[orgname] = [1]
        else:
            self._copy_flags(orgname, i)[i] = 1

    def no_SNP(self, orgname, i):
        '''Update the SNP with its absence in an organism

        Args:
            - orgname (str): name of an organism
            - i (int): gene copy the snp is absent from, OR -1 if it is the child
        '''
        if i == -1:
            # the child: start a new record with [0], or append a 0 to an
            # already existing record (the existing flags are left untouched)
            self.presence.setdefault(orgname, []).append(0)
        else:
            self._copy_flags(orgname, i)[i] = 0

    def no_coverage(self, orgname):
        '''Update the SNP with its bad coverage in an organism

        Any previous record for the organism is replaced by [-1] and the SNP is flagged as masked.

        Args:
            - orgname (str): name of an organism
        '''
        self.presence[orgname] = [-1]
        self.masked = True

    def listed_org(self):
        '''
        Returns:
            - list. the names of the organisms listed in self.presence
        '''
        return list(self.presence)

    def __str__(self):
        # tab-separated: gene, position, alternative allele, presence dict
        # (the reference allele is not part of the output)
        return '\t'.join([self.gene, str(self.position), self.alt, str(self.presence)])

    def __repr__(self):
        return str(self)