Source code for hylite.SNP

#!/usr/bin/env python3

#    (c) Copyright 2013-2018 Murray Cox, Wandrille Duchemin, Pierre-Yves Dupont.
#
#
#    This file is part of HyLiTE.
#
#    HyLiTE is a free software: you can redistribute it and/or modify
#    it under the terms of the GNU General Public License version 3 as published by
#    the Free Software Foundation.
#
#    HyLiTE is distributed in the hope that it will be useful,
#    but WITHOUT ANY WARRANTY; without even the implied warranty of
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#    GNU General Public License for more details.
#
#    You should have received a copy of the GNU General Public License
#    along with HyLiTE.  If not, see <http://www.gnu.org/licenses/>


#===============================#
# author: Wandrille Duchemin    #
#         Murray Cox            #
# last modified: 9 January 2018 #
#===============================#


class SNP:
    '''
    Model a SNP: its location on a reference gene and, for each organism,
    whether the SNP is present, absent or badly covered.

    Attributes:
        - gene (str): the gene containing the snp
        - position (int): the position of the snp on the gene
        - ref (str): the reference allele
        - alt (str): the alternative allele
        - masked (bool): True once at least one organism has been recorded
          with bad coverage at the snp position (see no_coverage)
        - presence (dict): key is an organism name, value is a list of
          flags: 1 if the SNP is present on a gene copy, 0 if absent,
          -1 if the coverage is bad. For calls with a gene copy index the
          list is indexed by that gene copy.
    '''

    def __init__(self, gene, position, ref, alt):
        '''
        Args:
            - gene (str): name of a gene
            - position (int): position on the gene
            - ref (str): the reference allele
            - alt (str): the alternative allele
        '''
        self.gene = gene
        self.position = position
        self.ref = ref
        self.alt = alt
        # becomes True as soon as no_coverage() is called for any organism
        self.masked = False
        # organism name -> list of flags (1 present, 0 absent, -1 bad coverage)
        self.presence = {}

    def _set_copy(self, orgname, i, value):
        '''
        Store a flag for gene copy i of an organism, creating the
        organism's list and padding it with 0s when it is too short.

        Args:
            - orgname (str): name of an organism
            - i (int): index of the gene copy to update
            - value (int): flag to store at index i
        '''
        copies = self.presence.setdefault(orgname, [])
        missing = i + 1 - len(copies)
        if missing > 0:
            # copies that were never reported default to 0 (absent)
            copies.extend([0] * missing)
        copies[i] = value

    def has_SNP(self, orgname, i):
        '''
        Update the SNP with its presence in an organism

        Args:
            - orgname (str): name of an organism
            - i (int): gene copy the snp is present on, OR -1 if it is the child
        '''
        if i == -1:
            # the child: any previous record is replaced by a single 1
            self.presence[orgname] = [1]
        else:
            self._set_copy(orgname, i, 1)

    def no_SNP(self, orgname, i):
        '''
        Update the SNP with its absence in an organism

        Args:
            - orgname (str): name of an organism
            - i (int): gene copy the snp is absent from, OR -1 if it is the child
        '''
        if i == -1:
            # the child: a new organism ends up with [0]; an organism that
            # already has a record gets one more 0 appended to it.
            # NOTE(review): appending to an existing record (rather than
            # leaving it untouched) is the historical behaviour - confirm
            # that it is intended.
            self.presence.setdefault(orgname, []).append(0)
        else:
            self._set_copy(orgname, i, 0)

    def no_coverage(self, orgname):
        '''
        Update the SNP with its bad coverage in an organism. Any previous
        record for the organism is replaced by [-1] and the SNP is masked.

        Args:
            - orgname (str): name of an organism
        '''
        self.presence[orgname] = [-1]
        self.masked = True

    def listed_org(self):
        '''
        Returns:
            - list. the names of the organisms listed in self.presence
        '''
        return list(self.presence)

    def __str__(self):
        # tab separated: gene, position, alternative allele, presence dict
        # (the reference allele is not part of the output)
        return '\t'.join([self.gene, str(self.position), self.alt, str(self.presence)])

    def __repr__(self):
        return self.__str__()