Source code for hylite.Organism

#!/usr/bin/env python3

#    (c) Copyright 2013-2018 Murray Cox, Wandrille Duchemin, Pierre-Yves Dupont.
#
#
#    This file is part of HyLiTE.
#
#    HyLiTE is free software: you can redistribute it and/or modify
#    it under the terms of the GNU General Public License version 3 as published by
#    the Free Software Foundation.
#
#    HyLiTE is distributed in the hope that it will be useful,
#    but WITHOUT ANY WARRANTY; without even the implied warranty of
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#    GNU General Public License for more details.
#
#    You should have received a copy of the GNU General Public License
#    along with HyLiTE.  If not, see <http://www.gnu.org/licenses/>


#===============================#
# author: Wandrille Duchemin    #
#         Murray Cox            #
# last modified: 9 January 2018 #
#===============================#


from .Fingerprint import Fingerprint

class Organism:
    '''
    An organism (parent or child) taking part in a HyLiTE analysis.

    Attributes:
        - name (str): name of the organism
        - ploidy (int): ploidy of the organism
        - dreadfile (dict): key is a sample name, value designates the
          file(s) containing the reads of that sample
        - sample_name (list): the sample names (their order is relevant)
        - sample_type (dict): key is a sample name, value is 'RNAseq' or
          'gDNA' depending on the nature of the sample
        - dsamfile (dict): key is a sample name, value starts as None
          (presumably replaced later by the name of the .sam file)
        - dbamfile (dict): key is a sample name, value starts as None
          (presumably replaced later by the name of the .bam file)
        - dsortedbamfile (dict): key is a sample name, value starts as None
          (presumably replaced later by the name of the sorted .bam file)
        - dexpression (dict): key is a sample name, value is a dict whose
          key is a gene label and whose value is the read count of that gene
        - fingerprint (Fingerprint): the single fingerprint of the organism
    '''

    def __init__(self, name, ploidy, dreadfile, sample_name, sample_type):
        '''
        Build an organism with empty expression counts and an empty fingerprint.

        Args:
            - name (str): the name of the organism
            - ploidy (int): the ploidy of the organism
            - dreadfile (dict): the files containing the reads of the
              organism (key is the sample name, value is the filename)
            - sample_name (list): the sample names (the order of the names
              is relevant)
            - sample_type (dict): key is a sample name, value is 'RNAseq' or
              'gDNA' depending on the nature of the sample
        '''
        self.name = name
        self.ploidy = ploidy
        self.dreadfile = dreadfile
        self.sample_name = sample_name
        self.sample_type = sample_type

        # One slot per sample, left empty (None) until a value is assigned.
        self.dsamfile = dict.fromkeys(self.sample_name)
        self.dbamfile = dict.fromkeys(self.sample_name)
        self.dsortedbamfile = dict.fromkeys(self.sample_name)

        self._reset_gene_data()

    def _reset_gene_data(self):
        '''(Re)create the empty per-gene state: expression counts and fingerprint.'''
        # dict of dict: first key is the sample name, second key is the gene
        # name, value is the count of that gene for that sample.
        # A comprehension is required here (not dict.fromkeys) so that every
        # sample gets its own inner dict.
        self.dexpression = {sample: {} for sample in self.sample_name}
        # only one fingerprint per organism
        self.fingerprint = Fingerprint(self.ploidy)

    def __str__(self):
        return "Organism: %s" % self.name

    def __repr__(self):
        return self.__str__()

    def add_expression(self, sample, gene, exp):
        '''
        Increment the count of a given gene in a given sample.

        Args:
            - sample (str): a sample name (must be a key of dexpression,
              otherwise KeyError is raised)
            - gene (str): a gene name
            - exp (int): the count to add
        '''
        counts = self.dexpression[sample]
        # add exp to the current count; a gene seen for the first time starts at 0
        counts[gene] = counts.get(gene, 0) + exp

    def add_snp(self, gene, position, snp_index, presence, polyploid_args=None):
        '''
        Register a snp in the fingerprint, once per gene copy.

        Args:
            - gene (str): a gene name
            - position (int): the position of the snp on the gene
            - snp_index (int): index of the snp in the hylite list
            - presence (list): list of int; gives the presence (-1/0/1) of
              the snp in each gene copy. It is indexed from 0 to ploidy - 1,
              so a shorter list raises IndexError.

        Kwargs:
            - polyploid_args: optional argument for polyploids; currently
              unused, kept for interface compatibility
        '''
        for copy_index in range(self.ploidy):
            self.fingerprint.add_snp(
                gene, position, snp_index, presence[copy_index], copy_index
            )

    def get_fingerprint(self, gene, start, stop, allele):
        '''
        Return the fingerprint of one allele on a portion of a gene.

        The call is forwarded unchanged to Fingerprint.get_fingerprint.

        Args:
            - gene (str): a gene name
            - start (int): start position on the gene
            - stop (int): stop position on the gene
            - allele (int): index of the allele (gene copy)

        Returns:
            - list. per the original documentation, the snp indexes found
              between start and stop on gene in this allele of the organism
        '''
        return self.fingerprint.get_fingerprint(gene, start, stop, allele)

    def get_all_fingerprint(self, gene, start, stop):
        '''
        Return the fingerprints of every allele on a portion of a gene.

        Args:
            - gene (str): a gene name
            - start (int): start position on the gene
            - stop (int): stop position on the gene

        Returns:
            - list. one fingerprint per allele (in allele order) when at
              least one differs from the first; a single-element list holding
              that common fingerprint when they are all the same
        '''
        fingerprints = [
            self.get_fingerprint(gene, start, stop, allele)
            for allele in range(self.ploidy)
        ]
        reference = fingerprints[0]
        if any(other != reference for other in fingerprints[1:]):
            return fingerprints
        # all the fingerprints are the same: return only one of them
        return [reference]

    def clear(self):
        '''
        Clear all the gene related data of the organism.

        The expression counts and the fingerprint are replaced by fresh,
        empty ones; the sample and file information is left untouched.
        '''
        self._reset_gene_data()