#!/usr/bin/env python3
# (c) Copyright 2013-2018 Murray Cox, Wandrille Duchemin, Pierre-Yves Dupont.
#
#
# This file is part of HyLiTE.
#
# HyLiTE is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 3 as published by
# the Free Software Foundation.
#
# HyLiTE is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with HyLiTE. If not, see <http://www.gnu.org/licenses/>
#===============================#
# author: Wandrille Duchemin #
# Murray Cox #
# last modified: 9 January 2018 #
#===============================#
from .Fingerprint import Fingerprint
class Organism:
    '''
    Represent an organism (parent or child) in a HyLiTE analysis.

    Attributes:
        - name (str): name of the organism
        - ploidy (int): ploidy of the organism
        - dreadfile (dict): key is a sample name, value identifies the file(s) containing the reads of that sample
        - sample_name (list): names of the samples (the order of the names is relevant)
        - sample_type (dict): key is a sample name, value is 'RNAseq' or 'gDNA' depending on the nature of the sample
        - dsamfile (dict): key is a sample name, value starts as None (presumably replaced later by the name of the resulting .sam file)
        - dbamfile (dict): key is a sample name, value starts as None (presumably the .bam file name; set elsewhere)
        - dsortedbamfile (dict): key is a sample name, value starts as None (presumably the sorted .bam file name; set elsewhere)
        - dexpression (dict): key is a sample name, value is a dict where key is a gene label and value is the number of reads for the gene
        - fingerprint (Fingerprint): the single fingerprint of the organism
    '''

    def __init__(self, name, ploidy, dreadfile, sample_name, sample_type):
        '''Create an organism with empty expression counts and an empty fingerprint.

        Args:
            - name (str): the name of the organism
            - ploidy (int): the ploidy of the organism
            - dreadfile (dict): the files containing reads from the organism (key is the sample name)
            - sample_name (list): a list of the samples name (the order of the names is relevant)
            - sample_type (dict): key is a sample name, value is 'RNAseq' or 'gDNA' depending on the nature of the sample
        '''
        self.name = name
        self.ploidy = ploidy
        self.dreadfile = dreadfile
        self.sample_name = sample_name
        self.sample_type = sample_type

        # One independent dict per file kind; every sample starts at None.
        self.dsamfile = dict.fromkeys(self.sample_name)
        self.dbamfile = dict.fromkeys(self.sample_name)
        self.dsortedbamfile = dict.fromkeys(self.sample_name)

        self._reset_gene_data()

    def __str__(self):
        return "Organism: {}".format(self.name)

    def __repr__(self):
        return self.__str__()

    def _reset_gene_data(self):
        '''(Re)create the empty per-sample expression counts and the fingerprint.'''
        # dict of dict: first key is the sample name, second key is the gene
        # name, value is the count of said gene for said sample.
        self.dexpression = {sample: {} for sample in self.sample_name}
        # Only one fingerprint per organism.
        self.fingerprint = Fingerprint(self.ploidy)

    def add_expression(self, sample, gene, exp):
        '''Increment the count of a given gene from a given sample.

        Args:
            - sample (str): a sample name (must be one of the organism's samples)
            - gene (str): a gene name
            - exp (int): the number of counts to add

        Raises:
            - KeyError: if sample is not a key of dexpression
        '''
        counts = self.dexpression[sample]
        # Start from 0 when the gene has not been seen yet for this sample.
        counts[gene] = counts.get(gene, 0) + exp

    def add_snp(self, gene, position, snp_index, presence, polyploid_args=None):
        '''Register a snp in the fingerprint, once per gene copy.

        Args:
            - gene (str): a gene name
            - position (int): the position of the snp on the gene
            - snp_index (int): index of the snp in the hylite list
            - presence (list): list of int; gives its presence (-1/0/1) in each gene copy

        Kwargs:
            - polyploid_args: optional argument for polyploids; currently unused, kept for compatibility
        '''
        # Iterate over the ploidy (not over presence) so that exactly one
        # entry per gene copy is recorded.
        for allele in range(self.ploidy):
            self.fingerprint.add_snp(gene, position, snp_index, presence[allele], allele)

    def get_fingerprint(self, gene, start, stop, allele):
        '''Return the fingerprint of one allele on a region of a gene.

        Args:
            - gene (str): a gene
            - start (int): start position on the gene
            - stop (int): stop position on the gene
            - allele (int): index of the gene copy

        Returns:
            - list. the snp indexes found between start and stop on gene in the given allele, as reported by the Fingerprint object
        '''
        return self.fingerprint.get_fingerprint(gene, start, stop, allele)

    def get_all_fingerprint(self, gene, start, stop):
        '''Return the fingerprints of every allele on a region of a gene.

        Args:
            - gene (str): a gene
            - start (int): start position on the gene
            - stop (int): stop position on the gene

        Returns:
            - list. one fingerprint per allele between start and stop; if all fingerprints are the same, a list holding only that one fingerprint
        '''
        fps = [self.get_fingerprint(gene, start, stop, allele)
               for allele in range(self.ploidy)]
        if any(fp != fps[0] for fp in fps[1:]):
            return fps
        # All fingerprints are the same: return only one of them.
        return [fps[0]]

    def clear(self):
        '''
        Clear all the gene related data of the organism (expression counts and fingerprint).
        '''
        self._reset_gene_data()