Source code for snipgenie.simulate

#!/usr/bin/env python

"""
    Simulate reads
    Created Sep 2022
    Copyright (C) Damien Farrell

    This program is free software; you can redistribute it and/or
    modify it under the terms of the GNU General Public License
    as published by the Free Software Foundation; either version 2
    of the License, or (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
"""

import sys, os, io
import numpy as np
import string
import subprocess
from Bio.SeqRecord import SeqRecord
from Bio.Seq import Seq
from Bio import SeqIO, AlignIO
from .qt import *
from . import tools, widgets

home = os.path.expanduser("~")
module_path = os.path.dirname(os.path.abspath(__file__)) #path to module
config_path = os.path.join(home,'.config','snipgenie')
bin_path = os.path.join(config_path, 'binaries')

[docs]def run_phastsim(path, ref, newick): """Run phastsim """ refseq = SeqIO.read(ref, 'fasta') scale = (10/len(refseq)) outpath = os.path.join(path, 'phastsim_output') cmd = 'phastSim --outpath {o} --outputFile 1 --seed 1 --createFasta' \ ' --createPhylip --treeFile {n}' \ ' --scale {s} --invariable .1 --alpha 1.0 --omegaAlpha 1.0' \ ' --reference {r}'.format(o=outpath,s=scale,r=ref,n=newick) print (cmd) subprocess.check_output(cmd, shell=True) return
[docs]def artificial_fastq_generator(ref, outfile, cmp=100): """Generate reads from reference""" jarfile = os.path.join(bin_path,'ArtificialFastqGenerator.jar') cmd = 'java -jar {j} -O {o} -R {r} -S ">temp" -RL 150 -CMP {cmp}'\ ' -CSD 0.2 -SE true'.format(j=jarfile, r=ref, o=outfile,cmp=cmp) #-URQS true -F1 {f1} -F2 {f2} print (cmd) subprocess.check_output(cmd, shell=True) return
[docs]def generate_fastqs(infile, outpath, reads=1e5, overwrite=False): """Make multiple fastqs""" from joblib import Parallel, delayed import multiprocessing, time num_cores = 4 simrecs = list(SeqIO.parse(infile,'fasta')) def my_func(rec): from tempfile import mkstemp x,tmp = mkstemp() SeqIO.write(SeqRecord(rec.seq,id='temp'), tmp, 'fasta') out = os.path.join(outpath,rec.id) if os.path.exists(out+'.1.fastq.gz') and overwrite == False: print ('found %s' %out) return cmp = reads*2*150/len(rec.seq) print (cmp) artificial_fastq_generator(tmp, out, cmp=cmp) cmd = 'pigz %s/*.fastq' %outpath subprocess.check_output(cmd, shell=True) st = time.time() #Parallel(n_jobs=num_cores)(delayed(my_func)(i) for i in simrecs) #print (time.time()-st) for i in simrecs: my_func(i)