Source code for snipgenie.aligners

"""
    Aligner methods for bacterial genomics.
    Created Nov 2019
    Copyright (C) Damien Farrell

    This program is free software; you can redistribute it and/or
    modify it under the terms of the GNU General Public License
    as published by the Free Software Foundation; either version 3
    of the License, or (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
"""

import sys, os, string, types, re
import platform, tempfile
import shutil, glob, collections
import itertools
import subprocess
import numpy as np
import pandas as pd
from . import tools

tempdir = tempfile.gettempdir()
home = os.path.expanduser("~")
config_path = os.path.join(home,'.config','snipgenie')
module_path = os.path.dirname(os.path.abspath(__file__))
BOWTIE_INDEXES = os.path.join(config_path, 'genome')
SUBREAD_INDEXES = os.path.join(config_path, 'genome')

[docs]def build_bwa_index(fastafile, path=None, show_cmd=True, overwrite=True): """Build a bwa index""" out = fastafile+'.bwt' if os.path.exists(out) and overwrite == False: return print ('indexing..') bwacmd = tools.get_cmd('bwa') cmd = '{b} index {i}'.format(b=bwacmd,i=fastafile) subprocess.check_output(cmd, shell=True) if show_cmd == True: print (cmd) return
[docs]def bwa_align(file1, file2, idx, out, threads=4, overwrite=False, options='', filter=None, unmapped=None): """Align reads to a reference with bwa. Args: file1, file2: fastq files idx: bwa index name out: output bam file name options: extra command line options e.g. -k INT for seed length unmapped: path to file for unmapped reads if required """ bwacmd = tools.get_cmd('bwa') samtoolscmd = tools.get_cmd('samtools') if unmapped == None: #by default we keep mapped reads only keepmapped = '-F 4' else: keepmapped = '' if file2 == None or file2 == '': filestr = '"{f}"'.format(f=file1) else: filestr = '"{f1}" "{f2}"'.format(f1=file1,f2=file2) cmd = '{b} mem -M -t {t} {p} {i} {f} | {s} view {k} -bt - | {s} sort -o {o}'.format( b=bwacmd,i=idx,s=samtoolscmd, f=filestr,o=out,t=threads,p=options,k=keepmapped) if not os.path.exists(out) or overwrite == True: print (cmd) tmp = subprocess.check_output(cmd, shell=True) #write out unmapped reads if unmapped != None: f1 = os.path.join(unmapped,os.path.basename(file1)) f2 = os.path.join(unmapped,os.path.basename(file2)) cmd = '{s} view -b -f12 {o} | {s} fastq -1 {f1} -2 {f2}'.format( s=samtoolscmd,o=out,u=unmapped,f1=f1,f2=f2) print (cmd) tmp = subprocess.check_output(cmd, shell=True) return
[docs]def build_bowtie_index(fastafile, path=None): """Build a bowtie index Args: fastafile: file input path: folder to place index files """ if path == None: path = BOWTIE_INDEXES name = os.path.splitext(os.path.basename(fastafile))[0] name = os.path.join(path, name) if not os.path.exists(path): os.makedirs(path) cmd = 'bowtie-build -f %s %s' %(fastafile, name) try: result = subprocess.check_output(cmd, shell=True, executable='/bin/bash') except subprocess.CalledProcessError as e: print (str(e.output)) return print ('built bowtie index for %s' %fastafile) return name
[docs]def bowtie_align(file1, file2, idx, out, unmapped=None, threads=2, overwrite=False, verbose=True, options=''): """Map reads using bowtie""" bowtiecmd = tools.get_cmd('bowtie') samtoolscmd = tools.get_cmd('samtools') if BOWTIE_INDEXES == None: print ('aligners.BOWTIE_INDEXES variable not set') return os.environ["BOWTIE_INDEXES"] = BOWTIE_INDEXES if file2 != None: filestr = '-1 "{f1}" -2 "{f2}"'.format(f1=file1,f2=file2) else: filestr = file1 cmd = '{bc} -q -p {t} -S {p} -x {r} {f} | {s} view -F 0x04 -bt - | {s} sort -o {o}'\ .format(bc=bowtiecmd,t=threads,f=filestr,p=options,r=idx,o=out,s=samtoolscmd) if not os.path.exists(out) or overwrite == True: if verbose == True: print (cmd) try: result = subprocess.check_output(cmd, shell=True, executable='/bin/bash', stderr= subprocess.STDOUT) if verbose == True: print (result.decode()) except subprocess.CalledProcessError as e: print (str(e.output)) return
[docs]def build_subread_index(fastafile): """Build an index for subread""" path = SUBREAD_INDEXES name = os.path.splitext(fastafile)[0] subreadalign = tools.get_cmd('subread-buildindex') cmd = '{sc} -o {n} {f}'.format(sc=subreadalign,n=name,f=fastafile) print (cmd) try: result = subprocess.check_output(cmd, shell=True) except subprocess.CalledProcessError as e: print (str(e.output)) return exts = ['.00.b.array','.00.b.tab','.files','.reads'] files = [name+i for i in exts] tools.move_files(files, path) return
[docs]def subread_align(file1, file2, idx, out, threads=2, overwrite=False, verbose=True): """Align reads with subread""" os.environ["SUBREAD_INDEXES"] = SUBREAD_INDEXES idx = os.path.join(SUBREAD_INDEXES, idx) samtoolscmd = tools.get_cmd('samtools') subreadcmd = tools.get_cmd('subread-align') params = '-t 1 --SAMoutput -m 3 -M 2' cmd = '{sc} {p} -T {t} -i {i} -r "{f1}" -R "{f2}" | {s} view -F 0x04 -bt - | {s} sort -o {o}'.format( sc=subreadcmd,p=params,t=threads,i=idx,f1=file1,f2=file2,s=samtoolscmd,o=out) if not os.path.exists(out) or overwrite == True: print (cmd) result = subprocess.check_output(cmd, shell=True, stderr= subprocess.STDOUT) return
[docs]def minimap2_align(file1, file2, idx, out, platform='illumina', threads=4, overwrite=False): """Align illumina/ONT reads with minimap2""" samtoolscmd = tools.get_cmd('samtools') minimapcmd = tools.get_cmd('minimap2') if platform == 'illumina': cmd = '{m} -t {t} -ax sr {r} {f1} {f2} | {s} view -F 0x04 -bt - | {s} sort -o {o}'\ .format(r=idx,f1=file1,f2=file2,s=samtoolscmd,m=minimapcmd,o=out,t=threads) else: cmd = '{m} -t {t} -ax map-ont {r} {f1} | {s} view -F 0x04 -bt - | {s} sort -o {o}'\ .format(r=idx,f1=file1,s=samtoolscmd,m=minimapcmd,o=out,t=threads) if not os.path.exists(out) or overwrite == True: print (cmd) result = subprocess.check_output(cmd, shell=True, stderr= subprocess.STDOUT) return