1
0
mirror of https://github.com/exaloop/codon.git synced 2025-06-03 15:03:52 +08:00
codon/docs/workshop/section5.seq
2021-09-27 14:02:44 -04:00

43 lines
1.2 KiB
Plaintext

# SeqMap
# Seq workshop -- Section 5
# Reads index constructed in Section 2 and looks up k-mers from
# input reads to find candidate mappings, then performs alignment.
# Implemented with Seq pipelines.
# Usage: seqc run section5.seq <FASTA path> <FASTQ path>
from sys import argv
from time import timing
from bio import *
import pickle
import gzip
# Reference sequence to map against. Every FASTA record overwrites the
# previous assignment, so the last record in the file is the one kept.
reference = s''
for entry in FASTA(argv[1]):
    reference = entry.seq

# k-mer length used for the index key type and for read k-mer lookups.
K: Static[int] = 32

# Load the k-mer -> int index from the gzip-compressed pickle stored next to
# the FASTA file as '<FASTA path>.index'.
# NOTE(review): a pickle is only as trustworthy as the file it is read from.
index = None
index_path = argv[1] + '.index'
with gzip.open(index_path, 'rb') as fh:
    index = pickle.load(fh, T=Dict[Kmer[K],int])
def find_candidates(record):
    # Yield (record, position) pairs: candidate start offsets on the
    # reference for the read held in `record`.
    #
    # Every k-mer of the read (length K, step 1) is looked up in the global
    # `index` under the smaller of the k-mer and its `~` (reverse-complement)
    # form. The stored int is treated as the k-mer's position, so a hit votes
    # for the offset where the read itself would begin: stored position minus
    # the k-mer's offset within the read. Offsets that collect more than one
    # vote are yielded.
    candidates = {}  # implied read start offset -> number of k-mer votes
    for pos, kmer in record.read.kmers_with_pos(k=K, step=1):
        found = index.get(min(kmer, ~kmer), -1)
        # -1 is the "not in index" sentinel, so any non-negative value is a
        # genuine hit; position 0 must count as one too.
        if found >= 0:
            loc = found - pos
            candidates[loc] = candidates.get(loc, 0) + 1
    for pos, count in candidates.items():
        # Offsets supported by only a single k-mer are not reported.
        if count > 1:
            yield record, pos
def align_and_output(t):
    # Align one read against its candidate window of the global `reference`
    # and print the result.
    #
    # `t` is a (record, offset) pair as yielded by find_candidates. The window
    # starts at `offset` and is as long as the read. Printed fields: read
    # name, offset + 1, alignment score, CIGAR string.
    # NOTE(review): find_candidates can produce a negative offset (k-mer
    # position smaller than its offset in the read); presumably the slice
    # below then follows Python negative-index rules -- confirm intended.
    hit, start = t
    read = hit.read
    window = reference[start:start + len(read)]
    result = read.align(window)
    print(hit.name, start + 1, result.score, result.cigar)
# Run the whole mapping step inside the timing('mapping') context: each FASTQ
# record is piped through candidate search and then alignment/printing.
with timing('mapping'):
    reads = FASTQ(argv[2])
    reads |> iter |> find_candidates |> align_and_output