1
0
mirror of https://github.com/exaloop/codon.git synced 2025-06-03 15:03:52 +08:00
codon/docs/workshop/section3.codon
2021-10-01 09:56:35 -04:00

30 lines
821 B
Plaintext

# SeqMap
# Seq workshop -- Section 3
# Reads index constructed in Section 2 and looks up k-mers from
# input reads to find candidate mappings.
# Usage: codon run section3.codon <FASTA path> <FASTQ path>
# (the index built in Section 2 is read from "<FASTA path>.index")
from sys import argv
from bio import *
import pickle
import gzip
# k-mer length, fixed at compile time; it is also the key width of the
# pickled index (Kmer[K]) loaded below.
K: Static[int] = 32

# Load the k-mer -> position index from the gzip-compressed pickle stored
# next to the FASTA file. `T` tells pickle.load which type to deserialize.
index = None
with gzip.open(argv[1] + '.index', 'rb') as jar:
    index = pickle.load(jar, T=Dict[Kmer[K],int])

# Reused for every read and emptied at the end of each iteration.
candidates = {}  # position -> count mapping

for record in FASTQ(argv[2]):
    # Look up every k-mer of the read (step=1 -> each offset) in the index.
    for pos, kmer in record.read.kmers_with_pos(k=K, step=1):
        # min(kmer, ~kmer) is presumably the canonical (strand-independent)
        # form used as the key when the index was built -- verify against
        # Section 2.
        found = index.get(min(kmer, ~kmer), -1)
        # -1 is the "not in index" sentinel, so every non-negative value is
        # a real hit. The previous `found > 0` test silently dropped k-mers
        # stored at index position 0 (assumes Section 2 stores 0-based
        # positions, consistent with the `pos + 1` printed below).
        if found >= 0:
            # Subtract the k-mer's offset within the read from the indexed
            # position to get the candidate location for the read's start.
            loc = found - pos
            candidates[loc] = candidates.get(loc, 0) + 1

    # Report only locations supported by more than one k-mer of this read.
    for pos, count in candidates.items():
        if count > 1:
            # +1 presumably converts a 0-based location to 1-based output.
            print(record.name, pos + 1)

    # Reset the tally so the next read starts from an empty mapping.
    candidates.clear()