mirror of
https://github.com/exaloop/codon.git
synced 2025-06-03 15:03:52 +08:00
30 lines
821 B
Plaintext
30 lines
821 B
Plaintext
# SeqMap
# Seq workshop -- Section 3
# Reads index constructed in Section 2 and looks up k-mers from
# input reads to find candidate mappings.
# Usage: seqc run section3.seq <FASTA path> <FASTQ path>
#
# For every read in the FASTQ file, each k-mer is looked up in the index
# loaded from "<FASTA path>.index". A hit at indexed position `found` for
# the k-mer at read offset `pos` votes for the location `found - pos`.
# Locations that collect more than one vote are printed, one per line, as
# "<read name> <location + 1>".
from sys import argv
from bio import *
import pickle
import gzip

K: Static[int] = 32
index = None

# NOTE: pickle data must only be loaded from a trusted source; the index
# file is expected to be the one this workshop wrote in Section 2.
with gzip.open(argv[1] + '.index', 'rb') as jar:
    index = pickle.load(jar, T=Dict[Kmer[K],int])

candidates = {}  # location -> number of k-mers voting for it
for record in FASTQ(argv[2]):
    for pos,kmer in record.read.kmers_with_pos(k=K, step=1):
        # Look up the smaller of the k-mer and `~kmer` (its reverse
        # complement in Seq); presumably the index was keyed the same way
        # in Section 2 -- confirm against that script.
        found = index.get(min(kmer, ~kmer), -1)
        # -1 is the "not found" sentinel, so position 0 is a valid hit and
        # must be accepted (the previous `found > 0` test dropped it).
        if found >= 0:
            loc = found - pos
            candidates[loc] = candidates.get(loc, 0) + 1

    # Report only locations supported by more than one k-mer of this read.
    for loc,count in candidates.items():
        if count > 1:
            print(record.name, loc + 1)

    # Reuse the same dictionary for the next read.
    candidates.clear()