# SeqMap
# Seq workshop -- Section 3
# Reads the index constructed in Section 2 and looks up k-mers from
# input reads to find candidate mappings.
#
# Usage: seqc run section3.seq <reference path> <FASTQ path>
#   argv[1]: path prefix of the index; the file '<argv[1]>.index' is opened
#   argv[2]: FASTQ file containing the reads to map
#
# Output: for every read, one line "<read name> <position>" per candidate
# location that was supported by more than one k-mer hit.
from sys import argv
from bio import *
import pickle
import gzip

# k-mer length; must match the length used when the index was built,
# because the pickled dictionary is keyed by Kmer[K].
K: Static[int] = 32
index = None

# NOTE(security): pickle deserialisation is only safe on trusted files.
# The index is expected to be the one produced by Section 2 -- do not point
# this script at an index file obtained from an untrusted source.
with gzip.open(argv[1] + '.index', 'rb') as jar:
    index = pickle.load(jar, T=Dict[Kmer[K],int])

candidates = {}  # candidate location -> number of supporting k-mer hits
for record in FASTQ(argv[2]):
    for pos,kmer in record.read.kmers_with_pos(k=K, step=1):
        # Look up the smaller of the k-mer and ~kmer (its reverse complement
        # in Seq), so that both strands resolve to the same index key.
        found = index.get(min(kmer, ~kmer), -1)
        # -1 is the "not found" sentinel; 0 is a legitimate indexed position,
        # so the test must be >= 0 (the previous `> 0` dropped hits at 0).
        if found >= 0:
            # Subtract the k-mer's offset within the read so that all hits
            # from one alignment vote for the same location.
            loc = found - pos
            candidates[loc] = candidates.get(loc, 0) + 1

    # Report locations backed by at least two k-mer hits.
    for loc,count in candidates.items():
        if count > 1:
            # The location is printed as loc + 1 (presumably converting a
            # 0-based index position to 1-based output -- confirm against
            # the Section 2 indexer).
            print(record.name, loc + 1)

    # Reuse the same dictionary for the next read.
    candidates.clear()