def circular_permuted1(seq):
all_possible = []
for i in range(len(seq)):
permuted_seq = (seq + seq)[i:i+len(seq)]
all_possible.append(permuted_seq)
return(all_possible)
from collections import deque
def circular_permuted2(seq):
all_possible = []
d = deque(a)
for i in range(len(a)):
d.rotate()
all_possible.append(''.join(list(d)))
return(all_possible)
import toolz
def circular_permuted3(seq):
all_possible = []
for permuted_seq in toolz.sliding_window(len(seq), seq+seq[:-1]):
all_possible.append(''.join(permuted_seq))
return(all_possible)
def circular_permuted4(seq):
n = len(seq)
return([''.join([seq[i - j] for i in range(n)]) for j in range(n)])
def circular_permuted5(seq):
n = len(seq)
return([(seq + seq)[i:i+len(seq)] for i in range(n)])
def circular_permuted6(x):
return([x[i:]+x[:i] for i in range(len(x))])
a = "ATCG"
%%timeit -r10
circular_permuted1(a)
100000 loops, best of 10: 2.09 µs per loop
%%timeit -r10
circular_permuted2(a)
100000 loops, best of 10: 3.62 µs per loop
%%timeit -r10
circular_permuted3(a)
The slowest run took 4.22 times longer than the fastest. This could mean that an intermediate result is being cached
100000 loops, best of 10: 3.65 µs per loop
%%timeit -r10
circular_permuted4(a)
100000 loops, best of 10: 5.8 µs per loop
%%timeit -r10
circular_permuted5(a)
100000 loops, best of 10: 2.01 µs per loop
%%timeit -r10
circular_permuted6(a) # Winner!
1000000 loops, best of 10: 1.87 µs per loop
from Bio.Seq import Seq
from Bio.Alphabet import generic_dna
def self_and_rev_complement1(in_dna):
all_possible = [in_dna]
# Get reverse complement
dna = Seq(in_dna, generic_dna)
rev_complement = str(dna.reverse_complement())
all_possible.append(rev_complement)
return(all_possible)
from skbio.sequence import DNASequence
def self_and_rev_complement2(in_dna):
all_possible = [in_dna]
# Get reverse complement
dna = DNASequence(in_dna)
rev_complement = str(dna.reverse_complement())
all_possible.append(rev_complement)
return(all_possible)
%%timeit -r10
self_and_rev_complement1(a)
The slowest run took 6.29 times longer than the fastest. This could mean that an intermediate result is being cached
100000 loops, best of 10: 5.99 µs per loop
%%timeit -r10
self_and_rev_complement2(a)
The slowest run took 4.44 times longer than the fastest. This could mean that an intermediate result is being cached
100000 loops, best of 10: 10.7 µs per loop
self_and_rev_complement2(a)
['ATCG', 'CGAT']
from Bio.Seq import Seq
from Bio.Alphabet import generic_dna
def normalise_str(in_dna):
"""Find all possible eqivalent STR sequences.
And return the first alphabetically.
For example, TA = AT. But would return AT.
"""
all_possible = [in_dna]
# Get reverse complement
dna = Seq(in_dna, generic_dna)
rev_complement = str(dna.reverse_complement())
all_possible.append(rev_complement)
# Permute
for seq in [in_dna, rev_complement]:
for permuted_seq in circular_permuted1(seq): # Switch to faster permutation (6)
all_possible.append(permuted_seq)
# Sort and take the first
all_possible.sort()
return(all_possible[0])
from skbio.sequence import DNASequence
def normalise_str2(in_dna):
"""Find all possible eqivalent STR sequences.
And return the first alphabetically.
For example, TA = AT. But would return AT.
"""
all_possible = [in_dna]
# Get reverse complement
dna = DNASequence(in_dna)
rev_complement = str(dna.reverse_complement())
all_possible.append(rev_complement)
# Permute
for seq in [in_dna, rev_complement]:
for permuted_seq in circular_permuted1(seq): # Switch to faster permutation (6)
all_possible.append(permuted_seq)
# Sort and take the first
all_possible.sort()
return(all_possible[0])
%%timeit -r10
normalise_str(a)
100000 loops, best of 10: 12.9 µs per loop
%%timeit -r10
normalise_str2(a)
10000 loops, best of 10: 17.4 µs per loop