# -*- coding: utf-8 -*-
"""
Burrows-Wheeler Algorithm Class, contains naive and advanced construction
methods, static methods were written as explicit as possible and were
factorized to facilitate algorithmic readability.
The yielding in some functions doesn't respect Space complexity, but it was done in this
manner to facilitate the data flow into the View class (the GUI).
"""
from __future__ import absolute_import
from typing import List, Tuple
[docs]class BurrosWheeler:
"""A class to represent the Burrows-Wheeler algorithm, all methods are
static for ease of reading and outside usability.
"""
[docs] @staticmethod
def pprint(mat: List[str]) -> None:
"""Pretty print, this method prints a burrows wheeler matrix
beautifully (without lists and strings).
Parameters
----------
mat : List[str]
The Burros-Wheeler matrix, i.e; a list of strings.
Returns
-------
None
Prints the matrix.
"""
for line in mat:
print(*line, sep="") # scatter operator to print all elements
# of a line
[docs] @staticmethod
def string_rotations(seq: str) -> List[str]:
"""Returns all string rotations of a sequence.
Parameters
----------
seq : str
he sequence to be rotated.
Returns
-------
List[str]
Returns a list of strings, i.e; a BW matrix like object.
"""
seq += '$'
double_seq = seq * 2
all_rotations = []
for i in range(0, len(seq), 1):
rot = double_seq[i:i+len(seq)]
all_rotations.append(rot)
yield [rot for rot in all_rotations]
[docs] @staticmethod
def construct_bwm(rotations: List[str]) -> List[str]:
"""This method constructs the Burrows-Wheeler Matrix from a list of
string rotations.
Parameters
----------
rotations : List[str]
A list of strings, i.e; a BW matrix like object.
Returns
-------
List[str]
A list of strings or a Burrows-Wheeler Matrix.
"""
sorted_rotations = sorted(rotations)
return sorted_rotations
[docs] @staticmethod
def encode_bwt(matrix: List[str]) -> str:
"""Returns the Burrows-Wheeler Transform from a given Burros-Wheeler
Matrix. the Burros-Wheeler Transform corresponds to the last column
of the matrix.
Parameters
----------
matrix : List[str]
A Burrows-Wheeler Matrix.
Returns
-------
str
The Burrows-Wheeler Transform.
"""
last_column = []
for line in matrix:
last_char = line[-1]
last_column.append(last_char)
transformed_seq = ''.join(last_column)
return transformed_seq
[docs] @staticmethod
def reconstruct_bwm(bwt: str) -> List[str]:
"""This method reconstructs the Burrows-Wheeler Matrix given the
corresponding Burros-Wheeler Transform. The naive algorithm for
constructing the matrix given the transform is going to iteratively
add the transform as a left column, then sorts lexicographically
the columns.
Parameters
----------
bwt : str
The Burrows-Wheeler Transform.
Returns
-------
List[str]
A Burrows-Wheeler Matrix.
"""
bwm = []
# first loop to create seeds for lines O(n)
for _ in range(0, len(bwt), 1):
bwm.append('')
for _ in range(0, len(bwt), 1):
for i in range(0, len(bwt), 1):
bwm[i] = bwt[i] + bwm[i]
yield [line for line in bwm]
bwm.sort()
yield [line for line in bwm]
[docs] @staticmethod
def decode_bwt(matrix: List[str]) -> str:
"""This method returns the original sequence from a given
Burrows-Wheeler Matrix, the original sequence is the line that ends
with the character '$'.
Parameters
----------
matrix : List[str]
A Burrows-Wheeler Matrix.
Returns
-------
str
The original sequence.
"""
seq = ""
for line in matrix: # search for the line that ends with '$'
if line[-1] == "$":
seq += line
return seq[:-1] # return the sequence without the '$' sign
[docs] @staticmethod
def suffix_array(sequence: str) -> List[Tuple[str, int]]:
"""Builds a suffix-array from a given sequence of characters.
- Complexity of the algorithm O(n^2log(n))
- Sorting is O(nlogn) and Finding is O(n)
Parameters
----------
sequence : str
The given sequence of characters.
Returns
-------
List[Tuple[str, int]]
The suffix array of the sequence; a list of tuples.
"""
sequence += '$'
suff_arr = []
for i in range(0, len(sequence), 1):
suff_arr.append((sequence[i:], i))
return sorted(suff_arr)
[docs] @staticmethod
def bwt_advanced(sequence: str) -> str:
"""Generates a Burrows-Wheeler Transfrom from a suffix array, advanced
construction of BWT. Better algorithmic complexity.
Parameters
----------
sequence : str
The sequence to ve transformed.
Returns
-------
str
The Burrows-Wheeler Transform.
"""
bwt = []
for suff in BurrosWheeler.suffix_array(sequence):
i = suff[1] # The suffix's index is the 2nd element in the tuple
if i == 0:
bwt.append('$')
else:
bwt.append(sequence[i - 1])
return ''.join(bwt)