#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Encoder classes, Controller architecture in a MVC layout."""
from __future__ import absolute_import
import os
from dnazip.sequence import Sequence
from dnazip.burros_wheeler import BurrosWheeler
from dnazip.huffman import HuffmanTree
[docs]class BWEncoder:
"""An encoder class for Burros-Wheeler transform, it is used as a
controller in a MVC architecture.
Attributes
----------
path: str
The path of the file to be transformed with Burros-Wheeler.
seq: Sequence
The sequence that was extracted from the file; a Sequence object.
bwt_output: str
The output file path for BWT.
rotations: List[str]
A matrix of rotations from the original sequence.
bwm: List[str]
The Burros-Wheeler Matrix.
bwt: str
The Burros-Wheeler transform of the sequence.
"""
def __init__(self: object, path: str) -> None:
"""Class constructor.
Parameters
----------
path : str
The path of the file to be read.
Returns
-------
None
A class instance.
"""
self.path = os.path.splitext(path)[0]
self.seq = Sequence(path)
self.bwt_output = self.path + '_bwt.txt'
self.rotations = None
self.bwm = None
self.bwt = None
[docs] def encode(self: object) -> None:
"""The main encoding method of the controller.
Returns
-------
None
Fills all the properties of an object and writes out the
transformed sequence to a file.
"""
self.rotations = list(BurrosWheeler.string_rotations(self.seq.read()))
self.bwm = BurrosWheeler.construct_bwm(self.rotations[-1])
self.bwt = BurrosWheeler.encode_bwt(self.bwm)
Sequence(self.bwt_output).write(self.bwt)
[docs]class HuffEncoder:
"""An encoder class for Huffman compression, it is used as a controller
in a MVC architecture.
Attributes
----------
path: str
The path of the file to be compressed with Huffman compression.
seq: Sequence
The sequence that was extracted from the file; a Sequence object.
huff_output: str
The output file path for Huffman compression.
binary: str
The binary sequence that was translated from the original sequence
using Huffman tree and codes.
header: str
The header of the compressed file; contains Huffman codes and paths
as well as padding that were generated when compressing the sequence.
unicode: str
The compressed format of the sequence.
compressed: str
The compressed sequence to be written to a file.
"""
def __init__(self: object, path: str) -> None:
"""Class constructor.
Parameters
----------
path : str
The path of the file to be read.
Returns
-------
None
A class instance.
"""
self.path = os.path.splitext(path)[0]
self.seq = Sequence(path)
self.huff_output = self.path + '_compressed.txt'
self.binary = None
self.header = None
self.unicode = None
self.compressed = None
[docs] def encode(self: object) -> None:
"""The main encoding method of the controller.
Returns
-------
None
Fills all the properties of an object and writes out the
compressed sequence to a file.
"""
tree = HuffmanTree(self.seq.read())
tree.get_codings(tree.root)
self.binary = tree.seq_to_binstr()
self.unicode = HuffmanTree.binstr_to_unicode(self.binary)
self.header = tree.codes_to_header()
self.compressed = self.header + self.unicode
Sequence(self.huff_output).write_bytes(self.compressed)
[docs]class FullEncoder:
"""An encoder class for both the Burros-Wheeler transform and Huffman
compression, controller architecture.
Attributes
----------
path: str
The path of the file to be compressed with BWT + Huffman compression.
bw_encoder: BWEncoder
A BWEncoder object to do the Burros-Wheeler transform on a sequence.
huff_encoder: HuffEncoder
A HuffEncoder object to do the Huffman compression on the BW transform.
"""
def __init__(self: object, path: str) -> None:
"""Class constructor.
Parameters
----------
path : str
The path of the file to be read.
Returns
-------
None
A class instance.
"""
self.path = path
self.bw_encoder = None
self.huff_encoder = None
[docs] def full_zip(self: object) -> None:
"""The main encoding method of the controller, it first encodes the
sequence with BWT, then passes the BWT to Huffman compression.
Returns
-------
None
Fills all the properties of an object and writes out the
compressed sequence to a file.
"""
self.bw_encoder = BWEncoder(self.path)
self.bw_encoder.encode()
self.huff_encoder = HuffEncoder(self.bw_encoder.bwt_output)
self.huff_encoder.encode()