# Source code for py_stringmatching.similarity_measure.needleman_wunsch

import numpy as np

from py_stringmatching import utils
from six.moves import xrange
from py_stringmatching.similarity_measure.sequence_similarity_measure import \
                                                    SequenceSimilarityMeasure
from py_stringmatching.similarity_measure.cython.cython_needleman_wunsch import needleman_wunsch
from py_stringmatching.similarity_measure.cython.cython_utils import cython_sim_ident


class NeedlemanWunsch(SequenceSimilarityMeasure):
    """Computes Needleman-Wunsch measure.

    The Needleman-Wunsch distance generalizes the Levenshtein distance and
    considers global alignment between two strings. Specifically, it is
    computed by assigning a score to each alignment between the two input
    strings and choosing the score of the best alignment, that is, the
    maximal score. An alignment between two strings is a set of
    correspondences between their characters, allowing for gaps.

    Args:
        gap_cost (float): Cost of gap (defaults to 1.0).
        sim_func (function): Similarity function to give a score for each
            correspondence between the characters (defaults to an identity
            function, which returns 1 if the two characters are the same and
            0 otherwise).

    Attributes:
        gap_cost (float): An attribute to store the gap cost.
        sim_func (function): An attribute to store the similarity function.
    """

    def __init__(self, gap_cost=1.0, sim_func=cython_sim_ident):
        self.gap_cost = gap_cost
        self.sim_func = sim_func
        # Explicit two-argument super() keeps the class usable on Python 2,
        # which this module still targets (it imports from ``six``).
        super(NeedlemanWunsch, self).__init__()

    def get_raw_score(self, string1, string2):
        """Computes the raw Needleman-Wunsch score between two strings.

        Args:
            string1,string2 (str) : Input strings.

        Returns:
            Needleman-Wunsch similarity score (float).

        Raises:
            TypeError : If the inputs are not strings or if one of the inputs
                is None.

        Examples:
            >>> nw = NeedlemanWunsch()
            >>> nw.get_raw_score('dva', 'deeva')
            1.0
            >>> nw = NeedlemanWunsch(gap_cost=0.0)
            >>> nw.get_raw_score('dva', 'deeve')
            2.0
            >>> nw = NeedlemanWunsch(gap_cost=1.0, sim_func=lambda s1, s2 : (2.0 if s1 == s2 else -1.0))
            >>> nw.get_raw_score('dva', 'deeve')
            1.0
            >>> nw = NeedlemanWunsch(gap_cost=0.5, sim_func=lambda s1, s2 : (1.0 if s1 == s2 else -1.0))
            >>> nw.get_raw_score('GCATGCUA', 'GATTACA')
            2.5

        """
        # input validations
        utils.sim_check_for_none(string1, string2)

        # convert input to unicode.
        string1 = utils.convert_to_unicode(string1)
        string2 = utils.convert_to_unicode(string2)

        # The string-type check runs on the converted values, i.e. after the
        # unicode conversion above; this order is kept as in the original.
        utils.tok_check_for_string_input(string1, string2)

        # returns the similarity score from the cython function
        return needleman_wunsch(string1, string2, self.gap_cost,
                                self.sim_func)

    def get_gap_cost(self):
        """Get gap cost.

        Returns:
            Gap cost (float).
        """
        return self.gap_cost

    def get_sim_func(self):
        """Get the similarity function.

        Returns:
            similarity function (function).
        """
        return self.sim_func

    def set_gap_cost(self, gap_cost):
        """Set gap cost.

        Args:
            gap_cost (float): Cost of gap.

        Returns:
            True, unconditionally, once the attribute has been stored.
        """
        self.gap_cost = gap_cost
        return True

    def set_sim_func(self, sim_func):
        """Set similarity function.

        Args:
            sim_func (function): Similarity function to give a score for the
                correspondence between characters.

        Returns:
            True, unconditionally, once the attribute has been stored.
        """
        self.sim_func = sim_func
        return True