from py_stringmatching import utils
from six.moves import xrange
from py_stringmatching.similarity_measure.sequence_similarity_measure import \
SequenceSimilarityMeasure
from py_stringmatching.similarity_measure.cython.cython_affine import affine
from py_stringmatching.similarity_measure.cython.cython_utils import cython_sim_ident
class Affine(SequenceSimilarityMeasure):
    """Computes the affine gap score between two strings.

    The affine gap measure is an extension of the Needleman-Wunsch measure
    that handles longer gaps more gracefully. For more information refer to
    the string matching chapter in the DI book ("Principles of Data
    Integration").

    Args:
        gap_start (float): Cost for the gap at the start (defaults to 1).
        gap_continuation (float): Cost for the gap continuation (defaults
            to 0.5).
        sim_func (function): Function computing similarity score between two
            characters, which are represented as strings (defaults to an
            identity function, which returns 1 if the two characters are the
            same and returns 0 otherwise).

    Attributes:
        gap_start (float): An attribute to store the gap cost at the start.
        gap_continuation (float): An attribute to store the gap continuation
            cost.
        sim_func (function): An attribute to store the similarity function.
    """

    def __init__(self, gap_start=1, gap_continuation=0.5,
                 sim_func=cython_sim_ident):
        # Store the scoring parameters before delegating to the base-class
        # initializer (same order as the original implementation).
        self.gap_start = gap_start
        self.gap_continuation = gap_continuation
        self.sim_func = sim_func
        super(Affine, self).__init__()

    def get_raw_score(self, string1, string2):
        """Computes the affine gap score between two strings.

        This score can be outside the range [0,1].

        Args:
            string1,string2 (str) : Input strings.

        Returns:
            Affine gap score between the two input strings (float).

        Raises:
            TypeError : If the inputs are not strings or if one of the inputs
                is None.

        Examples:
            >>> aff = Affine()
            >>> aff.get_raw_score('dva', 'deeva')
            1.5
            >>> aff = Affine(gap_start=2, gap_continuation=0.5)
            >>> aff.get_raw_score('dva', 'deeve')
            -0.5
            >>> aff = Affine(gap_continuation=0.2, sim_func=lambda s1, s2: (int(1 if s1 == s2 else 0)))
            >>> aff.get_raw_score('AAAGAATTCA', 'AAATCA')
            4.4
        """
        # Input validation: reject None before attempting any conversion.
        utils.sim_check_for_none(string1, string2)

        # Convert both inputs to unicode so they are handled uniformly.
        string1 = utils.convert_to_unicode(string1)
        string2 = utils.convert_to_unicode(string2)

        # Type check is performed on the converted values.
        utils.tok_check_for_string_input(string1, string2)

        # If one of the strings is empty, return 0 without invoking the
        # compiled scoring routine.
        if utils.sim_check_for_empty(string1, string2):
            return 0

        # Delegate the actual scoring to the Cython implementation.
        return affine(string1, string2, self.gap_start,
                      self.gap_continuation, self.sim_func)

    def get_gap_start(self):
        """Get gap start cost.

        Returns:
            gap start cost (float).
        """
        return self.gap_start

    def get_gap_continuation(self):
        """Get gap continuation cost.

        Returns:
            gap continuation cost (float).
        """
        return self.gap_continuation

    def get_sim_func(self):
        """Get similarity function.

        Returns:
            similarity function (function).
        """
        return self.sim_func

    def set_gap_start(self, gap_start):
        """Set gap start cost.

        Args:
            gap_start (float): Cost for the gap at the start.

        Returns:
            True, unconditionally, after the attribute has been updated.
        """
        self.gap_start = gap_start
        return True

    def set_gap_continuation(self, gap_continuation):
        """Set gap continuation cost.

        Args:
            gap_continuation (float): Cost for the gap continuation.

        Returns:
            True, unconditionally, after the attribute has been updated.
        """
        self.gap_continuation = gap_continuation
        return True

    def set_sim_func(self, sim_func):
        """Set similarity function.

        Args:
            sim_func (function): Function computing similarity score between
                two characters, represented as strings.

        Returns:
            True, unconditionally, after the attribute has been updated.
        """
        self.sim_func = sim_func
        return True