py_stringmatching
Installation
Requirements
Platforms
Dependencies
Installing Using pip
Installing from Source Distribution
Tutorial
1. Selecting a Similarity Measure
2. Selecting a Tokenizer Type
3. Creating a Tokenizer Object and Using It to Tokenize the Input Strings
4. Creating a Similarity Measure Object and Using It to Compute a Similarity Score
Handling a Large Number of String Pairs
Handling Missing Values
References
Tokenizers
Alphabetic Tokenizer
Alphanumeric Tokenizer
Delimiter Tokenizer
Qgram Tokenizer
Whitespace Tokenizer
Similarity Measures
Affine Gap
Cosine
Dice
Hamming Distance
Jaccard
Jaro
Jaro Winkler
Levenshtein
Monge Elkan
Needleman Wunsch
Overlap Coefficient
Smith Waterman
Soft TF/IDF
TF/IDF
py_stringmatching
Docs
»
Index
A | C | D | G | H | J | L | M | N | O | P | Q | R | S | T | W
A
Affine (class in py_stringmatching.similarity_measure.affine)
AlphabeticTokenizer (class in py_stringmatching.tokenizer.alphabetic_tokenizer)
AlphanumericTokenizer (class in py_stringmatching.tokenizer.alphanumeric_tokenizer)
C
Cosine (class in py_stringmatching.similarity_measure.cosine)
D
dampen (py_stringmatching.similarity_measure.tfidf.TfIdf attribute)
DelimiterTokenizer (class in py_stringmatching.tokenizer.delimiter_tokenizer)
Dice (class in py_stringmatching.similarity_measure.dice)
G
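gap_continuation (py_stringmatching.similarity_measure.affine.Affine attribute)
gap_cost (py_stringmatching.similarity_measure.needleman_wunsch.NeedlemanWunsch attribute)
(py_stringmatching.similarity_measure.smith_waterman.SmithWaterman attribute)
gap_start (py_stringmatching.similarity_measure.affine.Affine attribute)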
get_corpus_list() (py_stringmatching.similarity_measure.soft_tfidf.SoftTfIdf method)
(py_stringmatching.similarity_measure.tfidf.TfIdf method)
get_dampen() (py_stringmatching.similarity_measure.tfidf.TfIdf method)
get_delim_set() (py_stringmatching.tokenizer.delimiter_tokenizer.DelimiterTokenizer method)
(py_stringmatching.tokenizer.whitespace_tokenizer.WhitespaceTokenizer method)
get_gap_continuation() (py_stringmatching.similarity_measure.affine.Affine method)
get_gap_cost() (py_stringmatching.similarity_measure.needleman_wunsch.NeedlemanWunsch method)
(py_stringmatching.similarity_measure.smith_waterman.SmithWaterman method)
get_gap_start() (py_stringmatching.similarity_measure.affine.Affine method)
get_prefix_weight() (py_stringmatching.similarity_measure.jaro_winkler.JaroWinkler method)
get_qval() (py_stringmatching.tokenizer.qgram_tokenizer.QgramTokenizer method)
get_raw_score() (py_stringmatching.similarity_measure.affine.Affine method)
(py_stringmatching.similarity_measure.cosine.Cosine method)
(py_stringmatching.similarity_measure.dice.Dice method)
(py_stringmatching.similarity_measure.hamming_distance.HammingDistance method)
(py_stringmatching.similarity_measure.jaccard.Jaccard method)
(py_stringmatching.similarity_measure.jaro.Jaro method)
(py_stringmatching.similarity_measure.jaro_winkler.JaroWinkler method)
(py_stringmatching.similarity_measure.levenshtein.Levenshtein method)
(py_stringmatching.similarity_measure.monge_elkan.MongeElkan method)
(py_stringmatching.similarity_measure.needleman_wunsch.NeedlemanWunsch method)
(py_stringmatching.similarity_measure.overlap_coefficient.OverlapCoefficient method)
(py_stringmatching.similarity_measure.smith_waterman.SmithWaterman method)
(py_stringmatching.similarity_measure.soft_tfidf.SoftTfIdf method)
(py_stringmatching.similarity_measure.tfidf.TfIdf method)
get_return_set() (py_stringmatching.tokenizer.alphabetic_tokenizer.AlphabeticTokenizer method)
(py_stringmatching.tokenizer.alphanumeric_tokenizer.AlphanumericTokenizer method)
(py_stringmatching.tokenizer.delimiter_tokenizer.DelimiterTokenizer method)
(py_stringmatching.tokenizer.qgram_tokenizer.QgramTokenizer method)
(py_stringmatching.tokenizer.whitespace_tokenizer.WhitespaceTokenizer method)
get_sim_func() (py_stringmatching.similarity_measure.affine.Affine method)
(py_stringmatching.similarity_measure.monge_elkan.MongeElkan method)
(py_stringmatching.similarity_measure.needleman_wunsch.NeedlemanWunsch method)
(py_stringmatching.similarity_measure.smith_waterman.SmithWaterman method)
(py_stringmatching.similarity_measure.soft_tfidf.SoftTfIdf method)
get_sim_score() (py_stringmatching.similarity_measure.cosine.Cosine method)
(py_stringmatching.similarity_measure.dice.Dice method)
(py_stringmatching.similarity_measure.hamming_distance.HammingDistance method)
(py_stringmatching.similarity_measure.jaccard.Jaccard method)
(py_stringmatching.similarity_measure.jaro.Jaro method)
(py_stringmatching.similarity_measure.jaro_winkler.JaroWinkler method)
(py_stringmatching.similarity_measure.levenshtein.Levenshtein method)
(py_stringmatching.similarity_measure.overlap_coefficient.OverlapCoefficient method)
(py_stringmatching.similarity_measure.tfidf.TfIdf method)
get_threshold() (py_stringmatching.similarity_measure.soft_tfidf.SoftTfIdf method)
H
HammingDistance (class in py_stringmatching.similarity_measure.hamming_distance)
J
Jaccard (class in py_stringmatching.similarity_measure.jaccard)
Jaro (class in py_stringmatching.similarity_measure.jaro)
JaroWinkler (class in py_stringmatching.similarity_measure.jaro_winkler)
L
Levenshtein (class in py_stringmatching.similarity_measure.levenshtein)
M
MongeElkan (class in py_stringmatching.similarity_measure.monge_elkan)
N
NeedlemanWunsch (class in py_stringmatching.similarity_measure.needleman_wunsch)
O
OverlapCoefficient (class in py_stringmatching.similarity_measure.overlap_coefficient)
P
prefix_weight (py_stringmatching.similarity_measure.jaro_winkler.JaroWinkler attribute)
py_stringmatching.similarity_measure.cosine (module)
py_stringmatching.similarity_measure.dice (module)
py_stringmatching.similarity_measure.hamming_distance (module)
py_stringmatching.similarity_measure.jaccard (module)
py_stringmatching.similarity_measure.jaro (module)
py_stringmatching.similarity_measure.jaro_winkler (module)
py_stringmatching.similarity_measure.levenshtein (module)
py_stringmatching.similarity_measure.overlap_coefficient (module)
py_stringmatching.similarity_measure.tfidf (module)
py_stringmatching.tokenizer.alphabetic_tokenizer (module)
py_stringmatching.tokenizer.alphanumeric_tokenizer (module)
py_stringmatching.tokenizer.delimiter_tokenizer (module)
py_stringmatching.tokenizer.qgram_tokenizer (module)
py_stringmatching.tokenizer.whitespace_tokenizer (module)
Q
QgramTokenizer (class in py_stringmatching.tokenizer.qgram_tokenizer)
qval (py_stringmatching.tokenizer.qgram_tokenizer.QgramTokenizer attribute)
R
return_set (py_stringmatching.tokenizer.alphabetic_tokenizer.AlphabeticTokenizer attribute)
(py_stringmatching.tokenizer.alphanumeric_tokenizer.AlphanumericTokenizer attribute)
(py_stringmatching.tokenizer.delimiter_tokenizer.DelimiterTokenizer attribute)
(py_stringmatching.tokenizer.qgram_tokenizer.QgramTokenizer attribute)
(py_stringmatching.tokenizer.whitespace_tokenizer.WhitespaceTokenizer attribute)
S
set_corpus_list() (py_stringmatching.similarity_measure.soft_tfidf.SoftTfIdf method)
(py_stringmatching.similarity_measure.tfidf.TfIdf method)
set_dampen() (py_stringmatching.similarity_measure.tfidf.TfIdf method)
set_delim_set() (py_stringmatching.tokenizer.delimiter_tokenizer.DelimiterTokenizer method)
set_gap_continuation() (py_stringmatching.similarity_measure.affine.Affine method)
set_gap_cost() (py_stringmatching.similarity_measure.needleman_wunsch.NeedlemanWunsch method)
(py_stringmatching.similarity_measure.smith_waterman.SmithWaterman method)
set_gap_start() (py_stringmatching.similarity_measure.affine.Affine method)
set_prefix_weight() (py_stringmatching.similarity_measure.jaro_winkler.JaroWinkler method)
set_qval() (py_stringmatching.tokenizer.qgram_tokenizer.QgramTokenizer method)
set_return_set() (py_stringmatching.tokenizer.alphabetic_tokenizer.AlphabeticTokenizer method)
(py_stringmatching.tokenizer.alphanumeric_tokenizer.AlphanumericTokenizer method)
(py_stringmatching.tokenizer.delimiter_tokenizer.DelimiterTokenizer method)
(py_stringmatching.tokenizer.qgram_tokenizer.QgramTokenizer method)
(py_stringmatching.tokenizer.whitespace_tokenizer.WhitespaceTokenizer method)
set_sim_func() (py_stringmatching.similarity_measure.affine.Affine method)
(py_stringmatching.similarity_measure.monge_elkan.MongeElkan method)
(py_stringmatching.similarity_measure.needleman_wunsch.NeedlemanWunsch method)
(py_stringmatching.similarity_measure.smith_waterman.SmithWaterman method)
(py_stringmatching.similarity_measure.soft_tfidf.SoftTfIdf method)
set_threshold() (py_stringmatching.similarity_measure.soft_tfidf.SoftTfIdf method)
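sim_func (py_stringmatching.similarity_measure.affine.Affine attribute)
(py_stringmatching.similarity_measure.monge_elkan.MongeElkan attribute)
(py_stringmatching.similarity_measure.needleman_wunsch.NeedlemanWunsch attribute)
(py_stringmatching.similarity_measure.smith_waterman.SmithWaterman attribute)
(py_stringmatching.similarity_measure.soft_tfidf.SoftTfIdf attribute)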
SmithWaterman (class in py_stringmatching.similarity_measure.smith_waterman)
SoftTfIdf (class in py_stringmatching.similarity_measure.soft_tfidf)
T
TfIdf (class in py_stringmatching.similarity_measure.tfidf)
threshold (py_stringmatching.similarity_measure.soft_tfidf.SoftTfIdf attribute)
tokenize() (py_stringmatching.tokenizer.alphabetic_tokenizer.AlphabeticTokenizer method)
(py_stringmatching.tokenizer.alphanumeric_tokenizer.AlphanumericTokenizer method)
(py_stringmatching.tokenizer.delimiter_tokenizer.DelimiterTokenizer method)
(py_stringmatching.tokenizer.qgram_tokenizer.QgramTokenizer method)
(py_stringmatching.tokenizer.whitespace_tokenizer.WhitespaceTokenizer method)
W
WhitespaceTokenizer (class in py_stringmatching.tokenizer.whitespace_tokenizer)