Skip to content

Commit 7d2ed13

Browse files
author
Nathan Zylbersztejn
committed
init
0 parents  commit 7d2ed13

18 files changed

+185
-0
lines changed

.idea/encodings.xml

+4
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

.idea/fuzzy_matcher.iml

+12
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

.pytest_cache/v/cache/lastfailed

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
{}

.pytest_cache/v/cache/nodeids

+5
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
[
2+
"tests/test_fuzzy_matcher.py::test_partial_distance",
3+
"tests/test_fuzzy_matcher.py::test_ratio",
4+
"tests/test_fuzzy_matcher.py::test_partial_ratio"
5+
]

.pytest_cache/v/cache/stepwise

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
[]

.travis.yml

+20
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
language: python
2+
python:
3+
- '2.7'
4+
- '3.4'
5+
- '3.5'
6+
- '3.6'
7+
- '3.7'
8+
install:
9+
- pip install -r requirements.txt
10+
script:
11+
- python -m pytest
12+
#deploy:
13+
# provider: pypi
14+
# distributions: sdist
15+
# user: mrbot
16+
# password:
17+
# secure: 2Ti14Ow0tpBbs/aqyocfHVs7zzTxM05mITwmBVcJWaoqLMu1DVO4r3oFMKzSSZaR9u6G8S2/kro6wvrtdUt6qQ3roBD4T+beimwX6hHcKuVXFASp96KrXT2jrVWaKCTlS+pWOodcR2VZQAX9/pn5OZuNw1iwuEIMEnRoFpuZktnfjmxFtYMT1O2fHdkisVQD0MvgYlf45BXP4axDzLgjD3pp/kQNkfyHCNQPciC/iDhZcQHcN9DeQBStEoijKQdOUTEg8TWTZfU1+Ia7XIjuNp/Kw7QHK1N4uylhtLVPcOBWfnJRugnyy62g2XvpybdTia40rye9EyWpZqZtBbFDpSsvssuxsVh/AWc0+zGYErYidBFlOE6Xj482iYqR8fCK19r0kxQMEqJWNYeeQBEHEEVWafQLZbjioGlNw58xDG2YgdHVGK4ky8BR6nfdTPqArVNlT6uQYJc2o6o+oZlTx7jFCuczCRpt6kHhd1w0XCbfoLVP3scMDy8eK/SGpvKmmWKfTBsyhx1DfTDgLFaHceH1earccXzbMslrA/2xzpOxY7iuNPObJBGYF/2dgKi5KkvBMIGw5vKG43SrnHQx2T3m12eJBDvWRK5908Zki/13FqaPVzdTieqHPnW+Ob2NUqDEqE3CDfgj2gcLbGkpp3AtfW+055E6qwMO2V45pp8=
18+
# on:
19+
# tags: true
20+
# python: 3.6

README.md

+23
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
# Fuzzy Matcher
2+
3+
A super simple, MIT-licensed fuzzy matching library, intended as a permissively licensed alternative to FuzzyWuzzy, which is GPL-licensed.
4+
It has far fewer features than FuzzyWuzzy, so if the GPL is not a blocker for you, you should probably use FuzzyWuzzy instead.
5+
6+
## Usage
7+
8+
```python
9+
from fuzzy_matcher import process
10+
query = "orange"
11+
val = ['blue', 'orange', 'brown', 'ornage', 'range', 'angel', 'gang', 'ang']
12+
fuzzy = process.extract(query, val, limit=3, scorer='ratio')
13+
```
14+
15+
should output:
16+
```python
17+
[('orange', 100), ('range', 83), ('ornage', 66)]
18+
```
19+
20+
### Supported scorers
21+
22+
Only `ratio` and `partial_ratio` are supported at this time.
23+
Note that scores will not be the same as FuzzyWuzzy's.

__init__.py

Whitespace-only changes.

fuzzy_matcher.egg-info/PKG-INFO

+17
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
Metadata-Version: 2.1
2+
Name: fuzzy-matcher
3+
Version: 0.1.0
4+
Summary: A super simple MIT licensed fuzzy matching library
5+
Home-page: https://mrbot.ai
6+
Author: Nathan Zylbersztejn
7+
Author-email: human@mrbot.ai
8+
License: MIT
9+
Description: # Fuzzy Matcher
10+
11+
A super simple MIT licensed fuzzy matching library.
12+
It's aiming at being an MIT alternative to Fuzzy Wuzzy which is GPL licensed.
13+
It has far fewer features than FuzzyWuzzy, so if the GPL is not a blocker for you, you should probably use FuzzyWuzzy instead.
14+
15+
16+
Platform: UNKNOWN
17+
Description-Content-Type: text/markdown

fuzzy_matcher.egg-info/SOURCES.txt

+8
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
README.md
2+
fuzzy_matcher/__init__.py
3+
fuzzy_matcher/process.py
4+
fuzzy_matcher.egg-info/PKG-INFO
5+
fuzzy_matcher.egg-info/SOURCES.txt
6+
fuzzy_matcher.egg-info/dependency_links.txt
7+
fuzzy_matcher.egg-info/requires.txt
8+
fuzzy_matcher.egg-info/top_level.txt
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+

fuzzy_matcher.egg-info/requires.txt

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
editdistance

fuzzy_matcher.egg-info/top_level.txt

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
fuzzy_matcher

fuzzy_matcher/__init__.py

Whitespace-only changes.

fuzzy_matcher/process.py

+41
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
import editdistance
2+
import operator
3+
4+
5+
def distance(val1, val2):
    """Return the edit distance between ``val1`` and ``val2``.

    This is a thin wrapper: the computation is delegated entirely to
    ``editdistance.eval`` and its result is returned unchanged.
    """
    edit_count = editdistance.eval(val1, val2)
    return edit_count
7+
8+
9+
def ratio(val1, val2):
    """Return a 0-100 integer similarity score for ``val1`` and ``val2``.

    The edit distance between the inputs is normalised by the length of the
    longer one: 100 means the distance is zero, 0 means the distance equals
    the longer length. Fractions are truncated, not rounded (5/6 -> 83).

    Two empty inputs are treated as identical and score 100.
    """
    max_distance = max(len(val1), len(val2))
    if max_distance == 0:
        # Both inputs are empty: nothing differs and there is nothing to
        # divide by.
        return 100
    # Pure integer arithmetic gives the same truncated percentage on
    # Python 2 and 3 (no reliance on true division) and avoids float
    # rounding such as int(100 * (1 - 9 / 10)) evaluating to 9 instead of 10.
    return 100 * (max_distance - distance(val1, val2)) // max_distance
12+
13+
14+
def partial_distance(val1, val2):
    """Return the smallest edit distance between the shorter input and any
    equally long slice of the longer input.

    A window the size of the shorter input is slid over the longer one, one
    position at a time, and the minimum distance over all windows is
    returned. The argument order does not affect which input is treated as
    the window source: the longer one always is.
    """
    longer, shorter = sorted((val1, val2), key=len, reverse=True)
    width = len(shorter)
    window_count = len(longer) - width + 1
    return min(
        editdistance.eval(shorter, longer[start:start + width])
        for start in range(window_count)
    )
20+
21+
22+
def partial_ratio(val1, val2):
    """Return a 0-100 integer score for the best partial match.

    The partial distance (best alignment of the shorter input inside the
    longer one) is normalised by the length of the *shorter* input, because
    that is the largest value the partial distance can take. Normalising by
    ``len(val1)`` regardless of which input is shorter would inflate the
    score whenever ``val1`` is the longer one. Fractions are truncated.

    Two empty inputs score 100; exactly one empty input scores 0, since an
    empty string carries no information to match on.
    """
    max_distance = min(len(val1), len(val2))
    if max_distance == 0:
        # Avoid dividing by zero: identical empties match fully, an empty
        # input against a non-empty one does not match at all.
        return 100 if len(val1) == len(val2) else 0
    # Integer arithmetic keeps the result identical on Python 2 and 3 and
    # free of float rounding error before truncation.
    return 100 * (max_distance - partial_distance(val1, val2)) // max_distance
25+
26+
27+
def _get_scorer(scorer_name):
28+
if scorer_name == 'ratio':
29+
return ratio
30+
elif scorer_name == 'partial_ratio':
31+
return partial_ratio
32+
33+
34+
def extract(value, choices=(), scorer='ratio', limit=5):
    """Return the best-scoring ``choices`` for ``value``, best first.

    Args:
        value: the query to match against each choice.
        choices: iterable of candidates; defaults to an empty (immutable)
            tuple so no mutable object is shared between calls.
        scorer: name of the scoring function, resolved by ``_get_scorer``.
        limit: maximum number of results to return.

    Returns:
        A list of ``(choice, score)`` tuples sorted by descending score and
        cut to at most ``limit`` entries.
    """
    scorerfn = _get_scorer(scorer)
    scored = [(choice, scorerfn(value, choice)) for choice in choices]
    # list.sort is stable even with reverse=True, so choices with equal
    # scores keep their input order.
    scored.sort(key=operator.itemgetter(1), reverse=True)
    return scored[:limit]
39+
40+
41+

requirements.txt

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
editdistance

setup.py

+19
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
from setuptools import setup
2+
import os
3+
import io
4+
5+
# Reuse the README as the package's long description. io.open is used so the
# explicit encoding argument is accepted by every interpreter the project
# targets.
here = os.path.abspath(os.path.dirname(__file__))
with io.open(os.path.join(here, 'README.md'), encoding='utf-8') as f:
    long_description = f.read()

setup(
    name='fuzzy_matcher',
    version='0.1.0',
    author='Nathan Zylbersztejn',
    description="A super simple MIT licensed fuzzy matching library",
    long_description=long_description,
    long_description_content_type="text/markdown",
    install_requires=['editdistance', ],
    # 'license' is the supported keyword; the 'licence' spelling is a
    # deprecated alias that triggers a warning.
    license='MIT',
    url='https://mrbot.ai',
    author_email='human@mrbot.ai',
    packages=['fuzzy_matcher']
)

tests/test_fuzzy_matcher.py

+30
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
from __future__ import absolute_import
2+
from __future__ import division
3+
from __future__ import print_function
4+
from __future__ import unicode_literals
5+
6+
7+
from fuzzy_matcher import process
8+
9+
10+
def test_partial_distance():
    """partial_distance returns the expected best-window distance for each
    candidate when matched against the same query."""
    needle = "orange"
    haystacks = [
        'orangoutan',
        'orange tango',
        'olive martini',
        'orangemartinin',
        'martininorange',
    ]
    expected = [1, 0, 5, 0, 0]
    observed = [process.partial_distance(needle, haystack) for haystack in haystacks]
    assert observed == expected
15+
16+
17+
def test_ratio():
    """extract with the 'ratio' scorer returns the three best choices,
    highest score first."""
    needle = "orange"
    pool = ['blue', 'orange', 'brown', 'ornage', 'range', 'angel', 'gang', 'ang']
    expected = [('orange', 100), ('range', 83), ('ornage', 66)]
    top_matches = process.extract(needle, pool, limit=3, scorer='ratio')
    assert top_matches == expected
23+
24+
25+
def test_partial_ratio():
    """extract with the 'partial_ratio' scorer ranks the choice containing
    the query first and keeps tied choices in their input order."""
    needle = "orange"
    pool = ['blue tango', 'orange tango', 'brown tango']
    expected = [('orange tango', 100), ('blue tango', 50), ('brown tango', 50)]
    top_matches = process.extract(needle, pool, limit=3, scorer='partial_ratio')
    assert top_matches == expected
30+

0 commit comments

Comments
 (0)