Skip to content
GitLab
Menu
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
mentors.debian.net
debexpo
Commits
e404d186
Commit
e404d186
authored
Jul 15, 2012
by
Clément Schreiner
Browse files
Use a simple regexp to extract words from a text, instead of the previous stupid _clean_word method.
parent
9b96745c
Changes
1
Hide whitespace changes
Inline
Side-by-side
debexpo/lib/axi.py
View file @
e404d186
...
...
@@ -29,6 +29,7 @@
# OTHER DEALINGS IN THE SOFTWARE.
import
xapian
import
re
"""
Classes for accessing apt-xapian-index
...
...
@@ -46,20 +47,14 @@ class XapianQuery(object):
self
.
terms
=
[]
@
classmethod
def
_clean_word
(
cls
,
word
):
"""
Remove unwanted characters from a word and make it lowercase.
"""
return
word
.
strip
(
':;,-.*
\n
'
).
replace
(
'<br />'
,
''
).
lower
()
@
classmethod
def
tokenize
(
cls
,
text
):
"""
Takes a string and returns a list of words ready for a xapian
query.
"""
return
[
cls
.
_clean_word
(
word
)
for
word
in
text
.
split
(
' '
)]
regexp
=
r
'\w+'
return
re
.
findall
(
regexp
,
text
.
lower
())
def
add_words
(
self
,
word_list
):
"""
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment