Commit 9b96745c authored by Clément Schreiner's avatar Clément Schreiner
Browse files

Module for accessing apt-xapian-index.

parent 26f418bd
# -*- coding: utf-8 -*-
#
# axi.py — Access to apt-xapian-index
#
# This file is part of debexpo - https://alioth.debian.org/projects/debexpo/
#
# Copyright © 2012 Clément Schreiner <clement@mux.me>
#
#
# Permission is hereby granted, free of charge, to any person
# obtaining a copy of this software and associated documentation
# files (the "Software"), to deal in the Software without
# restriction, including without limitation the rights to use,
# copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following
# conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
# OTHER DEALINGS IN THE SOFTWARE.
import xapian
"""
Classes for accessing apt-xapian-index
"""
class XapianQuery(object):
    """
    Search packages in apt-xapian-index.

    Build up a list of query terms with ``add_text``, ``add_words``
    and ``add_tags``, then iterate over ``query()`` to get the
    matching documents.
    """

    # Index opened by default when no path is given to the constructor.
    INDEX_PATH = '/var/lib/apt-xapian-index/index'

    # Characters stripped from both ends of a word by ``_clean_word``.
    STRIP_CHARS = ':;,-.*\n'

    # HTML line break markup removed from texts and words.
    LINE_BREAK = '<br />'

    def __init__(self, index_path=INDEX_PATH, language="english"):
        """
        Open the xapian database and prepare an empty list of terms.

        ``index_path`` is the path of the xapian database to open and
        ``language`` the language name handed to ``xapian.Stem``. Both
        default to the values that used to be hard-coded.
        """
        self.db = xapian.Database(index_path)
        self.stemmer = xapian.Stem(language)
        self.terms = []

    @classmethod
    def _clean_word(cls, word):
        """
        Remove unwanted characters from a word and make it lowercase.
        """
        # Drop the markup *before* stripping punctuation, otherwise
        # punctuation sitting right before a '<br />' would survive.
        word = word.replace(cls.LINE_BREAK, '')
        return word.strip(cls.STRIP_CHARS).lower()

    @classmethod
    def tokenize(cls, text):
        """
        Takes a string and returns a list of words ready for a xapian
        query.

        Words are separated by any run of whitespace; '<br />' markup
        also acts as a separator. Words that are empty once cleaned
        are left out of the result.
        """
        # '<br />' contains a space, so it has to be handled before
        # splitting; afterwards it would be cut in two pieces and
        # could never be recognised.
        text = text.replace(cls.LINE_BREAK, ' ')
        cleaned = (cls._clean_word(word) for word in text.split())
        return [word for word in cleaned if word]

    def add_words(self, word_list):
        """
        Add words and their stemmed version to the query terms.

        Each non-empty word is appended as is. When its stem differs
        from the word, the stem is appended as well, prefixed with
        'Z'. Empty words are ignored.
        """
        for word in word_list:
            if not word:
                continue
            self.terms.append(word)
            stem = self.stemmer(word)
            # The stemmer may hand back a byte string for a text word;
            # decode it so that the comparison and the concatenation
            # below operate on values of the same type.
            if isinstance(stem, bytes) and not isinstance(word, bytes):
                stem = stem.decode('utf-8')
            if stem != word:
                self.terms.append('Z' + stem)

    def add_text(self, description):
        """
        Process a text and add its words to the query terms.
        """
        self.add_words(self.tokenize(description))

    def add_tags(self, tags):
        """
        Add tags from an iterable to the query terms.

        Each tag is appended with the 'XT' prefix.
        """
        for tag in tags:
            self.terms.append('XT' + tag)

    def query(self, limit=20):
        """
        Actually performs the query and return the results as a
        generator.

        The terms collected so far are combined with OP_OR. At most
        ``limit`` matches are requested (20 by default, as before).
        Yields ``(document data, match percentage)`` tuples. Nothing
        is yielded when no term has been added.
        """
        if not self.terms:
            # Nothing to search for: skip the database round trip.
            return
        query = xapian.Query(xapian.Query.OP_OR, self.terms)
        enquire = xapian.Enquire(self.db)
        enquire.set_query(query)
        for match in enquire.get_mset(0, limit):
            yield match.document.get_data(), match.percent
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment