From: Adam Dullage Date: Sun, 14 Aug 2022 12:19:02 +0000 (+0100) Subject: Add stemming and accent folding to index. Resolves #19. X-Git-Url: http://git.99rst.org/?a=commitdiff_plain;h=45ede900ce2cd0a2be2aee5df6241d679092f6a0;p=flatnotes.git Add stemming and accent folding to index. Resolves #19. --- diff --git a/flatnotes/flatnotes.py b/flatnotes/flatnotes.py index e0db69c..ac84279 100644 --- a/flatnotes/flatnotes.py +++ b/flatnotes/flatnotes.py @@ -6,27 +6,32 @@ from typing import List, Tuple import whoosh from whoosh import writing +from whoosh.analysis import CharsetFilter, StemmingAnalyzer from whoosh.fields import ID, STORED, TEXT, SchemaClass from whoosh.index import Index from whoosh.qparser import MultifieldParser from whoosh.searching import Hit +from whoosh.support.charset import accent_map -from helpers import strip_ext +from helpers import empty_dir, strip_ext MARKDOWN_EXT = ".md" +INDEX_SCHEMA_VERSION = "2" - -class InvalidTitleError(Exception): - def __init__(self, message="The specified title is invalid"): - self.message = message - super().__init__(self.message) +StemmingFoldingAnalyzer = StemmingAnalyzer() | CharsetFilter(accent_map) class IndexSchema(SchemaClass): filename = ID(unique=True, stored=True) last_modified = STORED() - title = TEXT(field_boost=2) - content = TEXT() + title = TEXT(field_boost=2, analyzer=StemmingFoldingAnalyzer) + content = TEXT(analyzer=StemmingFoldingAnalyzer) + + +class InvalidTitleError(Exception): + def __init__(self, message="The specified title is invalid"): + self.message = message + super().__init__(self.message) class Note: @@ -118,14 +123,24 @@ class Flatnotes(object): def _load_index(self) -> Index: """Load the note index or create new if not exists.""" - if not os.path.exists(self.index_dir): - os.mkdir(self.index_dir) - if whoosh.index.exists_in(self.index_dir): - logging.info("Existing index loaded") - return whoosh.index.open_dir(self.index_dir) + index_dir_exists = os.path.exists(self.index_dir) + if index_dir_exists and whoosh.index.exists_in( + self.index_dir, indexname=INDEX_SCHEMA_VERSION + ): + logging.info("Loading existing index") + return whoosh.index.open_dir( + self.index_dir, indexname=INDEX_SCHEMA_VERSION + ) else: - logging.info("New index created") - return whoosh.index.create_in(self.index_dir, IndexSchema) + if index_dir_exists: + logging.info("Deleting outdated index") + empty_dir(self.index_dir) + else: + os.mkdir(self.index_dir) + logging.info("Creating new index") + return whoosh.index.create_in( + self.index_dir, IndexSchema, indexname=INDEX_SCHEMA_VERSION + ) def _add_note_to_index( self, writer: writing.IndexWriter, note: Note diff --git a/flatnotes/helpers.py b/flatnotes/helpers.py index 577b25f..e4e7b33 100644 --- a/flatnotes/helpers.py +++ b/flatnotes/helpers.py @@ -1,4 +1,5 @@ import os +import shutil from pydantic import BaseModel @@ -13,6 +14,15 @@ def camel_case(snake_case_str: str) -> str: return parts[0] + "".join(part.title() for part in parts[1:]) +def empty_dir(path): + for item in os.listdir(path): + item_path = os.path.join(path, item) + if os.path.isfile(item_path): + os.remove(item_path) + elif os.path.isdir(item_path): + shutil.rmtree(item_path) + + class CamelCaseBaseModel(BaseModel): class Config: alias_generator = camel_case