Add stemming and accent folding to index. Resolves #19.
author Adam Dullage <redacted>
Sun, 14 Aug 2022 12:19:02 +0000 (13:19 +0100)
committer Adam Dullage <redacted>
Sun, 14 Aug 2022 12:19:02 +0000 (13:19 +0100)
flatnotes/flatnotes.py
flatnotes/helpers.py

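diff --git a/flatnotes/flatnotes.py b/flatnotes/flatnotes.py
index e0db69cc63014c85c4f6c17a5a8aaa74d73ac802..ac8427919e9ed077c38c2acb646d85b1608a9274 100644
--- a/flatnotes/flatnotes.py
+++ b/flatnotes/flatnotes.py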
@@ -6,27 +6,32 @@ from typing import List, Tuple
 
 import whoosh
 from whoosh import writing
+from whoosh.analysis import CharsetFilter, StemmingAnalyzer
 from whoosh.fields import ID, STORED, TEXT, SchemaClass
 from whoosh.index import Index
 from whoosh.qparser import MultifieldParser
 from whoosh.searching import Hit
+from whoosh.support.charset import accent_map
 
-from helpers import strip_ext
+from helpers import empty_dir, strip_ext
 
 MARKDOWN_EXT = ".md"
+INDEX_SCHEMA_VERSION = "2"
 
-
-class InvalidTitleError(Exception):
-    def __init__(self, message="The specified title is invalid"):
-        self.message = message
-        super().__init__(self.message)
+StemmingFoldingAnalyzer = StemmingAnalyzer() | CharsetFilter(accent_map)
 
 
 class IndexSchema(SchemaClass):
     filename = ID(unique=True, stored=True)
     last_modified = STORED()
-    title = TEXT(field_boost=2)
-    content = TEXT()
+    title = TEXT(field_boost=2, analyzer=StemmingFoldingAnalyzer)
+    content = TEXT(analyzer=StemmingFoldingAnalyzer)
+
+
+class InvalidTitleError(Exception):
+    def __init__(self, message="The specified title is invalid"):
+        self.message = message
+        super().__init__(self.message)
 
 
 class Note:
@@ -118,14 +123,24 @@ class Flatnotes(object):
 
     def _load_index(self) -> Index:
         """Load the note index or create new if not exists."""
-        if not os.path.exists(self.index_dir):
-            os.mkdir(self.index_dir)
-        if whoosh.index.exists_in(self.index_dir):
-            logging.info("Existing index loaded")
-            return whoosh.index.open_dir(self.index_dir)
+        index_dir_exists = os.path.exists(self.index_dir)
+        if index_dir_exists and whoosh.index.exists_in(
+            self.index_dir, indexname=INDEX_SCHEMA_VERSION
+        ):
+            logging.info("Loading existing index")
+            return whoosh.index.open_dir(
+                self.index_dir, indexname=INDEX_SCHEMA_VERSION
+            )
         else:
-            logging.info("New index created")
-            return whoosh.index.create_in(self.index_dir, IndexSchema)
+            if index_dir_exists:
+                logging.info("Deleting outdated index")
+                empty_dir(self.index_dir)
+            else:
+                os.mkdir(self.index_dir)
+            logging.info("Creating new index")
+            return whoosh.index.create_in(
+                self.index_dir, IndexSchema, indexname=INDEX_SCHEMA_VERSION
+            )
 
     def _add_note_to_index(
         self, writer: writing.IndexWriter, note: Note
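diff --git a/flatnotes/helpers.py b/flatnotes/helpers.py
index 577b25f58ad0f90be5ebd8aace7293ad87e3b375..e4e7b339cbf6471b4f8551e3a36f09fc56be0e88 100644
--- a/flatnotes/helpers.py
+++ b/flatnotes/helpers.py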
@@ -1,4 +1,5 @@
 import os
+import shutil
 
 from pydantic import BaseModel
 
@@ -13,6 +14,15 @@ def camel_case(snake_case_str: str) -> str:
     return parts[0] + "".join(part.title() for part in parts[1:])
 
 
+def empty_dir(path):
+    for item in os.listdir(path):
+        item_path = os.path.join(path, item)
+        if os.path.isfile(item_path):
+            os.remove(item_path)
+        elif os.path.isdir(item_path):
+            shutil.rmtree(item_path)
+
+
 class CamelCaseBaseModel(BaseModel):
     class Config:
         alias_generator = camel_case
git clone https://git.99rst.org/PROJECT