Added tags field to the index
author Adam Dullage <redacted>
Sat, 27 Aug 2022 12:23:19 +0000 (13:23 +0100)
committer Adam Dullage <redacted>
Sat, 27 Aug 2022 12:23:19 +0000 (13:23 +0100)
flatnotes/flatnotes.py
flatnotes/helpers.py

index 0b102032fdee7d3a148787713988f71f4d8efd3c..f655c626399806f6efa50b489341bf1b3f5c48a2 100644 (file)
@@ -1,13 +1,14 @@
 import glob
 import logging
 import os
+import re
 from datetime import datetime
 from typing import List, Tuple
 
 import whoosh
-from helpers import empty_dir, strip_ext
+from helpers import empty_dir, re_extract, strip_ext
 from whoosh import writing
-from whoosh.analysis import CharsetFilter, StemmingAnalyzer
+from whoosh.analysis import CharsetFilter, KeywordAnalyzer, StemmingAnalyzer
 from whoosh.fields import ID, STORED, TEXT, SchemaClass
 from whoosh.index import Index
 from whoosh.qparser import MultifieldParser
@@ -15,7 +16,8 @@ from whoosh.searching import Hit
 from whoosh.support.charset import accent_map
 
 MARKDOWN_EXT = ".md"
-INDEX_SCHEMA_VERSION = "2"
+INDEX_SCHEMA_VERSION = "3"
+TAG_TOKEN_REGEX = re.compile(r"(?:(?<=^#)|(?<=\s#))\w+(?=\s|$)")
 
 StemmingFoldingAnalyzer = StemmingAnalyzer() | CharsetFilter(accent_map)
 
@@ -25,6 +27,7 @@ class IndexSchema(SchemaClass):
     last_modified = STORED()
     title = TEXT(field_boost=2, analyzer=StemmingFoldingAnalyzer)
     content = TEXT(analyzer=StemmingFoldingAnalyzer)
+    tags = TEXT(analyzer=KeywordAnalyzer(lowercase=True))
 
 
 class InvalidTitleError(Exception):
@@ -147,11 +150,13 @@ class Flatnotes(object):
         """Add a Note object to the index using the given writer. If the
         filename already exists in the index an update will be performed
         instead."""
+        content, tag_list = re_extract(TAG_TOKEN_REGEX, note.content)
         writer.update_document(
             filename=note.filename,
             last_modified=note.last_modified,
             title=note.title,
-            content=note.content,
+            content=content,
+            tags=" ".join(tag_list),
         )
 
     def get_notes(self) -> List[Note]:
@@ -208,12 +213,12 @@ class Flatnotes(object):
         ):
             self.update_index(clean=clean)
 
-    def search(self, term: str) -> Tuple[NoteHit]:
+    def search(self, term: str) -> Tuple[NoteHit, ...]:
         """Search the index for the given term."""
         self.update_index_debounced()
         with self.index.searcher() as searcher:
             query = MultifieldParser(
-                ["title", "content"], self.index.schema
+                ["title", "content", "tags"], self.index.schema
             ).parse(term)
-            results = searcher.search(query)
+            results = searcher.search(query, limit=None)
             return tuple(NoteHit(self, hit) for hit in results)
index e4e7b339cbf6471b4f8551e3a36f09fc56be0e88..24cbf0630a9d4e3b38de07e4f7e38b38f097d14f 100644 (file)
@@ -1,5 +1,7 @@
 import os
+import re
 import shutil
+from typing import List, Tuple
 
 from pydantic import BaseModel
 
@@ -23,6 +25,16 @@ def empty_dir(path):
             shutil.rmtree(item_path)
 
 
+def re_extract(pattern, string) -> Tuple[str, List[str]]:
+    """Similar to re.sub but returns a tuple of:
+
+    - `string` with matches removed
+    - list of matches"""
+    matches = []
+    text = re.sub(pattern, lambda tag: matches.append(tag.group()), string)
+    return (text, matches)
+
+
 class CamelCaseBaseModel(BaseModel):
     class Config:
         alias_generator = camel_case
git clone https://git.99rst.org/PROJECT