Added tags field to the index

author Adam Dullage <redacted>

Sat, 27 Aug 2022 12:23:19 +0000 (13:23 +0100)

committer Adam Dullage <redacted>

Sat, 27 Aug 2022 12:23:19 +0000 (13:23 +0100)
author Adam Dullage <redacted>
Sat, 27 Aug 2022 12:23:19 +0000 (13:23 +0100)
committer Adam Dullage <redacted>
Sat, 27 Aug 2022 12:23:19 +0000 (13:23 +0100)
diff --git a/flatnotes/flatnotes.py b/flatnotes/flatnotes.py

index 0b102032fdee7d3a148787713988f71f4d8efd3c..f655c626399806f6efa50b489341bf1b3f5c48a2 100644 (file)
--- a/flatnotes/flatnotes.py
+++ b/flatnotes/flatnotes.py
@@ -1,13 +1,14 @@
  import glob
  import logging
  import os
+import re
  from datetime import datetime
  from typing import List, Tuple
  
  import whoosh
-from helpers import empty_dir, strip_ext
+from helpers import empty_dir, re_extract, strip_ext
  from whoosh import writing
-from whoosh.analysis import CharsetFilter, StemmingAnalyzer
+from whoosh.analysis import CharsetFilter, KeywordAnalyzer, StemmingAnalyzer
  from whoosh.fields import ID, STORED, TEXT, SchemaClass
  from whoosh.index import Index
  from whoosh.qparser import MultifieldParser
@@ -15,7 +16,8 @@ from whoosh.searching import Hit
  from whoosh.support.charset import accent_map
  
  MARKDOWN_EXT = ".md"
-INDEX_SCHEMA_VERSION = "2"
+INDEX_SCHEMA_VERSION = "3"
+TAG_TOKEN_REGEX = re.compile(r"(?:(?<=^#)|(?<=\s#))\w+(?=\s|$)")
  
  StemmingFoldingAnalyzer = StemmingAnalyzer() | CharsetFilter(accent_map)
  
@@ -25,6 +27,7 @@ class IndexSchema(SchemaClass):
      last_modified = STORED()
      title = TEXT(field_boost=2, analyzer=StemmingFoldingAnalyzer)
      content = TEXT(analyzer=StemmingFoldingAnalyzer)
+    tags = TEXT(analyzer=KeywordAnalyzer(lowercase=True))
  
  
  class InvalidTitleError(Exception):
@@ -147,11 +150,13 @@ class Flatnotes(object):
          """Add a Note object to the index using the given writer. If the
          filename already exists in the index an update will be performed
          instead."""
+        content, tag_list = re_extract(TAG_TOKEN_REGEX, note.content)
          writer.update_document(
              filename=note.filename,
              last_modified=note.last_modified,
              title=note.title,
-            content=note.content,
+            content=content,
+            tags=" ".join(tag_list),
          )
  
      def get_notes(self) -> List[Note]:
@@ -208,12 +213,12 @@ class Flatnotes(object):
          ):
              self.update_index(clean=clean)
  
-    def search(self, term: str) -> Tuple[NoteHit]:
+    def search(self, term: str) -> Tuple[NoteHit, ...]:
          """Search the index for the given term."""
          self.update_index_debounced()
          with self.index.searcher() as searcher:
              query = MultifieldParser(
-                ["title", "content"], self.index.schema
+                ["title", "content", "tags"], self.index.schema
              ).parse(term)
-            results = searcher.search(query)
+            results = searcher.search(query, limit=None)
              return tuple(NoteHit(self, hit) for hit in results)
diff --git a/flatnotes/helpers.py b/flatnotes/helpers.py

index e4e7b339cbf6471b4f8551e3a36f09fc56be0e88..24cbf0630a9d4e3b38de07e4f7e38b38f097d14f 100644 (file)
--- a/flatnotes/helpers.py
+++ b/flatnotes/helpers.py
@@ -1,5 +1,7 @@
  import os
+import re
  import shutil
+from typing import List, Tuple
  
  from pydantic import BaseModel
  
@@ -23,6 +25,16 @@ def empty_dir(path):
              shutil.rmtree(item_path)
  
  
+def re_extract(pattern, string) -> Tuple[str, List[str]]:
+    """Similar to re.sub but returns a tuple of:
+
+    - `string` with matches removed
+    - list of matches"""
+    matches = []
+    text = re.sub(pattern, lambda tag: matches.append(tag.group()), string)
+    return (text, matches)
+
+
  class CamelCaseBaseModel(BaseModel):
      class Config:
          alias_generator = camel_case
author	Adam Dullage <redacted>
	Sat, 27 Aug 2022 12:23:19 +0000 (13:23 +0100)
committer	Adam Dullage <redacted>
	Sat, 27 Aug 2022 12:23:19 +0000 (13:23 +0100)
flatnotes/flatnotes.py		patch \| blob \| history
flatnotes/helpers.py		patch \| blob \| history