Updated tagging regex

author Adam Dullage <redacted>

Sat, 24 Sep 2022 08:23:43 +0000 (09:23 +0100)

committer Adam Dullage <redacted>

Sat, 24 Sep 2022 08:23:43 +0000 (09:23 +0100)
author Adam Dullage <redacted>
Sat, 24 Sep 2022 08:23:43 +0000 (09:23 +0100)
committer Adam Dullage <redacted>
Sat, 24 Sep 2022 08:23:43 +0000 (09:23 +0100)
diff --git a/flatnotes/flatnotes.py b/flatnotes/flatnotes.py

index 2ea1bd43fa74cbffc1db3c4c9f7047b96e33abc8..424b5e61c5eaaaadd9aa8abfebaf19b3f07dfeec 100644 (file)
--- a/flatnotes/flatnotes.py
+++ b/flatnotes/flatnotes.py
@@ -112,19 +112,22 @@ class SearchResult(Note):
          # value of that field. This isn't useful so only set self._score if it
          # is a float.
          self._score = hit.score if type(hit.score) is float else None
+
          self._title_highlights = (
              hit.highlights("title", text=self.title)
              if "title" in self._matched_fields
              else None
          )
-        self._content_highlights = (
-            hit.highlights(
+
+        if "content" in self._matched_fields:
+            content_ex_tags, _ = Flatnotes.extract_tags(self.content)
+            self._content_highlights = hit.highlights(
                  "content",
-                text=self.content,
+                text=content_ex_tags,
              )
-            if "content" in self._matched_fields
-            else None
-        )
+        else:
+            self._content_highlights = None
+
          self._tag_matches = (
              [field[1] for field in hit.matched_terms() if field[0] == "tags"]
              if "tags" in self._matched_fields
@@ -155,7 +158,7 @@ class SearchResult(Note):
  
  
  class Flatnotes(object):
-    TAG_SECTION_RE = re.compile(r"(?:\s+#\w+)+$")
+    TAG_RE = re.compile(r"(?:(?<=^#)|(?<=\s#))\w+(?=\s|$)")
  
      def __init__(self, dir: str) -> None:
          if not os.path.exists(dir):
@@ -191,18 +194,15 @@ class Flatnotes(object):
                  self.index_dir, IndexSchema, indexname=INDEX_SCHEMA_VERSION
              )
  
-    def _extract_tags(self, content) -> Tuple[str, Set[str]]:
-        """Strip the tag section from the given content and return a tuple
-        consisting of:
+    @classmethod
+    def extract_tags(cls, content) -> Tuple[str, Set[str]]:
+        """Strip tags from the given content and return a tuple consisting of:
  
-        - The content without the tag section.
-        - A deduplicated list of tags converted to lowercase."""
-        content_ex_tags, tag_sections = re_extract(
-            self.TAG_SECTION_RE, content
-        )
+        - The content without the tags.
+        - A set of tags converted to lowercase."""
+        content_ex_tags, tags = re_extract(cls.TAG_RE, content)
          try:
-            tags = tag_sections[0].split()
-            tags = [tag[1:].lower() for tag in tags]
+            tags = [tag.lower() for tag in tags]
              return (content_ex_tags, set(tags))
          except IndexError:
              return (content, set())
@@ -213,7 +213,7 @@ class Flatnotes(object):
          """Add a Note object to the index using the given writer. If the
          filename already exists in the index an update will be performed
          instead."""
-        content_ex_tags, tag_set = self._extract_tags(note.content)
+        content_ex_tags, tag_set = self.extract_tags(note.content)
          tag_string = " ".join(tag_set)
          writer.update_document(
              filename=note.filename,
author	Adam Dullage <redacted>
	Sat, 24 Sep 2022 08:23:43 +0000 (09:23 +0100)
committer	Adam Dullage <redacted>
	Sat, 24 Sep 2022 08:23:43 +0000 (09:23 +0100)