Ignore Markdown code blocks when extracting tags (#66)

author elmodor <redacted>

Sun, 21 May 2023 17:36:59 +0000 (19:36 +0200)

committer GitHub <redacted>

Sun, 21 May 2023 17:36:59 +0000 (18:36 +0100)
author elmodor <redacted>
Sun, 21 May 2023 17:36:59 +0000 (19:36 +0200)
committer GitHub <redacted>
Sun, 21 May 2023 17:36:59 +0000 (18:36 +0100)
diff --git a/flatnotes/flatnotes.py b/flatnotes/flatnotes.py

index 782387b7323196b199f552326a96cc315925a769..c904a7906616096a36f0db9d5911cf11a7910403 100644 (file)
--- a/flatnotes/flatnotes.py
+++ b/flatnotes/flatnotes.py
@@ -20,7 +20,7 @@ from helpers import empty_dir, re_extract, strip_ext
  from logger import logger
  
  MARKDOWN_EXT = ".md"
-INDEX_SCHEMA_VERSION = "3"
+INDEX_SCHEMA_VERSION = "4"
  
  StemmingFoldingAnalyzer = StemmingAnalyzer() | CharsetFilter(accent_map)
  
@@ -162,6 +162,7 @@ class SearchResult(Note):
  
  class Flatnotes(object):
      TAGS_RE = re.compile(r"(?:(?<=^#)|(?<=\s#))\w+(?=\s|$)")
+    CODEBLOCK_RE = re.compile(r"`{1,3}.*?`{1,3}", re.DOTALL)
      TAGS_WITH_HASH_RE = re.compile(r"(?:(?<=^)|(?<=\s))#\w+(?=\s|$)")
  
      def __init__(self, dir: str) -> None:
@@ -203,7 +204,9 @@ class Flatnotes(object):
  
          - The content without the tags.
          - A set of tags converted to lowercase."""
-        content_ex_tags, tags = re_extract(cls.TAGS_RE, content)
+        content_ex_codeblock = re.sub(cls.CODEBLOCK_RE, '', content)
+        _, tags = re_extract(cls.TAGS_RE, content_ex_codeblock)
+        content_ex_tags, _ = re_extract(cls.TAGS_RE, content)
          try:
              tags = [tag.lower() for tag in tags]
              return (content_ex_tags, set(tags))
author	elmodor <redacted>
	Sun, 21 May 2023 17:36:59 +0000 (19:36 +0200)
committer	GitHub <redacted>
	Sun, 21 May 2023 17:36:59 +0000 (18:36 +0100)