From: Adam Dullage Date: Sat, 27 Aug 2022 12:23:19 +0000 (+0100) Subject: Added tags field to the index X-Git-Url: http://git.99rst.org/?a=commitdiff_plain;h=2bb89092e3d7cf2d8e65c33cc6cf1629da1230a8;p=flatnotes.git Added tags field to the index --- diff --git a/flatnotes/flatnotes.py b/flatnotes/flatnotes.py index 0b10203..f655c62 100644 --- a/flatnotes/flatnotes.py +++ b/flatnotes/flatnotes.py @@ -1,13 +1,14 @@ import glob import logging import os +import re from datetime import datetime from typing import List, Tuple import whoosh -from helpers import empty_dir, strip_ext +from helpers import empty_dir, re_extract, strip_ext from whoosh import writing -from whoosh.analysis import CharsetFilter, StemmingAnalyzer +from whoosh.analysis import CharsetFilter, KeywordAnalyzer, StemmingAnalyzer from whoosh.fields import ID, STORED, TEXT, SchemaClass from whoosh.index import Index from whoosh.qparser import MultifieldParser @@ -15,7 +16,8 @@ from whoosh.searching import Hit from whoosh.support.charset import accent_map MARKDOWN_EXT = ".md" -INDEX_SCHEMA_VERSION = "2" +INDEX_SCHEMA_VERSION = "3" +TAG_TOKEN_REGEX = re.compile(r"(?:(?<=^#)|(?<=\s#))\w+(?=\s|$)") StemmingFoldingAnalyzer = StemmingAnalyzer() | CharsetFilter(accent_map) @@ -25,6 +27,7 @@ class IndexSchema(SchemaClass): last_modified = STORED() title = TEXT(field_boost=2, analyzer=StemmingFoldingAnalyzer) content = TEXT(analyzer=StemmingFoldingAnalyzer) + tags = TEXT(analyzer=KeywordAnalyzer(lowercase=True)) class InvalidTitleError(Exception): @@ -147,11 +150,13 @@ class Flatnotes(object): """Add a Note object to the index using the given writer. If the filename already exists in the index an update will be performed instead.""" + content, tag_list = re_extract(TAG_TOKEN_REGEX, note.content) writer.update_document( filename=note.filename, last_modified=note.last_modified, title=note.title, - content=note.content, + content=content, + tags=" ".join(tag_list), ) def get_notes(self) -> List[Note]: @@ -208,12 +213,12 @@ class Flatnotes(object): ): self.update_index(clean=clean) - def search(self, term: str) -> Tuple[NoteHit]: + def search(self, term: str) -> Tuple[NoteHit, ...]: """Search the index for the given term.""" self.update_index_debounced() with self.index.searcher() as searcher: query = MultifieldParser( - ["title", "content"], self.index.schema + ["title", "content", "tags"], self.index.schema ).parse(term) - results = searcher.search(query) + results = searcher.search(query, limit=None) return tuple(NoteHit(self, hit) for hit in results) diff --git a/flatnotes/helpers.py b/flatnotes/helpers.py index e4e7b33..24cbf06 100644 --- a/flatnotes/helpers.py +++ b/flatnotes/helpers.py @@ -1,5 +1,7 @@ import os +import re import shutil +from typing import List, Tuple from pydantic import BaseModel @@ -23,6 +25,16 @@ def empty_dir(path): shutil.rmtree(item_path) +def re_extract(pattern, string) -> Tuple[str, List[str]]: + """Similar to re.sub but returns a tuple of: + + - `string` with matches removed + - list of matches""" + matches = [] + text = re.sub(pattern, lambda tag: matches.append(tag.group()), string) + return (text, matches) + + class CamelCaseBaseModel(BaseModel): class Config: alias_generator = camel_case