Update bookmark and autofill certain fields

2026-02-04 10:20:26 +01:00 · 2025-05-13 19:01:13 +02:00
parent 22e73bc991
commit d8c8d87568
2 changed files with 116 additions and 60 deletions
--- a/requirements.in
+++ b/requirements.in
@@ -5,5 +5,8 @@ sqlmodel
 # Fetch title etc from links
 beautifulsoup4

+# Fetch favicons
+extract_favicon
+
 # Generate (atom) feeds for tags and such
 feedgen
--- a/src/digimarks/main.py
+++ b/src/digimarks/main.py
@@ -12,6 +12,7 @@ from urllib.parse import urlparse, urlunparse

 import bs4
 import httpx
+from extract_favicon import from_html
 from fastapi import Depends, FastAPI, HTTPException, Query, Request
 from fastapi.middleware.cors import CORSMiddleware
 from fastapi.responses import HTMLResponse
@@ -196,6 +197,14 @@ def build_custom_type(internal_type: Type[T]) -> Type[AutoString]:
    return CustomType


+def get_favicon(html_content: str, root_url: str) -> str:
+    """Fetch the favicon from `html_content` using `root_url`."""
+    favicons = from_html(html_content, root_url=root_url, include_fallbacks=True)
+    for favicon in favicons:
+        print(favicon.url, favicon.width, favicon.height)
+    # TODO: save the preferred image to file and return
+
+
 class User(SQLModel, table=True):
    """User account."""

@@ -244,69 +253,77 @@ class Bookmark(SQLModel, table=True):
    @property
    def tag_list(self) -> list:
        """The tags but as a proper list."""
-        if not self.tags:
-            # Not tags, return empty list instead of [''] that split returns in that case
-            return []
-        return self.tags.split(',')
-
-    async def set_title_from_source(self, request: Request) -> str:
-        """Request the title by requesting the source url."""
-        try:
-            result = await request.app.requests_client.get(self.url, headers={'User-Agent': DIGIMARKS_USER_AGENT})
-            self.http_status = result.status_code
-        except httpx.HTTPError as err:
-            # For example, 'MissingSchema: Invalid URL 'abc': No schema supplied. Perhaps you meant http://abc?'
-            logger.error('Exception when trying to retrieve title for %s. Error: %s', self.url, str(err))
-            self.http_status = 404
-            self.title = ''
-            return self.title
-        if self.http_status == 200 or self.http_status == 202:
-            html = bs4.BeautifulSoup(result.text, 'html.parser')
-            try:
-                self.title = html.title.text.strip()
-            except AttributeError:
-                self.title = ''
-        return self.title
-
-    def set_tags(self, new_tags: str) -> None:
-        """Set tags from `tags`, strip and sort them.
-
-        :param str new_tags: New tags to sort and set.
-        """
-        tags_split = new_tags.split(',')
-        tags_clean = clean_tags(tags_split)
-        self.tags = ','.join(tags_clean)
-
-    @property
-    def tags_list(self) -> list[str]:
-        """Get the tags as a list, iterable in template."""
        if self.tags:
            return self.tags.split(',')
+        # Not tags, return empty list instead of [''] that split returns in that case
        return []

-    @classmethod
-    def strip_url_params(cls, url: str) -> str:
-        """Strip URL params from URL.

-        :param url: URL to strip URL params from.
-        :return: clean URL
-        :rtype: str
-        """
-        parsed = urlparse(url)
-        return urlunparse((parsed.scheme, parsed.netloc, parsed.path, parsed.params, '', parsed.fragment))
+async def set_information_from_source(bookmark: Bookmark, request: Request) -> Bookmark:
+    """Request the title by requesting the source url."""
+    logger.info('Extracting information from url %s', bookmark.url)
+    try:
+        result = await request.app.requests_client.get(bookmark.url, headers={'User-Agent': DIGIMARKS_USER_AGENT})
+        bookmark.http_status = result.status_code
+    except httpx.HTTPError as err:
+        # For example, 'MissingSchema: Invalid URL 'abc': No schema supplied. Perhaps you meant http://abc?'
+        logger.error('Exception when trying to retrieve title for %s. Error: %s', bookmark.url, str(err))
+        bookmark.http_status = 404
+        bookmark.title = ''
+        return bookmark
+    if bookmark.http_status == 200 or bookmark.http_status == 202:
+        html = bs4.BeautifulSoup(result.text, 'html.parser')
+        try:
+            bookmark.title = html.title.text.strip()
+        except AttributeError:
+            bookmark.title = ''

-    def update(self, request: Request, strip_params: bool = False):
-        """Automatically update title etc."""
-        if not self.title:
-            # Title was empty, automatically fetch it from the url, will also update the status code
-            self.set_title_from_source(request)
+        url_parts = urlparse(str(bookmark.url))
+        root_url = url_parts.scheme + '://' + url_parts.netloc
+        favicon = get_favicon(result.text, root_url)
+        # filename = os.path.join(settings.media_dir, 'favicons/', domain + file_extension)
+        # with open(filename, 'wb') as out_file:
+        #     shutil.copyfileobj(response.raw, out_file)

-        if strip_params:
-            # Strip URL parameters, e.g., tracking params
-            self.url = self.strip_url_params(str(self.url))
+    # Extraction was successful
+    logger.info('Extracting information was successful')
+    return bookmark

-        # Sort and deduplicate tags
-        self.set_tags(self.tags)
+
+def set_tags(bookmark: Bookmark, new_tags: str) -> None:
+    """Set tags from `tags`, strip and sort them.
+
+    :param Bookmark bookmark: Bookmark to modify
+    :param str new_tags: New tags to sort and set.
+    """
+    tags_split = new_tags.split(',')
+    tags_clean = clean_tags(tags_split)
+    bookmark.tags = ','.join(tags_clean)
+
+
+def strip_url_params(url: str) -> str:
+    """Strip URL params from URL.
+
+    :param url: URL to strip URL params from.
+    :return: clean URL
+    :rtype: str
+    """
+    parsed = urlparse(url)
+    return urlunparse((parsed.scheme, parsed.netloc, parsed.path, parsed.params, '', parsed.fragment))
+
+
+def update_bookmark_with_info(bookmark: Bookmark, request: Request, strip_params: bool = False):
+    """Automatically update title, favicon, etc."""
+    if not bookmark.title:
+        # Title was empty, automatically fetch it from the url, will also update the status code
+        set_information_from_source(bookmark, request)
+
+    if strip_params:
+        # Strip URL parameters, e.g., tracking params
+        bookmark.url = strip_url_params(str(bookmark.url))
+
+    # Sort and deduplicate tags
+    set_tags(bookmark, bookmark.tags)


 class PublicTag(SQLModel, table=True):
@@ -400,6 +417,34 @@ def get_bookmark(
    return bookmark


+@app.post('/api/v1/{user_key}/autocomplete_bookmark/', response_model=Bookmark)
+def autocomplete_bookmark(
+    session: SessionDep,
+    request: Request,
+    user_key: str,
+    bookmark: Bookmark,
+    strip_params: bool = False,
+):
+    """Autofill some fields for this (new) bookmark for user `user_key`."""
+    bookmark.userkey = user_key
+
+    # Auto-fill title, fix tags etc.
+    update_bookmark_with_info(bookmark, request, strip_params)
+
+    url_hash = generate_hash(str(bookmark.url))
+    bookmark_db = session.exec(
+        select(Bookmark).where(
+            Bookmark.userkey == user_key, Bookmark.url_hash == url_hash, Bookmark.status != Visibility.DELETED
+        )
+    ).first()
+    if bookmark_db:
+        # Bookmark with this URL already exists, provide the hash so the frontend can look it up and the user can
+        # merge them if so wanted
+        bookmark.url_hash = url_hash
+
+    return bookmark
+
+
@app.post('/api/v1/{user_key}/bookmarks/', response_model=Bookmark)
 def add_bookmark(
    session: SessionDep,
@@ -410,10 +455,10 @@ def add_bookmark(
 ):
    """Add new bookmark for user `user_key`."""
    bookmark.userkey = user_key
-    bookmark.url_hash = generate_hash(str(bookmark.url))

    # Auto-fill title, fix tags etc.
-    bookmark.update(request, strip_params)
+    update_bookmark_with_info(bookmark, request, strip_params)
+    bookmark.url_hash = generate_hash(str(bookmark.url))

    session.add(bookmark)
    session.commit()
@@ -431,16 +476,24 @@ def update_bookmark(
    strip_params: bool = False,
 ):
    """Update existing bookmark `bookmark_key` for user `user_key`."""
-    bookmark_db = session.get(Bookmark, {'url_hash': url_hash, 'userkey': user_key})
+    bookmark_db = session.exec(
+        select(Bookmark).where(
+            Bookmark.userkey == user_key, Bookmark.url_hash == url_hash, Bookmark.status != Visibility.DELETED
+        )
+    ).first()
    if not bookmark_db:
        raise HTTPException(status_code=404, detail='Bookmark not found')

-    # Auto-fill title, fix tags etc.
-    bookmark.update(request, strip_params)
    bookmark.modified_date = datetime.now(UTC)

+    # 'patch' endpoint, which means that you can send only the data that you want to update, leaving the rest intact
    bookmark_data = bookmark.model_dump(exclude_unset=True)
+    # Merge the changed fields into the existing object
    bookmark_db.sqlmodel_update(bookmark_data)
+
+    # Autofill title, fix tags, etc. where (still) needed
+    update_bookmark_with_info(bookmark, request, strip_params)
+
    session.add(bookmark_db)
    session.commit()
    session.refresh(bookmark_db)