1
0
mirror of https://github.com/aquatix/digimarks.git synced 2025-12-07 00:15:10 +01:00

Update bookmark and autofill certain fields

This commit is contained in:
2025-05-13 19:01:13 +02:00
parent 22e73bc991
commit d8c8d87568
2 changed files with 116 additions and 60 deletions

View File

@@ -5,5 +5,8 @@ sqlmodel
# Fetch title etc from links # Fetch title etc from links
beautifulsoup4 beautifulsoup4
# Fetch favicons
extract_favicon
# Generate (atom) feeds for tags and such # Generate (atom) feeds for tags and such
feedgen feedgen

View File

@@ -12,6 +12,7 @@ from urllib.parse import urlparse, urlunparse
import bs4 import bs4
import httpx import httpx
from extract_favicon import from_html
from fastapi import Depends, FastAPI, HTTPException, Query, Request from fastapi import Depends, FastAPI, HTTPException, Query, Request
from fastapi.middleware.cors import CORSMiddleware from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import HTMLResponse from fastapi.responses import HTMLResponse
@@ -196,6 +197,14 @@ def build_custom_type(internal_type: Type[T]) -> Type[AutoString]:
return CustomType return CustomType
def get_favicon(html_content: str, root_url: str) -> str | None:
    """Look up the favicon candidates for `html_content` using `root_url`.

    Work in progress: the candidates are only logged for now. Nothing is selected or
    saved yet, so this function currently always returns None.

    :param str html_content: HTML source of the page to inspect.
    :param str root_url: Root URL of the site (e.g., 'https://example.com'), handed to `from_html`.
    :return: None until saving the preferred favicon is implemented.
    """
    favicons = from_html(html_content, root_url=root_url, include_fallbacks=True)
    for favicon in favicons:
        # Report through the application logger instead of printing to stdout, like the rest of the module
        logger.debug('Found favicon %s (%sx%s)', favicon.url, favicon.width, favicon.height)
    # TODO: save the preferred image to file and return
    return None
class User(SQLModel, table=True): class User(SQLModel, table=True):
"""User account.""" """User account."""
@@ -244,48 +253,55 @@ class Bookmark(SQLModel, table=True):
@property @property
def tag_list(self) -> list: def tag_list(self) -> list:
"""The tags but as a proper list.""" """The tags but as a proper list."""
if not self.tags: if self.tags:
return self.tags.split(',')
# No tags: return an empty list instead of the [''] that split returns in that case # No tags: return an empty list instead of the [''] that split returns in that case
return [] return []
return self.tags.split(',')
async def set_title_from_source(self, request: Request) -> str:
async def set_information_from_source(bookmark: Bookmark, request: Request) -> Bookmark:
"""Request the title by requesting the source url.""" """Request the title by requesting the source url."""
logger.info('Extracting information from url %s', bookmark.url)
try: try:
result = await request.app.requests_client.get(self.url, headers={'User-Agent': DIGIMARKS_USER_AGENT}) result = await request.app.requests_client.get(bookmark.url, headers={'User-Agent': DIGIMARKS_USER_AGENT})
self.http_status = result.status_code bookmark.http_status = result.status_code
except httpx.HTTPError as err: except httpx.HTTPError as err:
# For example, 'MissingSchema: Invalid URL 'abc': No schema supplied. Perhaps you meant http://abc?' # For example, 'MissingSchema: Invalid URL 'abc': No schema supplied. Perhaps you meant http://abc?'
logger.error('Exception when trying to retrieve title for %s. Error: %s', self.url, str(err)) logger.error('Exception when trying to retrieve title for %s. Error: %s', bookmark.url, str(err))
self.http_status = 404 bookmark.http_status = 404
self.title = '' bookmark.title = ''
return self.title return bookmark
if self.http_status == 200 or self.http_status == 202: if bookmark.http_status == 200 or bookmark.http_status == 202:
html = bs4.BeautifulSoup(result.text, 'html.parser') html = bs4.BeautifulSoup(result.text, 'html.parser')
try: try:
self.title = html.title.text.strip() bookmark.title = html.title.text.strip()
except AttributeError: except AttributeError:
self.title = '' bookmark.title = ''
return self.title
def set_tags(self, new_tags: str) -> None: url_parts = urlparse(str(bookmark.url))
root_url = url_parts.scheme + '://' + url_parts.netloc
favicon = get_favicon(result.text, root_url)
# filename = os.path.join(settings.media_dir, 'favicons/', domain + file_extension)
# with open(filename, 'wb') as out_file:
# shutil.copyfileobj(response.raw, out_file)
# Extraction was successful
logger.info('Extracting information was successful')
return bookmark
def set_tags(bookmark: Bookmark, new_tags: str) -> None:
"""Set tags from `tags`, strip and sort them. """Set tags from `tags`, strip and sort them.
:param Bookmark bookmark: Bookmark to modify
:param str new_tags: New tags to sort and set. :param str new_tags: New tags to sort and set.
""" """
tags_split = new_tags.split(',') tags_split = new_tags.split(',')
tags_clean = clean_tags(tags_split) tags_clean = clean_tags(tags_split)
self.tags = ','.join(tags_clean) bookmark.tags = ','.join(tags_clean)
@property
def tags_list(self) -> list[str]:
"""Get the tags as a list, iterable in template."""
if self.tags:
return self.tags.split(',')
return []
@classmethod def strip_url_params(url: str) -> str:
def strip_url_params(cls, url: str) -> str:
"""Strip URL params from URL. """Strip URL params from URL.
:param url: URL to strip URL params from. :param url: URL to strip URL params from.
@@ -295,18 +311,19 @@ class Bookmark(SQLModel, table=True):
parsed = urlparse(url) parsed = urlparse(url)
return urlunparse((parsed.scheme, parsed.netloc, parsed.path, parsed.params, '', parsed.fragment)) return urlunparse((parsed.scheme, parsed.netloc, parsed.path, parsed.params, '', parsed.fragment))
def update(self, request: Request, strip_params: bool = False):
"""Automatically update title etc.""" def update_bookmark_with_info(bookmark: Bookmark, request: Request, strip_params: bool = False):
if not self.title: """Automatically update title, favicon, etc."""
if not bookmark.title:
# Title was empty, automatically fetch it from the url, will also update the status code # Title was empty, automatically fetch it from the url, will also update the status code
self.set_title_from_source(request) set_information_from_source(bookmark, request)
if strip_params: if strip_params:
# Strip URL parameters, e.g., tracking params # Strip URL parameters, e.g., tracking params
self.url = self.strip_url_params(str(self.url)) bookmark.url = strip_url_params(str(bookmark.url))
# Sort and deduplicate tags # Sort and deduplicate tags
self.set_tags(self.tags) set_tags(bookmark, bookmark.tags)
class PublicTag(SQLModel, table=True): class PublicTag(SQLModel, table=True):
@@ -400,6 +417,34 @@ def get_bookmark(
return bookmark return bookmark
@app.post('/api/v1/{user_key}/autocomplete_bookmark/', response_model=Bookmark)
def autocomplete_bookmark(
    session: SessionDep,
    request: Request,
    user_key: str,
    bookmark: Bookmark,
    strip_params: bool = False,
):
    """Pre-fill fields of a (new) bookmark for user `user_key` and hand it back.

    This endpoint itself does not add or commit anything to the session; it only
    returns the enriched bookmark. When the user already has a non-deleted bookmark
    for the same URL, `url_hash` is set on the returned object.
    """
    bookmark.userkey = user_key

    # Fill in the title, tidy up the tags and optionally strip the URL parameters
    update_bookmark_with_info(bookmark, request, strip_params)

    # Hash the URL as it is after the update step above
    url_hash = generate_hash(str(bookmark.url))
    duplicate_query = select(Bookmark).where(
        Bookmark.userkey == user_key,
        Bookmark.url_hash == url_hash,
        Bookmark.status != Visibility.DELETED,
    )
    existing_bookmark = session.exec(duplicate_query).first()
    if existing_bookmark is not None:
        # A bookmark with this URL already exists: expose its hash so the frontend can look it up
        # and the user can merge the two if so wanted
        bookmark.url_hash = url_hash

    return bookmark
@app.post('/api/v1/{user_key}/bookmarks/', response_model=Bookmark) @app.post('/api/v1/{user_key}/bookmarks/', response_model=Bookmark)
def add_bookmark( def add_bookmark(
session: SessionDep, session: SessionDep,
@@ -410,10 +455,10 @@ def add_bookmark(
): ):
"""Add new bookmark for user `user_key`.""" """Add new bookmark for user `user_key`."""
bookmark.userkey = user_key bookmark.userkey = user_key
bookmark.url_hash = generate_hash(str(bookmark.url))
# Auto-fill title, fix tags etc. # Auto-fill title, fix tags etc.
bookmark.update(request, strip_params) update_bookmark_with_info(bookmark, request, strip_params)
bookmark.url_hash = generate_hash(str(bookmark.url))
session.add(bookmark) session.add(bookmark)
session.commit() session.commit()
@@ -431,16 +476,24 @@ def update_bookmark(
strip_params: bool = False, strip_params: bool = False,
): ):
"""Update existing bookmark `bookmark_key` for user `user_key`.""" """Update existing bookmark `bookmark_key` for user `user_key`."""
bookmark_db = session.get(Bookmark, {'url_hash': url_hash, 'userkey': user_key}) bookmark_db = session.exec(
select(Bookmark).where(
Bookmark.userkey == user_key, Bookmark.url_hash == url_hash, Bookmark.status != Visibility.DELETED
)
).first()
if not bookmark_db: if not bookmark_db:
raise HTTPException(status_code=404, detail='Bookmark not found') raise HTTPException(status_code=404, detail='Bookmark not found')
# Auto-fill title, fix tags etc.
bookmark.update(request, strip_params)
bookmark.modified_date = datetime.now(UTC) bookmark.modified_date = datetime.now(UTC)
# 'patch' endpoint, which means that you can send only the data that you want to update, leaving the rest intact
bookmark_data = bookmark.model_dump(exclude_unset=True) bookmark_data = bookmark.model_dump(exclude_unset=True)
# Merge the changed fields into the existing object
bookmark_db.sqlmodel_update(bookmark_data) bookmark_db.sqlmodel_update(bookmark_data)
# Autofill title, fix tags, etc. where (still) needed
update_bookmark_with_info(bookmark, request, strip_params)
session.add(bookmark_db) session.add(bookmark_db)
session.commit() session.commit()
session.refresh(bookmark_db) session.refresh(bookmark_db)