1
0
mirror of https://github.com/aquatix/digimarks.git synced 2025-12-06 23:05:10 +01:00

Update bookmark and autofill certain fields

This commit is contained in:
2025-05-13 19:01:13 +02:00
parent 22e73bc991
commit d8c8d87568
2 changed files with 116 additions and 60 deletions

View File

@@ -5,5 +5,8 @@ sqlmodel
# Fetch title etc from links # Fetch title etc from links
beautifulsoup4 beautifulsoup4
# Fetch favicons
extract_favicon
# Generate (atom) feeds for tags and such # Generate (atom) feeds for tags and such
feedgen feedgen

View File

@@ -12,6 +12,7 @@ from urllib.parse import urlparse, urlunparse
import bs4 import bs4
import httpx import httpx
from extract_favicon import from_html
from fastapi import Depends, FastAPI, HTTPException, Query, Request from fastapi import Depends, FastAPI, HTTPException, Query, Request
from fastapi.middleware.cors import CORSMiddleware from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import HTMLResponse from fastapi.responses import HTMLResponse
@@ -196,6 +197,14 @@ def build_custom_type(internal_type: Type[T]) -> Type[AutoString]:
return CustomType return CustomType
def get_favicon(html_content: str, root_url: str) -> str:
    """Find the favicon candidates of a page and pick the largest one.

    :param str html_content: HTML source of the page to inspect.
    :param str root_url: root URL of the page, handed to `from_html` as `root_url`.
    :return: URL of the candidate with the largest pixel area, or '' when no candidate was found.
    :rtype: str
    """
    favicons = from_html(html_content, root_url=root_url, include_fallbacks=True)
    best_url = ''
    best_area = -1
    for favicon in favicons:
        # Report candidates through the module logger rather than printing to stdout
        logger.debug('Favicon candidate %s (%sx%s)', favicon.url, favicon.width, favicon.height)
        # Treat a missing dimension as 0 so such candidates only win when nothing better exists
        area = (favicon.width or 0) * (favicon.height or 0)
        if area > best_area:
            best_url, best_area = favicon.url, area
    # TODO: save the preferred image to file and return
    return best_url
class User(SQLModel, table=True): class User(SQLModel, table=True):
"""User account.""" """User account."""
@@ -244,69 +253,77 @@ class Bookmark(SQLModel, table=True):
@property @property
def tag_list(self) -> list: def tag_list(self) -> list:
"""The tags but as a proper list.""" """The tags but as a proper list."""
if not self.tags:
# No tags: return an empty list instead of the [''] that split returns in that case
return []
return self.tags.split(',')
async def set_title_from_source(self, request: Request) -> str:
    """Fetch `self.url` and take the bookmark title from the page's <title> element.

    The status code of the response is stored in `self.http_status`. When the request raises
    `httpx.HTTPError`, the status is set to 404 and the title to ''. The title is only replaced for
    responses with status 200 or 202; a page without a usable <title> yields ''.

    :param Request request: current request; its app provides the HTTP client (`requests_client`).
    :return: the title as it is stored on the bookmark after the attempt
    :rtype: str
    """
    request_headers = {'User-Agent': DIGIMARKS_USER_AGENT}
    try:
        response = await request.app.requests_client.get(self.url, headers=request_headers)
        self.http_status = response.status_code
    except httpx.HTTPError as err:
        # For example, 'MissingSchema: Invalid URL 'abc': No schema supplied. Perhaps you meant http://abc?'
        logger.error('Exception when trying to retrieve title for %s. Error: %s', self.url, str(err))
        self.http_status = 404
        self.title = ''
        return self.title

    if self.http_status not in (200, 202):
        # Nothing to parse; leave the title as it was
        return self.title

    soup = bs4.BeautifulSoup(response.text, 'html.parser')
    try:
        self.title = soup.title.text.strip()
    except AttributeError:
        # No <title> element in the document
        self.title = ''
    return self.title
def set_tags(self, new_tags: str) -> None:
    """Store `new_tags` on the bookmark after normalising them with `clean_tags`.

    :param str new_tags: comma-separated tags; split on ',' before being cleaned and re-joined.
    """
    self.tags = ','.join(clean_tags(new_tags.split(',')))
@property
def tags_list(self) -> list[str]:
"""Get the tags as a list, iterable in template."""
if self.tags: if self.tags:
return self.tags.split(',') return self.tags.split(',')
# No tags: return an empty list instead of the [''] that split returns in that case
return [] return []
@classmethod
def strip_url_params(cls, url: str) -> str:
"""Strip URL params from URL.
:param url: URL to strip URL params from. async def set_information_from_source(bookmark: Bookmark, request: Request) -> Bookmark:
:return: clean URL """Request the title by requesting the source url."""
:rtype: str logger.info('Extracting information from url %s', bookmark.url)
""" try:
parsed = urlparse(url) result = await request.app.requests_client.get(bookmark.url, headers={'User-Agent': DIGIMARKS_USER_AGENT})
return urlunparse((parsed.scheme, parsed.netloc, parsed.path, parsed.params, '', parsed.fragment)) bookmark.http_status = result.status_code
except httpx.HTTPError as err:
# For example, 'MissingSchema: Invalid URL 'abc': No schema supplied. Perhaps you meant http://abc?'
logger.error('Exception when trying to retrieve title for %s. Error: %s', bookmark.url, str(err))
bookmark.http_status = 404
bookmark.title = ''
return bookmark
if bookmark.http_status == 200 or bookmark.http_status == 202:
html = bs4.BeautifulSoup(result.text, 'html.parser')
try:
bookmark.title = html.title.text.strip()
except AttributeError:
bookmark.title = ''
def update(self, request: Request, strip_params: bool = False): url_parts = urlparse(str(bookmark.url))
"""Automatically update title etc.""" root_url = url_parts.scheme + '://' + url_parts.netloc
if not self.title: favicon = get_favicon(result.text, root_url)
# Title was empty, automatically fetch it from the url, will also update the status code # filename = os.path.join(settings.media_dir, 'favicons/', domain + file_extension)
self.set_title_from_source(request) # with open(filename, 'wb') as out_file:
# shutil.copyfileobj(response.raw, out_file)
if strip_params: # Extraction was successful
# Strip URL parameters, e.g., tracking params logger.info('Extracting information was successful')
self.url = self.strip_url_params(str(self.url)) return bookmark
# Sort and deduplicate tags
self.set_tags(self.tags) def set_tags(bookmark: Bookmark, new_tags: str) -> None:
"""Set tags from `tags`, strip and sort them.
:param Bookmark bookmark: Bookmark to modify
:param str new_tags: New tags to sort and set.
"""
tags_split = new_tags.split(',')
tags_clean = clean_tags(tags_split)
bookmark.tags = ','.join(tags_clean)
def strip_url_params(url: str) -> str:
    """Remove the query string from a URL.

    Only the query component is blanked; scheme, host, path, path parameters and fragment are kept.

    :param url: URL to remove the query string from.
    :return: URL without its query string
    :rtype: str
    """
    scheme, netloc, path, params, _query, fragment = urlparse(url)
    return urlunparse((scheme, netloc, path, params, '', fragment))
def update_bookmark_with_info(bookmark: Bookmark, request: Request, strip_params: bool = False) -> None:
    """Automatically update title, favicon, etc.

    :param Bookmark bookmark: bookmark to modify in place.
    :param Request request: current request, passed on to `set_information_from_source`.
    :param bool strip_params: when True, the URL is run through `strip_url_params`.
    """
    if not bookmark.title:
        # Title was empty, automatically fetch it from the url, will also update the status code
        # NOTE(review): `set_information_from_source` appears to be declared `async def`; called without
        # `await` from this sync function it would only create a coroutine that never runs. Confirm, and if so
        # make this function and its callers async.
        set_information_from_source(bookmark, request)

    if strip_params:
        # Strip URL parameters, e.g., tracking params
        bookmark.url = strip_url_params(str(bookmark.url))

    # Sort and deduplicate tags; fall back to '' so a bookmark without tags (None) does not fail on `split`
    set_tags(bookmark, bookmark.tags or '')
class PublicTag(SQLModel, table=True): class PublicTag(SQLModel, table=True):
@@ -400,6 +417,34 @@ def get_bookmark(
return bookmark return bookmark
@app.post('/api/v1/{user_key}/autocomplete_bookmark/', response_model=Bookmark)
def autocomplete_bookmark(
    session: SessionDep,
    request: Request,
    user_key: str,
    bookmark: Bookmark,
    strip_params: bool = False,
):
    """Autofill some fields for this (new) bookmark for user `user_key`.

    The bookmark is not added to the session here; it is completed and returned. `url_hash` is only set
    on the returned bookmark when the user already has a non-deleted bookmark for the same URL.
    """
    bookmark.userkey = user_key

    # Auto-fill title, fix tags etc.
    update_bookmark_with_info(bookmark, request, strip_params)

    # Hash the URL after the autofill step, which may have rewritten it
    url_hash = generate_hash(str(bookmark.url))
    duplicate_query = select(Bookmark).where(
        Bookmark.userkey == user_key,
        Bookmark.url_hash == url_hash,
        Bookmark.status != Visibility.DELETED,
    )
    if session.exec(duplicate_query).first():
        # Bookmark with this URL already exists, provide the hash so the frontend can look it up and the user can
        # merge them if so wanted
        bookmark.url_hash = url_hash

    return bookmark
@app.post('/api/v1/{user_key}/bookmarks/', response_model=Bookmark) @app.post('/api/v1/{user_key}/bookmarks/', response_model=Bookmark)
def add_bookmark( def add_bookmark(
session: SessionDep, session: SessionDep,
@@ -410,10 +455,10 @@ def add_bookmark(
): ):
"""Add new bookmark for user `user_key`.""" """Add new bookmark for user `user_key`."""
bookmark.userkey = user_key bookmark.userkey = user_key
bookmark.url_hash = generate_hash(str(bookmark.url))
# Auto-fill title, fix tags etc. # Auto-fill title, fix tags etc.
bookmark.update(request, strip_params) update_bookmark_with_info(bookmark, request, strip_params)
bookmark.url_hash = generate_hash(str(bookmark.url))
session.add(bookmark) session.add(bookmark)
session.commit() session.commit()
@@ -431,16 +476,24 @@ def update_bookmark(
strip_params: bool = False, strip_params: bool = False,
): ):
"""Update existing bookmark `bookmark_key` for user `user_key`.""" """Update existing bookmark `bookmark_key` for user `user_key`."""
bookmark_db = session.get(Bookmark, {'url_hash': url_hash, 'userkey': user_key}) bookmark_db = session.exec(
select(Bookmark).where(
Bookmark.userkey == user_key, Bookmark.url_hash == url_hash, Bookmark.status != Visibility.DELETED
)
).first()
if not bookmark_db: if not bookmark_db:
raise HTTPException(status_code=404, detail='Bookmark not found') raise HTTPException(status_code=404, detail='Bookmark not found')
# Auto-fill title, fix tags etc.
bookmark.update(request, strip_params)
bookmark.modified_date = datetime.now(UTC) bookmark.modified_date = datetime.now(UTC)
# 'patch' endpoint, which means that you can send only the data that you want to update, leaving the rest intact
bookmark_data = bookmark.model_dump(exclude_unset=True) bookmark_data = bookmark.model_dump(exclude_unset=True)
# Merge the changed fields into the existing object
bookmark_db.sqlmodel_update(bookmark_data) bookmark_db.sqlmodel_update(bookmark_data)
# Autofill title, fix tags, etc. where (still) needed
update_bookmark_with_info(bookmark, request, strip_params)
session.add(bookmark_db) session.add(bookmark_db)
session.commit() session.commit()
session.refresh(bookmark_db) session.refresh(bookmark_db)