1
0
mirror of https://github.com/aquatix/digimarks.git synced 2025-12-06 20:55:10 +01:00

2 Commits

5 changed files with 277 additions and 158 deletions

View File

@@ -1,78 +0,0 @@
"""Bookmark helper functions, like content scrapers, favicon extractor, updater functions."""
from urllib.parse import urlparse, urlunparse
import bs4
import httpx
from extract_favicon import from_html
from fastapi import Request
from pydantic import AnyUrl
from src.digimarks import tags_helpers
from src.digimarks.models import Bookmark
DIGIMARKS_USER_AGENT = 'digimarks/2.0.0-dev'
def get_favicon(html_content: str, root_url: str) -> str:
    """Print every favicon candidate found in `html_content`.

    Candidates come from `from_html`, called with `root_url` and `include_fallbacks=True`.

    NOTE: despite the `str` annotation nothing is returned yet (implicit `None`); see the TODO below.

    :param html_content: HTML source of the page
    :param root_url: scheme and host of the page, handed to `from_html`
    """
    for candidate in from_html(html_content, root_url=root_url, include_fallbacks=True):
        print(candidate.url, candidate.width, candidate.height)
    # TODO: save the preferred image to file and return
async def set_information_from_source(logger, bookmark: Bookmark, request: Request) -> Bookmark:
    """Request the source url and store the HTTP status and page title on `bookmark`.

    :param logger: logger used for progress and error messages
    :param bookmark: bookmark to update in place; its `url` is requested
    :param request: current request; `request.app.requests_client` performs the GET
    :return: the same `bookmark` object, with `http_status` and `title` updated
    """
    # Stringify once, as the rest of this module does before using the URL
    source_url = str(bookmark.url)
    logger.info('Extracting information from url %s', source_url)
    try:
        result = await request.app.requests_client.get(source_url, headers={'User-Agent': DIGIMARKS_USER_AGENT})
    except (httpx.HTTPError, httpx.InvalidURL) as err:
        # httpx.InvalidURL does not derive from httpx.HTTPError, so malformed URLs are listed explicitly
        logger.error('Exception when trying to retrieve title for %s. Error: %s', source_url, str(err))
        bookmark.http_status = 404
        bookmark.title = ''
        return bookmark

    bookmark.http_status = result.status_code
    if bookmark.http_status in (200, 202):
        html = bs4.BeautifulSoup(result.text, 'html.parser')
        try:
            bookmark.title = html.title.text.strip()
        except AttributeError:
            # The document has no usable <title> element
            bookmark.title = ''

        url_parts = urlparse(source_url)
        root_url = url_parts.scheme + '://' + url_parts.netloc
        # Result deliberately ignored: get_favicon does not return the image yet (see its TODO)
        get_favicon(result.text, root_url)
        # filename = os.path.join(settings.media_dir, 'favicons/', domain + file_extension)
        # with open(filename, 'wb') as out_file:
        # shutil.copyfileobj(response.raw, out_file)

    # Extraction was successful
    logger.info('Extracting information was successful')
    return bookmark
def strip_url_params(url: str) -> str:
    """Return `url` without its query string.

    Scheme, host, path, path parameters and fragment are kept; only the query part is emptied.

    :param url: URL to strip the query string from.
    :return: clean URL
    :rtype: str
    """
    return urlparse(url)._replace(query='').geturl()
def update_bookmark_with_info(bookmark: Bookmark, request: Request, strip_params: bool = False):
    """Fill in a missing title, optionally clean the URL, and normalise the tags of `bookmark`.

    NOTE(review): `set_information_from_source` is declared in this module as
    `async def (logger, bookmark, request)`, while the call below passes two arguments and is not
    awaited. Correcting that changes how this function has to be called, so it is only flagged here.

    :param bookmark: bookmark to update in place
    :param request: current request, handed on to the title lookup
    :param strip_params: when true, remove the query string from the bookmark URL
    """
    title_missing = not bookmark.title
    if title_missing:
        # No title yet: look it up at the source url, which is also meant to record the status code
        set_information_from_source(bookmark, request)

    if strip_params:
        # Remove URL parameters, e.g., tracking params
        cleaned_url = strip_url_params(str(bookmark.url))
        bookmark.url = AnyUrl(cleaned_url)

    # Sort and deduplicate tags
    tags_helpers.set_tags(bookmark, bookmark.tags)

View File

@@ -0,0 +1,213 @@
"""Bookmark helper functions, like content scrapers, favicon extractor, updater functions."""
import logging
from datetime import UTC, datetime
from typing import Annotated, Sequence
from urllib.parse import urlparse, urlunparse
import bs4
import httpx
from extract_favicon import from_html
from fastapi import Query, Request
from pydantic import AnyUrl
from sqlmodel import select
from src.digimarks import tags_service, utils
from src.digimarks.exceptions import BookmarkNotFound
from src.digimarks.models import Bookmark, Visibility
DIGIMARKS_USER_AGENT = 'digimarks/2.0.0-dev'
logger = logging.getLogger('digimarks')
def get_favicon(html_content: str, root_url: str) -> str:
    """List the favicon candidates of a page on standard output.

    `from_html` is asked for the candidates (with `root_url` and fallbacks enabled) and the
    url, width and height of each one are printed.

    NOTE: nothing is returned yet (implicit `None`) even though the annotation says `str`;
    the TODO below covers the missing part.

    :param html_content: HTML source of the page
    :param root_url: scheme and host of the page, handed to `from_html`
    """
    candidates = from_html(html_content, root_url=root_url, include_fallbacks=True)
    for icon in candidates:
        print(icon.url, icon.width, icon.height)
    # TODO: save the preferred image to file and return
async def set_information_from_source(logger, bookmark: Bookmark, request: Request) -> Bookmark:
    """Request the source url and store the HTTP status and page title on `bookmark`.

    :param logger: logger used for progress and error messages
    :param bookmark: bookmark to update in place; its `url` is requested
    :param request: current request; `request.app.requests_client` performs the GET
    :return: the same `bookmark` object, with `http_status` and `title` updated
    """
    # Stringify once, as the rest of this module does before using the URL
    source_url = str(bookmark.url)
    logger.info('Extracting information from url %s', source_url)
    try:
        result = await request.app.requests_client.get(source_url, headers={'User-Agent': DIGIMARKS_USER_AGENT})
    except (httpx.HTTPError, httpx.InvalidURL) as err:
        # httpx.InvalidURL does not derive from httpx.HTTPError, so malformed URLs are listed explicitly
        logger.error('Exception when trying to retrieve title for %s. Error: %s', source_url, str(err))
        bookmark.http_status = 404
        bookmark.title = ''
        return bookmark

    bookmark.http_status = result.status_code
    if bookmark.http_status in (200, 202):
        html = bs4.BeautifulSoup(result.text, 'html.parser')
        try:
            bookmark.title = html.title.text.strip()
        except AttributeError:
            # The document has no usable <title> element
            bookmark.title = ''

        url_parts = urlparse(source_url)
        root_url = url_parts.scheme + '://' + url_parts.netloc
        # Result deliberately ignored: get_favicon does not return the image yet (see its TODO)
        get_favicon(result.text, root_url)
        # filename = os.path.join(settings.media_dir, 'favicons/', domain + file_extension)
        # with open(filename, 'wb') as out_file:
        # shutil.copyfileobj(response.raw, out_file)

    # Extraction was successful
    logger.info('Extracting information was successful')
    return bookmark
def strip_url_params(url: str) -> str:
    """Remove the query string from a URL.

    All other components (scheme, host, path, path parameters, fragment) are carried over unchanged.

    :param url: URL to strip the query string from.
    :return: clean URL
    :rtype: str
    """
    scheme, netloc, path, params, _query, fragment = urlparse(url)
    without_query = (scheme, netloc, path, params, '', fragment)
    return urlunparse(without_query)
async def update_bookmark_with_info(bookmark: Bookmark, request: Request, strip_params: bool = False):
    """Automatically update title, favicon, etc.

    This is a coroutine: looking up a missing title means awaiting `set_information_from_source`,
    so callers have to `await` it.

    :param bookmark: bookmark to update in place
    :param request: current request, handed on to the title lookup
    :param strip_params: when true, remove the query string from the bookmark URL
    """
    if not bookmark.title:
        # Title was empty, automatically fetch it from the url, will also update the status code
        await set_information_from_source(logger, bookmark, request)

    if strip_params:
        # Strip URL parameters, e.g., tracking params
        bookmark.url = AnyUrl(strip_url_params(str(bookmark.url)))

    # Sort and deduplicate tags
    tags_service.set_tags(bookmark, bookmark.tags)


async def _find_active_bookmark(session, user_key: str, url_hash: str) -> Bookmark | None:
    """Return the non-deleted bookmark of `user_key` with `url_hash`, or `None` when there is none."""
    result = await session.exec(
        select(Bookmark).where(
            Bookmark.user_key == user_key, Bookmark.url_hash == url_hash, Bookmark.status != Visibility.DELETED
        )
    )
    # first() may only be called once per result, so the row is handed straight to the caller
    return result.first()


async def list_bookmarks_for_user(
    session,
    user_key: str,
    offset: int = 0,
    limit: Annotated[int, Query(le=10000)] = 100,
) -> Sequence[Bookmark]:
    """List the non-deleted bookmarks of user `user_key`. By default, 100 items are returned.

    :param session: database session used for the query
    :param user_key: key of the user whose bookmarks are listed
    :param offset: number of rows to skip
    :param limit: maximum number of rows to return
    :return: the matching bookmarks
    """
    result = await session.exec(
        select(Bookmark)
        .where(Bookmark.user_key == user_key, Bookmark.status != Visibility.DELETED)
        .offset(offset)
        .limit(limit)
    )
    return result.all()


async def get_bookmark_for_user_with_url_hash(session, user_key: str, url_hash: str) -> Bookmark:
    """Get a bookmark from the database by its URL hash.

    :param session: database session used for the query
    :param user_key: key of the user owning the bookmark
    :param url_hash: hash identifying the bookmark
    :return: the matching, non-deleted bookmark
    :raises BookmarkNotFound: when no such bookmark exists
    """
    bookmark = await _find_active_bookmark(session, user_key, url_hash)
    if not bookmark:
        raise BookmarkNotFound(f'url_hash: {url_hash}')
    return bookmark


async def autocomplete_bookmark(
    session,
    request: Request,
    user_key: str,
    bookmark: Bookmark,
    strip_params: bool = False,
):
    """Autofill some fields for this (new) bookmark for user `user_key`.

    Nothing is stored; the completed `bookmark` is returned. Its `url_hash` is only filled in when
    the user already has a non-deleted bookmark for the same URL.
    """
    bookmark.user_key = user_key

    # Auto-fill title, fix tags etc.
    await update_bookmark_with_info(bookmark, request, strip_params)

    url_hash = utils.generate_hash(str(bookmark.url))
    if await _find_active_bookmark(session, user_key, url_hash):
        # Bookmark with this URL already exists, provide the hash so the frontend can look it up and the user can
        # merge them if so wanted
        bookmark.url_hash = url_hash
    return bookmark


async def add_bookmark(
    session,
    request: Request,
    user_key: str,
    bookmark: Bookmark,
    strip_params: bool = False,
):
    """Add new bookmark for user `user_key`.

    :return: the stored bookmark, refreshed from the database
    """
    bookmark.user_key = user_key

    # Auto-fill title, fix tags etc.
    await update_bookmark_with_info(bookmark, request, strip_params)

    # Hash the URL only after the autofill step, which may have stripped its parameters
    bookmark.url_hash = utils.generate_hash(str(bookmark.url))
    logger.info('Adding bookmark %s for user %s', bookmark.url_hash, user_key)
    session.add(bookmark)
    await session.commit()
    await session.refresh(bookmark)
    return bookmark


async def update_bookmark(
    session,
    request: Request,
    user_key: str,
    bookmark: Bookmark,
    url_hash: str,
    strip_params: bool = False,
):
    """Update existing bookmark `url_hash` for user `user_key`.

    :param bookmark: the changes to apply; only fields that were explicitly set are merged
    :return: the updated bookmark, refreshed from the database
    :raises BookmarkNotFound: when the user has no non-deleted bookmark with `url_hash`
    """
    bookmark_db = await _find_active_bookmark(session, user_key, url_hash)
    if not bookmark_db:
        raise BookmarkNotFound(message=f'Bookmark with hash {url_hash} not found')

    bookmark.modified_date = datetime.now(UTC)

    # 'patch' endpoint, which means that you can send only the data that you want to update, leaving the rest intact
    bookmark_data = bookmark.model_dump(exclude_unset=True)
    # Merge the changed fields into the existing object
    bookmark_db.sqlmodel_update(bookmark_data)

    # Autofill title, fix tags, etc. where (still) needed. This runs on the merged object: it is the one that
    # gets stored, and its title is only empty when neither the patch nor the existing row supplied one.
    await update_bookmark_with_info(bookmark_db, request, strip_params)

    session.add(bookmark_db)
    await session.commit()
    await session.refresh(bookmark_db)
    return bookmark_db
async def delete_bookmark(
    session,
    user_key: str,
    url_hash: str,
):
    """(Soft)Delete bookmark `url_hash` for user `user_key`.

    The row is kept: it gets a `deleted_date` and its status is set to `Visibility.DELETED`.

    :param session: database session used for the lookup and the commit
    :param user_key: key of the user owning the bookmark
    :param url_hash: hash identifying the bookmark
    :raises BookmarkNotFound: when the lookup returns nothing
    """
    bookmark = await session.get(Bookmark, {'url_hash': url_hash, 'user_key': user_key})
    if not bookmark:
        # f-string, so the message carries the actual hash instead of the literal placeholder
        raise BookmarkNotFound(message=f'Bookmark with hash {url_hash} not found')

    bookmark.deleted_date = datetime.now(UTC)
    bookmark.status = Visibility.DELETED
    session.add(bookmark)
    await session.commit()

View File

@@ -0,0 +1,21 @@
"""Exceptions that could be encountered managing digimarks."""
class BookmarkNotFound(Exception):
    """Raised when a requested bookmark does not exist."""

    def __init__(self, message='Bookmark not found'):
        """Create the exception.

        :param str message: description of the failure, also available as `self.message`
        """
        self.message = message
        super().__init__(self.message)
class BookmarkAlreadyExists(Exception):
    """Signals that a bookmark already exists."""

    def __init__(self, message='Bookmark already exists'):
        """Create the exception.

        :param str message: description of the failure, also available as `self.message`
        """
        self.message = message
        super().__init__(self.message)

View File

@@ -18,7 +18,8 @@ from sqlalchemy.orm import sessionmaker
from sqlmodel import desc, select from sqlmodel import desc, select
from sqlmodel.ext.asyncio.session import AsyncSession from sqlmodel.ext.asyncio.session import AsyncSession
from src.digimarks import bookmarks_helpers, tags_helpers, utils from src.digimarks import bookmarks_service, tags_service
from src.digimarks.exceptions import BookmarkNotFound
from src.digimarks.models import DEFAULT_THEME, Bookmark, User, Visibility from src.digimarks.models import DEFAULT_THEME, Bookmark, User, Visibility
DIGIMARKS_VERSION = '2.0.0a1' DIGIMARKS_VERSION = '2.0.0a1'
@@ -72,6 +73,11 @@ app.mount('/content/favicons', StaticFiles(directory=settings.favicons_dir), nam
templates = Jinja2Templates(directory=settings.template_dir) templates = Jinja2Templates(directory=settings.template_dir)
# Set up logging # Set up logging
logging.basicConfig(
level=logging.INFO,
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
datefmt='%Y-%m-%d %H:%M:%S',
)
logger = logging.getLogger('digimarks') logger = logging.getLogger('digimarks')
if settings.debug: if settings.debug:
logger.setLevel(logging.DEBUG) logger.setLevel(logging.DEBUG)
@@ -109,6 +115,7 @@ def file_type(filename: str) -> str:
@app.head('/', response_class=HTMLResponse) @app.head('/', response_class=HTMLResponse)
def index(request: Request): def index(request: Request):
"""Homepage, point visitors to project page.""" """Homepage, point visitors to project page."""
logger.info('Root page requested')
return templates.TemplateResponse( return templates.TemplateResponse(
request=request, request=request,
name='index.html', name='index.html',
@@ -119,11 +126,15 @@ def index(request: Request):
@app.get('/api/v1/admin/{system_key}/users/{user_id}', response_model=User) @app.get('/api/v1/admin/{system_key}/users/{user_id}', response_model=User)
async def get_user(session: SessionDep, system_key: str, user_id: int) -> Type[User]: async def get_user(session: SessionDep, system_key: str, user_id: int) -> Type[User]:
"""Show user information.""" """Show user information."""
logger.info('User %d requested', user_id)
if system_key != settings.system_key: if system_key != settings.system_key:
logger.error('User %s requested but incorrect system key %s provided', user_id, system_key)
raise HTTPException(status_code=404) raise HTTPException(status_code=404)
user = session.get(User, user_id) result = await session.get(User, user_id)
user = result
if not user: if not user:
logger.error('User %s not found', user_id)
raise HTTPException(status_code=404, detail='User not found') raise HTTPException(status_code=404, detail='User not found')
return user return user
@@ -145,12 +156,13 @@ async def list_users(
:return: list of users in the system :return: list of users in the system
:rtype: list[User] :rtype: list[User]
""" """
logger.info('User listing requested')
if system_key != settings.system_key: if system_key != settings.system_key:
logger.error('User listing requested but incorrect system key %s provided', system_key)
raise HTTPException(status_code=404) raise HTTPException(status_code=404)
result = await session.exec(select(User).offset(offset).limit(limit)) result = await session.exec(select(User).offset(offset).limit(limit))
users = result.all() return result.all()
return users
@app.get('/api/v1/{user_key}/bookmarks/') @app.get('/api/v1/{user_key}/bookmarks/')
@@ -159,16 +171,10 @@ async def list_bookmarks(
user_key: str, user_key: str,
offset: int = 0, offset: int = 0,
limit: Annotated[int, Query(le=10000)] = 100, limit: Annotated[int, Query(le=10000)] = 100,
) -> list[Bookmark]: ) -> Sequence[Bookmark]:
"""List all bookmarks in the database. By default, 100 items are returned.""" """List all bookmarks in the database. By default, 100 items are returned."""
result = await session.exec( logger.info('List bookmarks for user %s with offset %d, limit %d', user_key, offset, limit)
select(Bookmark) return await bookmarks_service.list_bookmarks_for_user(session, user_key, offset, limit)
.where(Bookmark.user_key == user_key, Bookmark.status != Visibility.DELETED)
.offset(offset)
.limit(limit)
)
bookmarks = result.all()
return bookmarks
@app.get('/api/v1/{user_key}/bookmarks/{url_hash}') @app.get('/api/v1/{user_key}/bookmarks/{url_hash}')
@@ -178,13 +184,12 @@ async def get_bookmark(
url_hash: str, url_hash: str,
) -> Bookmark: ) -> Bookmark:
"""Show bookmark details.""" """Show bookmark details."""
result = await session.exec( logger.info('Bookmark details for user %s with url_hash %s', user_key, url_hash)
select(Bookmark).where( try:
Bookmark.user_key == user_key, Bookmark.url_hash == url_hash, Bookmark.status != Visibility.DELETED return await bookmarks_service.get_bookmark_for_user_with_url_hash(session, user_key, url_hash)
) except BookmarkNotFound as exc:
) logger.error('Bookmark not found: %s', exc)
bookmark = result.first() raise HTTPException(status_code=404, detail=f'Bookmark not found: {exc.message}')
return bookmark
@app.post('/api/v1/{user_key}/autocomplete_bookmark/', response_model=Bookmark) @app.post('/api/v1/{user_key}/autocomplete_bookmark/', response_model=Bookmark)
@@ -196,24 +201,8 @@ async def autocomplete_bookmark(
strip_params: bool = False, strip_params: bool = False,
): ):
"""Autofill some fields for this (new) bookmark for user `user_key`.""" """Autofill some fields for this (new) bookmark for user `user_key`."""
bookmark.user_key = user_key logger.info('Autocompleting bookmark %s for user %s', bookmark.url_hash, user_key)
return await bookmarks_service.autocomplete_bookmark(session, request, user_key, bookmark, strip_params)
# Auto-fill title, fix tags etc.
bookmarks_helpers.update_bookmark_with_info(bookmark, request, strip_params)
url_hash = utils.generate_hash(str(bookmark.url))
result = await session.exec(
select(Bookmark).where(
Bookmark.user_key == user_key, Bookmark.url_hash == url_hash, Bookmark.status != Visibility.DELETED
)
)
bookmark_db = result.first()
if bookmark_db:
# Bookmark with this URL already exists, provide the hash so the frontend can look it up and the user can
# merge them if so wanted
bookmark.url_hash = url_hash
return bookmark
@app.post('/api/v1/{user_key}/bookmarks/', response_model=Bookmark) @app.post('/api/v1/{user_key}/bookmarks/', response_model=Bookmark)
@@ -225,16 +214,8 @@ async def add_bookmark(
strip_params: bool = False, strip_params: bool = False,
): ):
"""Add new bookmark for user `user_key`.""" """Add new bookmark for user `user_key`."""
bookmark.user_key = user_key logger.info('Adding bookmark %s for user %s', bookmark.url, user_key)
return await bookmarks_service.add_bookmark(session, request, user_key, bookmark, strip_params)
# Auto-fill title, fix tags etc.
bookmarks_helpers.update_bookmark_with_info(bookmark, request, strip_params)
bookmark.url_hash = utils.generate_hash(str(bookmark.url))
session.add(bookmark)
await session.commit()
await session.refresh(bookmark)
return bookmark
@app.patch('/api/v1/{user_key}/bookmarks/{url_hash}', response_model=Bookmark) @app.patch('/api/v1/{user_key}/bookmarks/{url_hash}', response_model=Bookmark)
@@ -247,30 +228,13 @@ async def update_bookmark(
strip_params: bool = False, strip_params: bool = False,
): ):
"""Update existing bookmark `bookmark_key` for user `user_key`.""" """Update existing bookmark `bookmark_key` for user `user_key`."""
result = await session.exec( logger.info('Updating bookmark %s for user %s', url_hash, user_key)
select(Bookmark).where( try:
Bookmark.user_key == user_key, Bookmark.url_hash == url_hash, Bookmark.status != Visibility.DELETED return await bookmarks_service.update_bookmark(session, request, user_key, bookmark, url_hash, strip_params)
) except Exception:
) logger.exception('Failed to update bookmark %s', bookmark.id)
bookmark_db = result.first()
if not bookmark_db:
raise HTTPException(status_code=404, detail='Bookmark not found') raise HTTPException(status_code=404, detail='Bookmark not found')
bookmark.modified_date = datetime.now(UTC)
# 'patch' endpoint, which means that you can send only the data that you want to update, leaving the rest intact
bookmark_data = bookmark.model_dump(exclude_unset=True)
# Merge the changed fields into the existing object
bookmark_db.sqlmodel_update(bookmark_data)
# Autofill title, fix tags, etc. where (still) needed
bookmarks_helpers.update_bookmark_with_info(bookmark, request, strip_params)
session.add(bookmark_db)
await session.commit()
await session.refresh(bookmark_db)
return bookmark_db
@app.delete('/api/v1/{user_key}/bookmarks/{url_hash}', response_model=Bookmark) @app.delete('/api/v1/{user_key}/bookmarks/{url_hash}', response_model=Bookmark)
async def delete_bookmark( async def delete_bookmark(
@@ -279,15 +243,13 @@ async def delete_bookmark(
url_hash: str, url_hash: str,
): ):
"""(Soft)Delete bookmark `bookmark_key` for user `user_key`.""" """(Soft)Delete bookmark `bookmark_key` for user `user_key`."""
result = await session.get(Bookmark, {'url_hash': url_hash, 'user_key': user_key}) logger.info('Deleting bookmark %s for user %s', url_hash, user_key)
bookmark = result try:
if not bookmark: result = await bookmarks_service.delete_bookmark(session, user_key, url_hash)
return {'ok': True}
except Exception:
logger.exception('Failed to delete bookmark %s', url_hash)
raise HTTPException(status_code=404, detail='Bookmark not found') raise HTTPException(status_code=404, detail='Bookmark not found')
bookmark.deleted_date = datetime.now(UTC)
bookmark.status = Visibility.DELETED
session.add(bookmark)
await session.commit()
return {'ok': True}
@app.get('/api/v1/{user_key}/latest_changes/') @app.get('/api/v1/{user_key}/latest_changes/')
@@ -296,6 +258,7 @@ async def bookmarks_changed_since(
user_key: str, user_key: str,
): ):
"""Last update on server, so the (browser) client knows whether to fetch an update.""" """Last update on server, so the (browser) client knows whether to fetch an update."""
logger.info('Retrieving latest changes for user %s', user_key)
result = await session.exec( result = await session.exec(
select(Bookmark) select(Bookmark)
.where(Bookmark.user_key == user_key, Bookmark.status != Visibility.DELETED) .where(Bookmark.user_key == user_key, Bookmark.status != Visibility.DELETED)
@@ -332,7 +295,7 @@ async def list_tags_for_user(
tags = [] tags = []
for bookmark in bookmarks: for bookmark in bookmarks:
tags += bookmark.tag_list tags += bookmark.tag_list
return tags_helpers.clean_tags(tags) return tags_service.clean_tags(tags)
@app.get('/api/v1/{user_key}/tags/{tag_key}') @app.get('/api/v1/{user_key}/tags/{tag_key}')
@@ -343,7 +306,7 @@ async def list_bookmarks_for_tag_for_user(
"""List all tags in use by the user.""" """List all tags in use by the user."""
result = await session.exec(select(Bookmark).where(Bookmark.user_key == user_key)) result = await session.exec(select(Bookmark).where(Bookmark.user_key == user_key))
bookmarks = result.all() bookmarks = result.all()
return tags_helpers.list_tags_for_bookmarks(bookmarks) return tags_service.list_tags_for_bookmarks(bookmarks)
@app.get('/{user_key}', response_class=HTMLResponse) @app.get('/{user_key}', response_class=HTMLResponse)