1
0
mirror of https://github.com/aquatix/digimarks.git synced 2025-12-06 19:45:12 +01:00

2 Commits

5 changed files with 277 additions and 158 deletions

View File

@@ -1,78 +0,0 @@
"""Bookmark helper functions, like content scrapers, favicon extractor, updater functions."""
from urllib.parse import urlparse, urlunparse
import bs4
import httpx
from extract_favicon import from_html
from fastapi import Request
from pydantic import AnyUrl
from src.digimarks import tags_helpers
from src.digimarks.models import Bookmark
# Value sent as the `User-Agent` header when the source page of a bookmark is requested
DIGIMARKS_USER_AGENT = 'digimarks/2.0.0-dev'
def get_favicon(html_content: str, root_url: str) -> str:
    """List the favicon candidates that `from_html` finds in `html_content`.

    `root_url` is handed to the extractor and fallback locations are included. For every candidate the URL,
    width and height are printed. Nothing is returned yet, despite the `str` annotation: selecting and saving
    the preferred icon is still to be done.

    :param str html_content: HTML of the bookmarked page
    :param str root_url: scheme and host of the bookmarked page
    """
    for icon in from_html(html_content, include_fallbacks=True, root_url=root_url):
        print(icon.url, icon.width, icon.height)
    # TODO: save the preferred image to file and return
async def set_information_from_source(logger, bookmark: Bookmark, request: Request) -> Bookmark:
    """Request the source url and fill in `bookmark.title` and `bookmark.http_status`.

    The page is only parsed when the response status is 200 or 202. When the request itself fails, the status
    is set to 404 and the title is emptied.

    :param logger: logger used for the progress and error messages
    :param Bookmark bookmark: bookmark to update in place; its `url` is the page that is requested
    :param Request request: current request, used to reach the shared client at `request.app.requests_client`
    :return: the same `bookmark` object
    :rtype: Bookmark
    """
    # The URL can be a URL object rather than a string; both the HTTP client and `urlparse` need a plain string
    url = str(bookmark.url)
    logger.info('Extracting information from url %s', url)
    try:
        result = await request.app.requests_client.get(url, headers={'User-Agent': DIGIMARKS_USER_AGENT})
    except (httpx.HTTPError, httpx.InvalidURL) as err:
        # For example a missing or unsupported scheme ('abc' instead of 'http://abc') or a connection failure.
        # httpx.InvalidURL does not derive from httpx.HTTPError, so it is listed separately.
        logger.error('Exception when trying to retrieve title for %s. Error: %s', url, str(err))
        bookmark.http_status = 404
        bookmark.title = ''
        return bookmark

    bookmark.http_status = result.status_code
    if bookmark.http_status not in (200, 202):
        # Nothing useful to parse; keep the title as it was and only record the status
        logger.info('Not extracting information from %s, HTTP status was %s', url, bookmark.http_status)
        return bookmark

    html = bs4.BeautifulSoup(result.text, 'html.parser')
    try:
        bookmark.title = html.title.text.strip()
    except AttributeError:
        # The document has no <title> element
        bookmark.title = ''

    url_parts = urlparse(url)
    root_url = url_parts.scheme + '://' + url_parts.netloc
    # Only lists the favicon candidates for now; nothing is stored on the bookmark yet
    get_favicon(result.text, root_url)
    # filename = os.path.join(settings.media_dir, 'favicons/', domain + file_extension)
    # with open(filename, 'wb') as out_file:
    # shutil.copyfileobj(response.raw, out_file)

    # Extraction was successful
    logger.info('Extracting information was successful')
    return bookmark
def strip_url_params(url: str) -> str:
    """Return `url` without its query string.

    Scheme, host, path, path parameters (the `;...` part) and fragment are kept as they are.

    :param url: URL to strip the query string from.
    :return: clean URL
    :rtype: str
    """
    scheme, netloc, path, params, _query, fragment = urlparse(url)
    return urlunparse((scheme, netloc, path, params, '', fragment))
def update_bookmark_with_info(bookmark: Bookmark, request: Request, strip_params: bool = False):
    """Automatically update title, favicon, etc.

    :param Bookmark bookmark: bookmark to complete in place
    :param Request request: current request, handed on to `set_information_from_source`
    :param bool strip_params: when true, replace the bookmark's URL with the result of `strip_url_params`
    """
    if not bookmark.title:
        # Title was empty, automatically fetch it from the url, will also update the status code
        # NOTE(review): `set_information_from_source` is declared `async` and takes `logger` as its first
        # parameter, but it is called here without `await` and without a logger -- confirm, and fix this
        # together with the callers of this function
        set_information_from_source(bookmark, request)
    if strip_params:
        # Strip URL parameters, e.g., tracking params
        bookmark.url = AnyUrl(strip_url_params(str(bookmark.url)))
    # Sort and deduplicate tags
    tags_helpers.set_tags(bookmark, bookmark.tags)

View File

@@ -0,0 +1,213 @@
"""Bookmark helper functions, like content scrapers, favicon extractor, updater functions."""
import logging
from datetime import UTC, datetime
from typing import Annotated, Sequence
from urllib.parse import urlparse, urlunparse
import bs4
import httpx
from extract_favicon import from_html
from fastapi import Query, Request
from pydantic import AnyUrl
from sqlmodel import select
from src.digimarks import tags_service, utils
from src.digimarks.exceptions import BookmarkNotFound
from src.digimarks.models import Bookmark, Visibility
# Value sent as the `User-Agent` header when the source page of a bookmark is requested
DIGIMARKS_USER_AGENT = 'digimarks/2.0.0-dev'
# Module-wide logger, registered under the name 'digimarks'
logger = logging.getLogger('digimarks')
def get_favicon(html_content: str, root_url: str) -> str:
    """List the favicon candidates that `from_html` finds in `html_content`.

    `root_url` is handed to the extractor and fallback locations are included. For every candidate the URL,
    width and height are printed. Nothing is returned yet, despite the `str` annotation: selecting and saving
    the preferred icon is still to be done.

    :param str html_content: HTML of the bookmarked page
    :param str root_url: scheme and host of the bookmarked page
    """
    for icon in from_html(html_content, include_fallbacks=True, root_url=root_url):
        print(icon.url, icon.width, icon.height)
    # TODO: save the preferred image to file and return
async def set_information_from_source(logger, bookmark: Bookmark, request: Request) -> Bookmark:
    """Request the source url and fill in `bookmark.title` and `bookmark.http_status`.

    The page is only parsed when the response status is 200 or 202. When the request itself fails, the status
    is set to 404 and the title is emptied.

    :param logger: logger used for the progress and error messages
    :param Bookmark bookmark: bookmark to update in place; its `url` is the page that is requested
    :param Request request: current request, used to reach the shared client at `request.app.requests_client`
    :return: the same `bookmark` object
    :rtype: Bookmark
    """
    # The URL can be a URL object rather than a string; both the HTTP client and `urlparse` need a plain string
    url = str(bookmark.url)
    logger.info('Extracting information from url %s', url)
    try:
        result = await request.app.requests_client.get(url, headers={'User-Agent': DIGIMARKS_USER_AGENT})
    except (httpx.HTTPError, httpx.InvalidURL) as err:
        # For example a missing or unsupported scheme ('abc' instead of 'http://abc') or a connection failure.
        # httpx.InvalidURL does not derive from httpx.HTTPError, so it is listed separately.
        logger.error('Exception when trying to retrieve title for %s. Error: %s', url, str(err))
        bookmark.http_status = 404
        bookmark.title = ''
        return bookmark

    bookmark.http_status = result.status_code
    if bookmark.http_status not in (200, 202):
        # Nothing useful to parse; keep the title as it was and only record the status
        logger.info('Not extracting information from %s, HTTP status was %s', url, bookmark.http_status)
        return bookmark

    html = bs4.BeautifulSoup(result.text, 'html.parser')
    try:
        bookmark.title = html.title.text.strip()
    except AttributeError:
        # The document has no <title> element
        bookmark.title = ''

    url_parts = urlparse(url)
    root_url = url_parts.scheme + '://' + url_parts.netloc
    # Only lists the favicon candidates for now; nothing is stored on the bookmark yet
    get_favicon(result.text, root_url)
    # filename = os.path.join(settings.media_dir, 'favicons/', domain + file_extension)
    # with open(filename, 'wb') as out_file:
    # shutil.copyfileobj(response.raw, out_file)

    # Extraction was successful
    logger.info('Extracting information was successful')
    return bookmark
def strip_url_params(url: str) -> str:
    """Return `url` without its query string.

    Scheme, host, path, path parameters (the `;...` part) and fragment are kept as they are.

    :param url: URL to strip the query string from.
    :return: clean URL
    :rtype: str
    """
    scheme, netloc, path, params, _query, fragment = urlparse(url)
    return urlunparse((scheme, netloc, path, params, '', fragment))
async def update_bookmark_with_info(bookmark: Bookmark, request: Request, strip_params: bool = False) -> None:
    """Automatically update title, favicon, etc.

    This is a coroutine and has to be awaited: fetching a missing title goes through the async
    `set_information_from_source`.

    :param Bookmark bookmark: bookmark to complete in place
    :param Request request: current request, handed on to `set_information_from_source`
    :param bool strip_params: when true, replace the bookmark's URL with the result of `strip_url_params`
    """
    if not bookmark.title:
        # Title was empty, automatically fetch it from the url, will also update the status code
        await set_information_from_source(logger, bookmark, request)

    if strip_params:
        # Strip URL parameters, e.g., tracking params
        bookmark.url = AnyUrl(strip_url_params(str(bookmark.url)))

    # Sort and deduplicate tags
    tags_service.set_tags(bookmark, bookmark.tags)


async def _first_active_bookmark(session, user_key: str, url_hash: str) -> Bookmark | None:
    """Return the first non-deleted bookmark of `user_key` with hash `url_hash`, or None when there is none."""
    result = await session.exec(
        select(Bookmark).where(
            Bookmark.user_key == user_key, Bookmark.url_hash == url_hash, Bookmark.status != Visibility.DELETED
        )
    )
    return result.first()


async def list_bookmarks_for_user(
    session,
    user_key: str,
    offset: int = 0,
    limit: Annotated[int, Query(le=10000)] = 100,
) -> Sequence[Bookmark]:
    """List the non-deleted bookmarks of user `user_key`. By default, 100 items are returned.

    :param session: database session to query with
    :param str user_key: key of the user whose bookmarks are listed
    :param int offset: number of rows to skip
    :param int limit: maximum number of rows to return
    :return: the matching bookmarks
    """
    result = await session.exec(
        select(Bookmark)
        .where(Bookmark.user_key == user_key, Bookmark.status != Visibility.DELETED)
        .offset(offset)
        .limit(limit)
    )
    return result.all()


async def get_bookmark_for_user_with_url_hash(session, user_key: str, url_hash: str) -> Bookmark:
    """Get a bookmark from the database by its URL hash.

    :param session: database session to query with
    :param str user_key: key of the user owning the bookmark
    :param str url_hash: hash of the bookmark's URL
    :return: the matching, non-deleted bookmark
    :raises BookmarkNotFound: when no such bookmark exists
    """
    # Fetch once and keep the row: `first()` consumes the result, so it must not be called a second time
    bookmark = await _first_active_bookmark(session, user_key, url_hash)
    if not bookmark:
        raise BookmarkNotFound(f'url_hash: {url_hash}')
    return bookmark


async def autocomplete_bookmark(
    session,
    request: Request,
    user_key: str,
    bookmark: Bookmark,
    strip_params: bool = False,
):
    """Autofill some fields for this (new) bookmark for user `user_key`.

    Nothing is stored; the completed `bookmark` object itself is returned.

    :param session: database session used to look for an existing bookmark with the same URL
    :param Request request: current request, needed for fetching the title
    :param str user_key: key of the user the bookmark is for
    :param Bookmark bookmark: bookmark to complete in place
    :param bool strip_params: when true, drop the query string from the URL
    :return: the completed bookmark; `url_hash` is only filled in when the user already has this URL
    """
    bookmark.user_key = user_key

    # Auto-fill title, fix tags etc.
    await update_bookmark_with_info(bookmark, request, strip_params)

    url_hash = utils.generate_hash(str(bookmark.url))
    if await _first_active_bookmark(session, user_key, url_hash):
        # Bookmark with this URL already exists, provide the hash so the frontend can look it up and the user can
        # merge them if so wanted
        bookmark.url_hash = url_hash

    return bookmark


async def add_bookmark(
    session,
    request: Request,
    user_key: str,
    bookmark: Bookmark,
    strip_params: bool = False,
):
    """Add new bookmark for user `user_key`.

    :param session: database session the bookmark is added to and committed with
    :param Request request: current request, needed for fetching the title
    :param str user_key: key of the user the bookmark is for
    :param Bookmark bookmark: bookmark to store
    :param bool strip_params: when true, drop the query string from the URL before hashing and storing
    :return: the stored bookmark, refreshed from the database
    """
    bookmark.user_key = user_key

    # Auto-fill title, fix tags etc.
    await update_bookmark_with_info(bookmark, request, strip_params)
    # The hash is taken after the autofill so that it matches the (possibly stripped) URL
    bookmark.url_hash = utils.generate_hash(str(bookmark.url))
    logger.info('Adding bookmark %s for user %s', bookmark.url_hash, user_key)

    session.add(bookmark)
    await session.commit()
    await session.refresh(bookmark)
    return bookmark


async def update_bookmark(
    session,
    request: Request,
    user_key: str,
    bookmark: Bookmark,
    url_hash: str,
    strip_params: bool = False,
):
    """Update existing bookmark `url_hash` for user `user_key`.

    :param session: database session to query and commit with
    :param Request request: current request, needed for fetching the title
    :param str user_key: key of the user owning the bookmark
    :param Bookmark bookmark: the changes; only the fields that were explicitly set are applied
    :param str url_hash: hash identifying the stored bookmark
    :param bool strip_params: when true, drop the query string from the stored URL
    :return: the updated bookmark, refreshed from the database
    :raises BookmarkNotFound: when the user has no non-deleted bookmark with this hash
    """
    bookmark_db = await _first_active_bookmark(session, user_key, url_hash)
    if not bookmark_db:
        raise BookmarkNotFound(message=f'Bookmark with hash {url_hash} not found')

    bookmark.modified_date = datetime.now(UTC)

    # 'patch' endpoint, which means that you can send only the data that you want to update, leaving the rest intact
    bookmark_data = bookmark.model_dump(exclude_unset=True)
    # Merge the changed fields into the existing object
    bookmark_db.sqlmodel_update(bookmark_data)

    # Autofill title, fix tags, etc. where (still) needed. This has to work on the merged object: changes made
    # to the incoming `bookmark` after the merge above would never be stored.
    # NOTE(review): with `strip_params` the stored URL can change while `url_hash` stays as it was -- confirm
    # whether the hash should be regenerated here
    await update_bookmark_with_info(bookmark_db, request, strip_params)

    session.add(bookmark_db)
    await session.commit()
    await session.refresh(bookmark_db)
    return bookmark_db
async def delete_bookmark(
    session,
    user_key: str,
    url_hash: str,
):
    """(Soft)Delete bookmark `url_hash` for user `user_key`.

    The row stays in the database: its `deleted_date` is set to the current UTC time and its `status` to
    `Visibility.DELETED`.

    :param session: database session to look up and commit with
    :param str user_key: key of the user owning the bookmark
    :param str url_hash: hash identifying the bookmark
    :raises BookmarkNotFound: when the lookup returns nothing
    """
    bookmark = await session.get(Bookmark, {'url_hash': url_hash, 'user_key': user_key})
    if not bookmark:
        raise BookmarkNotFound(message=f'Bookmark with hash {url_hash} not found')

    bookmark.deleted_date = datetime.now(UTC)
    bookmark.status = Visibility.DELETED
    session.add(bookmark)
    await session.commit()

View File

@@ -0,0 +1,21 @@
"""Exceptions that could be encountered managing digimarks."""
class BookmarkNotFound(Exception):
    """Error for a bookmark that could not be found; the text is also available as `message`."""

    def __init__(self, message: str = 'Bookmark not found') -> None:
        """Create the exception.

        :param str message: The message for the exception
        """
        self.message = message
        super().__init__(message)
class BookmarkAlreadyExists(Exception):
    """Error for a bookmark that already exists; the text is also available as `message`."""

    def __init__(self, message: str = 'Bookmark already exists') -> None:
        """Create the exception.

        :param str message: The message for the exception
        """
        self.message = message
        super().__init__(message)

View File

@@ -18,7 +18,8 @@ from sqlalchemy.orm import sessionmaker
from sqlmodel import desc, select
from sqlmodel.ext.asyncio.session import AsyncSession
from src.digimarks import bookmarks_helpers, tags_helpers, utils
from src.digimarks import bookmarks_service, tags_service
from src.digimarks.exceptions import BookmarkNotFound
from src.digimarks.models import DEFAULT_THEME, Bookmark, User, Visibility
DIGIMARKS_VERSION = '2.0.0a1'
@@ -72,6 +73,11 @@ app.mount('/content/favicons', StaticFiles(directory=settings.favicons_dir), nam
templates = Jinja2Templates(directory=settings.template_dir)
# Set up logging
logging.basicConfig(
level=logging.INFO,
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
datefmt='%Y-%m-%d %H:%M:%S',
)
logger = logging.getLogger('digimarks')
if settings.debug:
logger.setLevel(logging.DEBUG)
@@ -109,6 +115,7 @@ def file_type(filename: str) -> str:
@app.head('/', response_class=HTMLResponse)
def index(request: Request):
"""Homepage, point visitors to project page."""
logger.info('Root page requested')
return templates.TemplateResponse(
request=request,
name='index.html',
@@ -119,11 +126,15 @@ def index(request: Request):
@app.get('/api/v1/admin/{system_key}/users/{user_id}', response_model=User)
async def get_user(session: SessionDep, system_key: str, user_id: int) -> Type[User]:
"""Show user information."""
logger.info('User %d requested', user_id)
if system_key != settings.system_key:
logger.error('User %s requested but incorrect system key %s provided', user_id, system_key)
raise HTTPException(status_code=404)
user = session.get(User, user_id)
result = await session.get(User, user_id)
user = result
if not user:
logger.error('User %s not found', user_id)
raise HTTPException(status_code=404, detail='User not found')
return user
@@ -145,12 +156,13 @@ async def list_users(
:return: list of users in the system
:rtype: list[User]
"""
logger.info('User listing requested')
if system_key != settings.system_key:
logger.error('User listing requested but incorrect system key %s provided', system_key)
raise HTTPException(status_code=404)
result = await session.exec(select(User).offset(offset).limit(limit))
users = result.all()
return users
return result.all()
@app.get('/api/v1/{user_key}/bookmarks/')
@@ -159,16 +171,10 @@ async def list_bookmarks(
user_key: str,
offset: int = 0,
limit: Annotated[int, Query(le=10000)] = 100,
) -> list[Bookmark]:
) -> Sequence[Bookmark]:
"""List all bookmarks in the database. By default, 100 items are returned."""
result = await session.exec(
select(Bookmark)
.where(Bookmark.user_key == user_key, Bookmark.status != Visibility.DELETED)
.offset(offset)
.limit(limit)
)
bookmarks = result.all()
return bookmarks
logger.info('List bookmarks for user %s with offset %d, limit %d', user_key, offset, limit)
return await bookmarks_service.list_bookmarks_for_user(session, user_key, offset, limit)
@app.get('/api/v1/{user_key}/bookmarks/{url_hash}')
@@ -178,13 +184,12 @@ async def get_bookmark(
url_hash: str,
) -> Bookmark:
"""Show bookmark details."""
result = await session.exec(
select(Bookmark).where(
Bookmark.user_key == user_key, Bookmark.url_hash == url_hash, Bookmark.status != Visibility.DELETED
)
)
bookmark = result.first()
return bookmark
logger.info('Bookmark details for user %s with url_hash %s', user_key, url_hash)
try:
return await bookmarks_service.get_bookmark_for_user_with_url_hash(session, user_key, url_hash)
except BookmarkNotFound as exc:
logger.error('Bookmark not found: %s', exc)
raise HTTPException(status_code=404, detail=f'Bookmark not found: {exc.message}')
@app.post('/api/v1/{user_key}/autocomplete_bookmark/', response_model=Bookmark)
@@ -196,24 +201,8 @@ async def autocomplete_bookmark(
strip_params: bool = False,
):
"""Autofill some fields for this (new) bookmark for user `user_key`."""
bookmark.user_key = user_key
# Auto-fill title, fix tags etc.
bookmarks_helpers.update_bookmark_with_info(bookmark, request, strip_params)
url_hash = utils.generate_hash(str(bookmark.url))
result = await session.exec(
select(Bookmark).where(
Bookmark.user_key == user_key, Bookmark.url_hash == url_hash, Bookmark.status != Visibility.DELETED
)
)
bookmark_db = result.first()
if bookmark_db:
# Bookmark with this URL already exists, provide the hash so the frontend can look it up and the user can
# merge them if so wanted
bookmark.url_hash = url_hash
return bookmark
logger.info('Autocompleting bookmark %s for user %s', bookmark.url_hash, user_key)
return await bookmarks_service.autocomplete_bookmark(session, request, user_key, bookmark, strip_params)
@app.post('/api/v1/{user_key}/bookmarks/', response_model=Bookmark)
@@ -225,16 +214,8 @@ async def add_bookmark(
strip_params: bool = False,
):
"""Add new bookmark for user `user_key`."""
bookmark.user_key = user_key
# Auto-fill title, fix tags etc.
bookmarks_helpers.update_bookmark_with_info(bookmark, request, strip_params)
bookmark.url_hash = utils.generate_hash(str(bookmark.url))
session.add(bookmark)
await session.commit()
await session.refresh(bookmark)
return bookmark
logger.info('Adding bookmark %s for user %s', bookmark.url, user_key)
return await bookmarks_service.add_bookmark(session, request, user_key, bookmark, strip_params)
@app.patch('/api/v1/{user_key}/bookmarks/{url_hash}', response_model=Bookmark)
@@ -247,30 +228,13 @@ async def update_bookmark(
strip_params: bool = False,
):
"""Update existing bookmark `bookmark_key` for user `user_key`."""
result = await session.exec(
select(Bookmark).where(
Bookmark.user_key == user_key, Bookmark.url_hash == url_hash, Bookmark.status != Visibility.DELETED
)
)
bookmark_db = result.first()
if not bookmark_db:
logger.info('Updating bookmark %s for user %s', url_hash, user_key)
try:
return await bookmarks_service.update_bookmark(session, request, user_key, bookmark, url_hash, strip_params)
except Exception:
logger.exception('Failed to update bookmark %s', bookmark.id)
raise HTTPException(status_code=404, detail='Bookmark not found')
bookmark.modified_date = datetime.now(UTC)
# 'patch' endpoint, which means that you can send only the data that you want to update, leaving the rest intact
bookmark_data = bookmark.model_dump(exclude_unset=True)
# Merge the changed fields into the existing object
bookmark_db.sqlmodel_update(bookmark_data)
# Autofill title, fix tags, etc. where (still) needed
bookmarks_helpers.update_bookmark_with_info(bookmark, request, strip_params)
session.add(bookmark_db)
await session.commit()
await session.refresh(bookmark_db)
return bookmark_db
@app.delete('/api/v1/{user_key}/bookmarks/{url_hash}', response_model=Bookmark)
async def delete_bookmark(
@@ -279,15 +243,13 @@ async def delete_bookmark(
url_hash: str,
):
"""(Soft)Delete bookmark `bookmark_key` for user `user_key`."""
result = await session.get(Bookmark, {'url_hash': url_hash, 'user_key': user_key})
bookmark = result
if not bookmark:
logger.info('Deleting bookmark %s for user %s', url_hash, user_key)
try:
result = await bookmarks_service.delete_bookmark(session, user_key, url_hash)
return {'ok': True}
except Exception:
logger.exception('Failed to delete bookmark %s', url_hash)
raise HTTPException(status_code=404, detail='Bookmark not found')
bookmark.deleted_date = datetime.now(UTC)
bookmark.status = Visibility.DELETED
session.add(bookmark)
await session.commit()
return {'ok': True}
@app.get('/api/v1/{user_key}/latest_changes/')
@@ -296,6 +258,7 @@ async def bookmarks_changed_since(
user_key: str,
):
"""Last update on server, so the (browser) client knows whether to fetch an update."""
logger.info('Retrieving latest changes for user %s', user_key)
result = await session.exec(
select(Bookmark)
.where(Bookmark.user_key == user_key, Bookmark.status != Visibility.DELETED)
@@ -332,7 +295,7 @@ async def list_tags_for_user(
tags = []
for bookmark in bookmarks:
tags += bookmark.tag_list
return tags_helpers.clean_tags(tags)
return tags_service.clean_tags(tags)
@app.get('/api/v1/{user_key}/tags/{tag_key}')
@@ -343,7 +306,7 @@ async def list_bookmarks_for_tag_for_user(
"""List all tags in use by the user."""
result = await session.exec(select(Bookmark).where(Bookmark.user_key == user_key))
bookmarks = result.all()
return tags_helpers.list_tags_for_bookmarks(bookmarks)
return tags_service.list_tags_for_bookmarks(bookmarks)
@app.get('/{user_key}', response_class=HTMLResponse)