1
0
mirror of https://github.com/aquatix/digimarks.git synced 2025-12-06 20:55:10 +01:00

7 Commits

8 changed files with 269 additions and 182 deletions

View File

@@ -48,6 +48,7 @@ def run_migrations_offline() -> None:
target_metadata=target_metadata,
literal_binds=True,
dialect_opts={'paramstyle': 'named'},
render_as_batch=True,
)
with context.begin_transaction():
@@ -55,7 +56,11 @@ def run_migrations_offline() -> None:
def do_run_migrations(connection: Connection) -> None:
context.configure(connection=connection, target_metadata=target_metadata)
context.configure(
connection=connection,
target_metadata=target_metadata,
render_as_batch=True,
)
with context.begin_transaction():
context.run_migrations()

View File

@@ -0,0 +1,53 @@
"""Renamed keys
Revision ID: b8cbc6957df5
Revises: a8d8e45f60a1
Create Date: 2025-09-12 22:26:38.684120
"""
from typing import Sequence, Union
from alembic import op
import sqlalchemy as sa
import sqlmodel
# revision identifiers, used by Alembic.
revision: str = 'b8cbc6957df5'
down_revision: Union[str, Sequence[str], None] = 'a8d8e45f60a1'
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
def upgrade() -> None:
    """Upgrade schema.

    Renames `bookmark.userkey` to `user_key`, and `publictag.userkey` / `publictag.tagkey`
    to `user_key` / `tag_key`. In both tables the foreign key to `user.key` is dropped
    before the rename and created again afterwards on the renamed column.
    """
    # ### commands auto generated by Alembic - please adjust! ###
    # Batch mode groups the operations per table
    # (presumably needed for SQLite, which cannot alter constraints in place - confirm).
    with op.batch_alter_table('bookmark', schema=None) as batch_op:
        # The constraint is defined on the column that is about to be renamed, so drop it first
        batch_op.drop_constraint(batch_op.f('bookmark_user'), type_='foreignkey')
        batch_op.alter_column('userkey', new_column_name='user_key')
        # Recreate the same-named constraint, now on `user_key` -> `user.key`
        batch_op.create_foreign_key('bookmark_user', 'user', ['user_key'], ['key'])
    with op.batch_alter_table('publictag', schema=None) as batch_op:
        batch_op.drop_constraint(batch_op.f('publictag_user'), type_='foreignkey')
        batch_op.alter_column('userkey', new_column_name='user_key')
        # `tagkey` carries no constraint in this migration; it is only renamed
        batch_op.alter_column('tagkey', new_column_name='tag_key')
        batch_op.create_foreign_key('publictag_user', 'user', ['user_key'], ['key'])
    # ### end Alembic commands ###
def downgrade() -> None:
    """Downgrade schema.

    Reverts the column renames: `user_key` / `tag_key` become `userkey` / `tagkey` again.
    The tables are handled in the opposite order of the upgrade (`publictag` first,
    then `bookmark`), and each foreign key to `user.key` is dropped and recreated
    around the rename.
    """
    # ### commands auto generated by Alembic - please adjust! ###
    with op.batch_alter_table('publictag', schema=None) as batch_op:
        # Drop the constraint before renaming the column it is defined on
        batch_op.drop_constraint(batch_op.f('publictag_user'), type_='foreignkey')
        batch_op.alter_column('user_key', new_column_name='userkey')
        batch_op.alter_column('tag_key', new_column_name='tagkey')
        # Recreate the constraint on the old column name
        batch_op.create_foreign_key('publictag_user', 'user', ['userkey'], ['key'])
    with op.batch_alter_table('bookmark', schema=None) as batch_op:
        batch_op.drop_constraint(batch_op.f('bookmark_user'), type_='foreignkey')
        batch_op.alter_column('user_key', new_column_name='userkey')
        batch_op.create_foreign_key('bookmark_user', 'user', ['userkey'], ['key'])
    # ### end Alembic commands ###

View File

@@ -21,10 +21,14 @@ classifiers = [
dependencies = [
"importlib-metadata; python_version<'3.8'",
"fastapi[all]",
"sqlmodel",
"alembic",
"aiosqlite",
"pydantic>2.0",
"requests",
"bs4",
"feedgen"
"httpx",
"beautifulsoup4",
"extract_favicon",
"feedgen",
]
# dynamic = ["version"]

View File

@@ -0,0 +1,78 @@
"""Bookmark helper functions, like content scrapers, favicon extractor, updater functions."""
from urllib.parse import urlparse, urlunparse
import bs4
import httpx
from extract_favicon import from_html
from fastapi import Request
from pydantic import AnyUrl
from src.digimarks import tags_helpers
from src.digimarks.models import Bookmark
DIGIMARKS_USER_AGENT = 'digimarks/2.0.0-dev'
def get_favicon(html_content: str, root_url: str) -> str:
    """Look up the favicon candidates for a page and print them.

    Work in progress: the candidates found by `from_html` (fallbacks included) are only
    printed with their url, width and height. Nothing is saved or selected yet, so the
    function currently returns `None` despite the `str` annotation.

    :param str html_content: HTML source of the page
    :param str root_url: root URL of the site, handed to `from_html` as `root_url`
    """
    for candidate in from_html(html_content, root_url=root_url, include_fallbacks=True):
        print(candidate.url, candidate.width, candidate.height)
    # TODO: save the preferred image to file and return
async def set_information_from_source(logger, bookmark: Bookmark, request: Request) -> Bookmark:
    """Fetch the bookmarked page and fill in the HTTP status and title of `bookmark`.

    The page is requested with the shared client on `request.app.requests_client`.
    When the request itself fails, the status is set to 404 and the title is cleared.
    For a 200 or 202 response the title is taken from the page's `<title>` element
    (empty when there is none) and the favicon lookup is run on the page source.

    :param logger: logger used for progress and error messages
    :param Bookmark bookmark: bookmark to update in place; its `url` is the page that is fetched
    :param Request request: current request, gives access to the application's HTTP client
    :return: the same `bookmark` object, updated
    :rtype: Bookmark
    """
    # `bookmark.url` is declared as a pydantic `AnyUrl`, while the HTTP client expects a plain
    # string (or its own URL type), so convert once and use the string everywhere below.
    url = str(bookmark.url)
    logger.info('Extracting information from url %s', url)
    try:
        result = await request.app.requests_client.get(url, headers={'User-Agent': DIGIMARKS_USER_AGENT})
    except httpx.HTTPError as err:
        # Request could not be completed, e.g., unsupported scheme, connection error or timeout
        logger.error('Exception when trying to retrieve title for %s. Error: %s', url, str(err))
        bookmark.http_status = 404
        bookmark.title = ''
        return bookmark

    bookmark.http_status = result.status_code
    if bookmark.http_status in (200, 202):
        html = bs4.BeautifulSoup(result.text, 'html.parser')
        try:
            bookmark.title = html.title.text.strip()
        except AttributeError:
            # The document has no <title> element
            bookmark.title = ''

        url_parts = urlparse(url)
        root_url = url_parts.scheme + '://' + url_parts.netloc
        # TODO: store the favicon found here under the favicons media directory
        get_favicon(result.text, root_url)

        # Extraction was successful
        logger.info('Extracting information was successful')
    return bookmark
def strip_url_params(url: str) -> str:
    """Remove the query string from a URL.

    Scheme, host, path, path parameters (the `;...` part) and fragment are kept as they are;
    only the `?...` part is dropped.

    :param url: URL to remove the query string from.
    :return: URL without query string
    :rtype: str
    """
    scheme, netloc, path, path_params, _query, fragment = urlparse(url)
    return urlunparse((scheme, netloc, path, path_params, '', fragment))
def update_bookmark_with_info(bookmark: Bookmark, request: Request, strip_params: bool = False):
    """Automatically update title, favicon, etc.

    The bookmark is modified in place; nothing is returned.

    :param Bookmark bookmark: bookmark to complete
    :param Request request: current request, handed on to `set_information_from_source`
    :param bool strip_params: when true, the query string is removed from `bookmark.url`
    """
    if not bookmark.title:
        # Title was empty, automatically fetch it from the url, will also update the status code
        # NOTE(review): `set_information_from_source` is declared `async` and takes
        # `(logger, bookmark, request)`. This call passes only two arguments and is not
        # awaited, so it looks like it cannot work as written. A fix means making this
        # function async and awaiting it in its callers - confirm and change them together.
        set_information_from_source(bookmark, request)
    if strip_params:
        # Strip URL parameters, e.g., tracking params
        bookmark.url = AnyUrl(strip_url_params(str(bookmark.url)))
    # Sort and deduplicate tags
    tags_helpers.set_tags(bookmark, bookmark.tags)

View File

@@ -1,32 +1,26 @@
"""digimarks main module."""
import binascii
import hashlib
import logging
import os
from contextlib import asynccontextmanager
from datetime import UTC, datetime
from typing import Annotated, Sequence, Type
from urllib.parse import urlparse, urlunparse
import bs4
import httpx
from extract_favicon import from_html
from fastapi import Depends, FastAPI, HTTPException, Query, Request
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import HTMLResponse
from fastapi.staticfiles import StaticFiles
from fastapi.templating import Jinja2Templates
from pydantic import AnyUrl, DirectoryPath, FilePath
from pydantic import DirectoryPath, FilePath
from pydantic_settings import BaseSettings
from sqlalchemy.ext.asyncio import create_async_engine
from sqlalchemy.orm import sessionmaker
from sqlmodel import desc, select
from sqlmodel.ext.asyncio.session import AsyncSession
from src.digimarks import bookmarks_helpers, tags_helpers, utils
from src.digimarks.models import DEFAULT_THEME, Bookmark, User, Visibility
DIGIMARKS_USER_AGENT = 'digimarks/2.0.0-dev'
DIGIMARKS_VERSION = '2.0.0a1'
@@ -77,6 +71,7 @@ app.mount('/static', StaticFiles(directory=settings.static_dir), name='static')
app.mount('/content/favicons', StaticFiles(directory=settings.favicons_dir), name='favicons')
templates = Jinja2Templates(directory=settings.template_dir)
# Set up logging
logger = logging.getLogger('digimarks')
if settings.debug:
logger.setLevel(logging.DEBUG)
@@ -91,61 +86,6 @@ app.add_middleware(
)
def i_filter_false(predicate, iterable):
"""Filter an iterable if predicate returns True.
i_filter_false(lambda x: x%2, range(10)) --> 0 2 4 6 8
"""
if predicate is None:
predicate = bool
for x in iterable:
if not predicate(x):
yield x
def unique_ever_seen(iterable, key=None):
"""List unique elements, preserving order. Remember all elements ever seen.
unique_ever_seen('AAAABBBCCDAABBB') --> A B C D
unique_ever_seen('ABBCcAD', str.lower) --> A B C D
"""
seen = set()
seen_add = seen.add
if key is None:
for element in i_filter_false(seen.__contains__, iterable):
seen_add(element)
yield element
else:
for element in iterable:
k = key(element)
if k not in seen:
seen_add(k)
yield element
def clean_tags(tags_list: list) -> list:
"""Generate a unique list of the tags.
:param list tags_list: List with all tags
:return: deduplicated list of the tags, without leading or trailing whitespace
:rtype: list
"""
tags_res = [x.strip() for x in tags_list]
tags_res = list(unique_ever_seen(tags_res))
tags_res.sort()
if tags_res and tags_res[0] == '':
del tags_res[0]
return tags_res
def list_tags_for_bookmarks(bookmarks: list) -> list:
"""Generate a unique list of the tags from the list of bookmarks."""
tags = []
for bookmark in bookmarks:
tags += bookmark.tags_list
return clean_tags(tags)
def file_type(filename: str) -> str:
"""Try to determine the file type for the file in `filename`.
@@ -165,91 +105,6 @@ def file_type(filename: str) -> str:
return 'no match'
def generate_hash(input_text: str) -> str:
"""Generate a hash from string `input`, e.g., for a URL."""
return hashlib.md5(input_text.encode('utf-8')).hexdigest()
def generate_key() -> str:
"""Generate a key to be used for a user or tag."""
return str(binascii.hexlify(os.urandom(24)))
def get_favicon(html_content: str, root_url: str) -> str:
"""Fetch the favicon from `html_content` using `root_url`."""
favicons = from_html(html_content, root_url=root_url, include_fallbacks=True)
for favicon in favicons:
print(favicon.url, favicon.width, favicon.height)
# TODO: save the preferred image to file and return
async def set_information_from_source(bookmark: Bookmark, request: Request) -> Bookmark:
"""Request the title by requesting the source url."""
logger.info('Extracting information from url %s', bookmark.url)
try:
result = await request.app.requests_client.get(bookmark.url, headers={'User-Agent': DIGIMARKS_USER_AGENT})
bookmark.http_status = result.status_code
except httpx.HTTPError as err:
# For example, 'MissingSchema: Invalid URL 'abc': No schema supplied. Perhaps you meant http://abc?'
logger.error('Exception when trying to retrieve title for %s. Error: %s', bookmark.url, str(err))
bookmark.http_status = 404
bookmark.title = ''
return bookmark
if bookmark.http_status == 200 or bookmark.http_status == 202:
html = bs4.BeautifulSoup(result.text, 'html.parser')
try:
bookmark.title = html.title.text.strip()
except AttributeError:
bookmark.title = ''
url_parts = urlparse(str(bookmark.url))
root_url = url_parts.scheme + '://' + url_parts.netloc
favicon = get_favicon(result.text, root_url)
# filename = os.path.join(settings.media_dir, 'favicons/', domain + file_extension)
# with open(filename, 'wb') as out_file:
# shutil.copyfileobj(response.raw, out_file)
# Extraction was successful
logger.info('Extracting information was successful')
return bookmark
def set_tags(bookmark: Bookmark, new_tags: str) -> None:
"""Set tags from `tags`, strip and sort them.
:param Bookmark bookmark: Bookmark to modify
:param str new_tags: New tags to sort and set.
"""
tags_split = new_tags.split(',')
tags_clean = clean_tags(tags_split)
bookmark.tags = ','.join(tags_clean)
def strip_url_params(url: str) -> str:
"""Strip URL params from URL.
:param url: URL to strip URL params from.
:return: clean URL
:rtype: str
"""
parsed = urlparse(url)
return urlunparse((parsed.scheme, parsed.netloc, parsed.path, parsed.params, '', parsed.fragment))
def update_bookmark_with_info(bookmark: Bookmark, request: Request, strip_params: bool = False):
"""Automatically update title, favicon, etc."""
if not bookmark.title:
# Title was empty, automatically fetch it from the url, will also update the status code
set_information_from_source(bookmark, request)
if strip_params:
# Strip URL parameters, e.g., tracking params
bookmark.url = AnyUrl(strip_url_params(str(bookmark.url)))
# Sort and deduplicate tags
set_tags(bookmark, bookmark.tags)
@app.get('/', response_class=HTMLResponse)
@app.head('/', response_class=HTMLResponse)
def index(request: Request):
@@ -293,7 +148,8 @@ async def list_users(
if system_key != settings.system_key:
raise HTTPException(status_code=404)
users = session.exec(select(User).offset(offset).limit(limit)).all()
result = await session.exec(select(User).offset(offset).limit(limit))
users = result.all()
return users
@@ -304,10 +160,10 @@ async def list_bookmarks(
offset: int = 0,
limit: Annotated[int, Query(le=10000)] = 100,
) -> list[Bookmark]:
"""List all bookmarks in the database. By default 100 items are returned."""
"""List all bookmarks in the database. By default, 100 items are returned."""
result = await session.exec(
select(Bookmark)
.where(Bookmark.userkey == user_key, Bookmark.status != Visibility.DELETED)
.where(Bookmark.user_key == user_key, Bookmark.status != Visibility.DELETED)
.offset(offset)
.limit(limit)
)
@@ -324,11 +180,10 @@ async def get_bookmark(
"""Show bookmark details."""
result = await session.exec(
select(Bookmark).where(
Bookmark.userkey == user_key, Bookmark.url_hash == url_hash, Bookmark.status != Visibility.DELETED
Bookmark.user_key == user_key, Bookmark.url_hash == url_hash, Bookmark.status != Visibility.DELETED
)
)
bookmark = result.first()
# bookmark = session.get(Bookmark, {'url_hash': url_hash, 'userkey': user_key})
return bookmark
@@ -341,15 +196,15 @@ async def autocomplete_bookmark(
strip_params: bool = False,
):
"""Autofill some fields for this (new) bookmark for user `user_key`."""
bookmark.userkey = user_key
bookmark.user_key = user_key
# Auto-fill title, fix tags etc.
update_bookmark_with_info(bookmark, request, strip_params)
bookmarks_helpers.update_bookmark_with_info(bookmark, request, strip_params)
url_hash = generate_hash(str(bookmark.url))
url_hash = utils.generate_hash(str(bookmark.url))
result = await session.exec(
select(Bookmark).where(
Bookmark.userkey == user_key, Bookmark.url_hash == url_hash, Bookmark.status != Visibility.DELETED
Bookmark.user_key == user_key, Bookmark.url_hash == url_hash, Bookmark.status != Visibility.DELETED
)
)
bookmark_db = result.first()
@@ -370,11 +225,11 @@ async def add_bookmark(
strip_params: bool = False,
):
"""Add new bookmark for user `user_key`."""
bookmark.userkey = user_key
bookmark.user_key = user_key
# Auto-fill title, fix tags etc.
update_bookmark_with_info(bookmark, request, strip_params)
bookmark.url_hash = generate_hash(str(bookmark.url))
bookmarks_helpers.update_bookmark_with_info(bookmark, request, strip_params)
bookmark.url_hash = utils.generate_hash(str(bookmark.url))
session.add(bookmark)
await session.commit()
@@ -394,7 +249,7 @@ async def update_bookmark(
"""Update existing bookmark `bookmark_key` for user `user_key`."""
result = await session.exec(
select(Bookmark).where(
Bookmark.userkey == user_key, Bookmark.url_hash == url_hash, Bookmark.status != Visibility.DELETED
Bookmark.user_key == user_key, Bookmark.url_hash == url_hash, Bookmark.status != Visibility.DELETED
)
)
bookmark_db = result.first()
@@ -409,11 +264,11 @@ async def update_bookmark(
bookmark_db.sqlmodel_update(bookmark_data)
# Autofill title, fix tags, etc. where (still) needed
update_bookmark_with_info(bookmark, request, strip_params)
bookmarks_helpers.update_bookmark_with_info(bookmark, request, strip_params)
session.add(bookmark_db)
session.commit()
session.refresh(bookmark_db)
await session.commit()
await session.refresh(bookmark_db)
return bookmark_db
@@ -424,14 +279,14 @@ async def delete_bookmark(
url_hash: str,
):
"""(Soft)Delete bookmark `bookmark_key` for user `user_key`."""
result = await session.get(Bookmark, {'url_hash': url_hash, 'userkey': user_key})
result = await session.get(Bookmark, {'url_hash': url_hash, 'user_key': user_key})
bookmark = result
if not bookmark:
raise HTTPException(status_code=404, detail='Bookmark not found')
bookmark.deleted_date = datetime.now(UTC)
bookmark.status = Visibility.DELETED
session.add(bookmark)
session.commit()
await session.commit()
return {'ok': True}
@@ -443,13 +298,13 @@ async def bookmarks_changed_since(
"""Last update on server, so the (browser) client knows whether to fetch an update."""
result = await session.exec(
select(Bookmark)
.where(Bookmark.userkey == user_key, Bookmark.status != Visibility.DELETED)
.where(Bookmark.user_key == user_key, Bookmark.status != Visibility.DELETED)
.order_by(desc(Bookmark.modified_date))
)
latest_modified_bookmark = result.first()
result = await session.exec(
select(Bookmark)
.where(Bookmark.userkey == user_key, Bookmark.status != Visibility.DELETED)
.where(Bookmark.user_key == user_key, Bookmark.status != Visibility.DELETED)
.order_by(desc(Bookmark.created_date))
)
latest_created_bookmark = result.first()
@@ -471,24 +326,24 @@ async def list_tags_for_user(
) -> list[str]:
"""List all tags in use by the user."""
result = await session.exec(
select(Bookmark).where(Bookmark.userkey == user_key, Bookmark.status != Visibility.DELETED)
select(Bookmark).where(Bookmark.user_key == user_key, Bookmark.status != Visibility.DELETED)
)
bookmarks = result.all()
tags = []
for bookmark in bookmarks:
tags += bookmark.tag_list
return clean_tags(tags)
return tags_helpers.clean_tags(tags)
@app.get('/api/v1/{user_key}/tags/{tag_key}')
async def list_tags_for_user(
async def list_bookmarks_for_tag_for_user(
session: SessionDep,
user_key: str,
) -> list[str]:
"""List all tags in use by the user."""
result = await session.exec(select(Bookmark).where(Bookmark.userkey == user_key))
result = await session.exec(select(Bookmark).where(Bookmark.user_key == user_key))
bookmarks = result.all()
return list_tags_for_bookmarks(bookmarks)
return tags_helpers.list_tags_for_bookmarks(bookmarks)
@app.get('/{user_key}', response_class=HTMLResponse)

View File

@@ -1,3 +1,8 @@
"""Models for digimarks.
Contains the bookmarks administration, users, tags, public tags and more.
"""
from datetime import UTC, datetime
from http import HTTPStatus
from typing import Optional, Type, TypeVar
@@ -25,6 +30,7 @@ class Visibility:
VISIBLE = 0
DELETED = 1
HIDDEN = 2
# Type var used for building custom types for the DB
@@ -66,12 +72,12 @@ class Bookmark(SQLModel, table=True):
__tablename__ = 'bookmark'
id: int = Field(primary_key=True)
userkey: str = Field(foreign_key='user.key')
title: str = Field(default='')
user_key: str = Field(foreign_key='user.key', nullable=False)
title: str = Field(default='', nullable=False)
url: AnyUrl = Field(default='', sa_type=build_custom_type(AnyUrl))
note: str = Field(default='', nullable=True)
# image: str = Field(default='')
url_hash: str = Field(default='')
url_hash: str = Field(default='', nullable=False)
tags: str = Field(default='')
starred: bool = Field(default=False)
@@ -101,7 +107,7 @@ class PublicTag(SQLModel, table=True):
__tablename__ = 'publictag'
id: int = Field(primary_key=True)
tagkey: str
userkey: str = Field(foreign_key='user.key')
tag_key: str
user_key: str = Field(foreign_key='user.key')
tag: str
created_date: datetime = Field(default=datetime.now(UTC))

View File

@@ -0,0 +1,71 @@
"""Helper functions for tags used with Bookmark models."""
from collections.abc import Iterable

from sqlalchemy import Sequence

from src.digimarks.models import Bookmark
def i_filter_false(predicate, iterable):
    """Yield the items of `iterable` for which `predicate` is false.

    When `predicate` is None, the truth value of the item itself is used, so only the
    falsy items are yielded.

    i_filter_false(lambda x: x%2, range(10)) --> 0 2 4 6 8
    """
    rejects = bool if predicate is None else predicate
    for item in iterable:
        if rejects(item):
            continue
        yield item
def unique_ever_seen(iterable, key=None):
    """Yield every element the first time it shows up, keeping the original order.

    All elements seen so far are remembered. With `key`, two elements count as equal when
    `key` returns the same value for both; the first of them is the one that is yielded.

    unique_ever_seen('AAAABBBCCDAABBB') --> A B C D
    unique_ever_seen('ABBCcAD', str.lower) --> A B C D
    """
    seen = set()
    for element in iterable:
        # Without a key function the element is its own identity
        marker = element if key is None else key(element)
        if marker in seen:
            continue
        seen.add(marker)
        yield element
def clean_tags(tags_list: list) -> list[str]:
    """Generate a unique, sorted list of the tags.

    Leading and trailing whitespace is removed from every tag first; tags that are empty
    after that are left out.

    :param list tags_list: List with all tags
    :return: sorted, deduplicated list of the tags, without leading or trailing whitespace
    :rtype: list
    """
    unique_tags = {tag.strip() for tag in tags_list}
    # Empty and whitespace-only entries all end up as '', e.g., from a trailing comma
    unique_tags.discard('')
    return sorted(unique_tags)
def list_tags_for_bookmarks(bookmarks: Iterable[Bookmark]) -> list[str]:
    """Generate a unique list of the tags from the list of bookmarks.

    :param bookmarks: bookmarks to collect the tags from; any iterable will do
    :return: sorted, deduplicated list of all tags used by the bookmarks
    :rtype: list
    """
    # NOTE(review): the Bookmark model is not visible from here; confirm that the attribute
    # holding the tags as a list is really called `tags_list`.
    tags = [tag for bookmark in bookmarks for tag in bookmark.tags_list]
    return clean_tags(tags)
def set_tags(bookmark: Bookmark, new_tags: str) -> None:
    """Store `new_tags` on the bookmark in cleaned-up form.

    The comma-separated tags are split, cleaned with `clean_tags` and written back to
    `bookmark.tags` as one comma-separated string.

    :param Bookmark bookmark: Bookmark to modify
    :param str new_tags: Comma-separated tags to clean up and set.
    """
    bookmark.tags = ','.join(clean_tags(new_tags.split(',')))

15
src/digimarks/utils.py Normal file
View File

@@ -0,0 +1,15 @@
"""General utility functions."""
import binascii
import hashlib
import os
def generate_hash(input_text: str) -> str:
    """Generate a hash from string `input_text`, e.g., for a URL.

    :param str input_text: text to hash; it is encoded as UTF-8 first
    :return: MD5 digest of the text as 32 hexadecimal characters
    :rtype: str
    """
    # The hash only identifies the text and protects nothing; saying so keeps MD5 usable
    # on Python builds that restrict it for security purposes.
    return hashlib.md5(input_text.encode('utf-8'), usedforsecurity=False).hexdigest()
def generate_key() -> str:
    """Generate a key to be used for a user or tag.

    :return: 48 hexadecimal characters, built from 24 random bytes
    :rtype: str
    """
    # `str()` on a bytes object gives its literal form ("b'...'"), which would put the
    # wrapper characters inside the key; `hex()` returns the digits only.
    return os.urandom(24).hex()