From ce2e1ebc6b2527dc31462edda337e967932cb065 Mon Sep 17 00:00:00 2001
From: Michiel Scholten <michiel@diginaut.net>
Date: Thu, 28 Feb 2019 13:54:29 +0100
Subject: [PATCH] Moved models to their own file

---
 digimarks/marks.py  | 308 ------------------------------------------
 digimarks/models.py | 319 ++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 319 insertions(+), 308 deletions(-)
 create mode 100644 digimarks/models.py

diff --git a/digimarks/marks.py b/digimarks/marks.py
index 84d876a..5a52bdd 100644
--- a/digimarks/marks.py
+++ b/digimarks/marks.py
@@ -1,28 +1,13 @@
 from __future__ import print_function
 
-import binascii
 import datetime
-import gzip
-import hashlib
 import os
-import shutil
 import sys
 
-import bs4
-import requests
 from flask import (Flask, abort, jsonify, redirect, render_template, request,
                    url_for)
-from peewee import *  # noqa
 from werkzeug.contrib.atom import AtomFeed
 
-try:
-    # Python 3
-    from urllib.parse import urljoin, urlparse, urlunparse
-except ImportError:
-    # Python 2
-    from urlparse import urljoin, urlparse, urlunparse
-
-
 DIGIMARKS_USER_AGENT = 'digimarks/1.2.0-dev'
 
 DEFAULT_THEME = 'freshgreen'
@@ -186,299 +171,6 @@ all_tags = {}
 usersettings = {}
 
 
-def ifilterfalse(predicate, iterable):
-    # ifilterfalse(lambda x: x%2, range(10)) --> 0 2 4 6 8
-    if predicate is None:
-        predicate = bool
-    for x in iterable:
-        if not predicate(x):
-            yield x
-
-
-def unique_everseen(iterable, key=None):
-    "List unique elements, preserving order. Remember all elements ever seen."
-    # unique_everseen('AAAABBBCCDAABBB') --> A B C D
-    # unique_everseen('ABBCcAD', str.lower) --> A B C D
-    seen = set()
-    seen_add = seen.add
-    if key is None:
-        for element in ifilterfalse(seen.__contains__, iterable):
-            seen_add(element)
-            yield element
-    else:
-        for element in iterable:
-            k = key(element)
-            if k not in seen:
-                seen_add(k)
-                yield element
-
-def clean_tags(tags_list):
-    tags_res = [x.strip() for x in tags_list]
-    tags_res = list(unique_everseen(tags_res))
-    tags_res.sort()
-    if tags_res and tags_res[0] == '':
-        del tags_res[0]
-    return tags_res
-
-
-magic_dict = {
-    b"\x1f\x8b\x08": "gz",
-    b"\x42\x5a\x68": "bz2",
-    b"\x50\x4b\x03\x04": "zip"
-    }
-
-max_len = max(len(x) for x in magic_dict)
-
-def file_type(filename):
-    with open(filename, "rb") as f:
-        file_start = f.read(max_len)
-    for magic, filetype in magic_dict.items():
-        if file_start.startswith(magic):
-            return filetype
-    return "no match"
-
-
-class BaseModel(Model):
-    class Meta:
-        database = database
-
-
-class User(BaseModel):
-    """ User account """
-    username = CharField()
-    key = CharField()
-    theme = CharField(default=DEFAULT_THEME)
-    created_date = DateTimeField(default=datetime.datetime.now)
-
-    def generate_key(self):
-        """ Generate userkey """
-        self.key = binascii.hexlify(os.urandom(24))
-        return self.key
-
-
-class Bookmark(BaseModel):
-    """ Bookmark instance, connected to User """
-    # Foreign key to User
-    userkey = CharField()
-
-    title = CharField(default='')
-    url = CharField()
-    note = TextField(default='')
-    #image = CharField(default='')
-    url_hash = CharField(default='')
-    tags = CharField(default='')
-    starred = BooleanField(default=False)
-
-    # Website (domain) favicon
-    favicon = CharField(null=True)
-
-    # Status code: 200 is OK, 404 is not found, for example (showing an error)
-    HTTP_CONNECTIONERROR = 0
-    HTTP_OK = 200
-    HTTP_ACCEPTED = 202
-    HTTP_MOVEDTEMPORARILY = 304
-    HTTP_NOTFOUND = 404
-
-    http_status = IntegerField(default=200)
-    redirect_uri = None
-
-    created_date = DateTimeField(default=datetime.datetime.now)
-    modified_date = DateTimeField(null=True)
-    deleted_date = DateTimeField(null=True)
-
-    # Bookmark status; deleting doesn't remove from DB
-    VISIBLE = 0
-    DELETED = 1
-    status = IntegerField(default=VISIBLE)
-
-
-    class Meta:
-        ordering = (('created_date', 'desc'),)
-
-    def set_hash(self):
-        """ Generate hash """
-        self.url_hash = hashlib.md5(self.url.encode('utf-8')).hexdigest()
-
-    def set_title_from_source(self):
-        """ Request the title by requesting the source url """
-        try:
-            result = requests.get(self.url, headers={'User-Agent': DIGIMARKS_USER_AGENT})
-            self.http_status = result.status_code
-        except:
-            # For example 'MissingSchema: Invalid URL 'abc': No schema supplied. Perhaps you meant http://abc?'
-            self.http_status = 404
-        if self.http_status == 200 or self.http_status == 202:
-            html = bs4.BeautifulSoup(result.text, 'html.parser')
-            try:
-                self.title = html.title.text.strip()
-            except AttributeError:
-                self.title = ''
-        return self.title
-
-    def set_status_code(self):
-        """ Check the HTTP status of the url, as it might not exist for example """
-        try:
-            result = requests.head(self.url, headers={'User-Agent': DIGIMARKS_USER_AGENT})
-            self.http_status = result.status_code
-        except requests.ConnectionError:
-            self.http_status = self.HTTP_CONNECTIONERROR
-        return self.http_status
-
-    def _set_favicon_with_iconsbetterideaorg(self, domain):
-        """ Fetch favicon for the domain """
-        fileextension = '.png'
-        meta = requests.head(
-            'http://icons.better-idea.org/icon?size=60&url=' + domain,
-            allow_redirects=True,
-            headers={'User-Agent': DIGIMARKS_USER_AGENT}
-        )
-        if meta.url[-3:].lower() == 'ico':
-            fileextension = '.ico'
-        response = requests.get(
-            'http://icons.better-idea.org/icon?size=60&url=' + domain,
-            stream=True,
-            headers={'User-Agent': DIGIMARKS_USER_AGENT}
-        )
-        filename = os.path.join(MEDIA_ROOT, 'favicons/' + domain + fileextension)
-        with open(filename, 'wb') as out_file:
-            shutil.copyfileobj(response.raw, out_file)
-        del response
-        filetype = file_type(filename)
-        if filetype == 'gz':
-            # decompress
-            orig = gzip.GzipFile(filename, 'rb')
-            origcontent = orig.read()
-            orig.close()
-            os.remove(filename)
-            with open(filename, 'wb') as new:
-                new.write(origcontent)
-        self.favicon = domain + fileextension
-
-    def _set_favicon_with_realfavicongenerator(self, domain):
-        """ Fetch favicon for the domain """
-        response = requests.get(
-            'https://realfavicongenerator.p.mashape.com/favicon/icon?platform=android_chrome&site=' + domain,
-            stream=True,
-            headers={'User-Agent': DIGIMARKS_USER_AGENT, 'X-Mashape-Key': settings.MASHAPE_API_KEY}
-        )
-        if response.status_code == 404:
-            # Fall back to desktop favicon
-            response = requests.get(
-                'https://realfavicongenerator.p.mashape.com/favicon/icon?platform=desktop&site=' + domain,
-                stream=True,
-                headers={'User-Agent': DIGIMARKS_USER_AGENT, 'X-Mashape-Key': settings.MASHAPE_API_KEY}
-            )
-        # Debug for the moment
-        print(domain)
-        print(response.headers)
-        if 'Content-Length' in response.headers and response.headers['Content-Length'] == '0':
-            # No favicon found, likely
-            print('Skipping this favicon, needs fallback')
-            return
-        # Default to 'image/png'
-        fileextension = '.png'
-        if response.headers['content-type'] == 'image/jpeg':
-            fileextension = '.jpg'
-        if response.headers['content-type'] == 'image/x-icon':
-            fileextension = '.ico'
-        filename = os.path.join(MEDIA_ROOT, 'favicons/' + domain + fileextension)
-        with open(filename, 'wb') as out_file:
-            shutil.copyfileobj(response.raw, out_file)
-        del response
-        filetype = file_type(filename)
-        if filetype == 'gz':
-            # decompress
-            orig = gzip.GzipFile(filename, 'rb')
-            origcontent = orig.read()
-            orig.close()
-            os.remove(filename)
-            with open(filename, 'wb') as new:
-                new.write(origcontent)
-        self.favicon = domain + fileextension
-
-    def set_favicon(self):
-        """ Fetch favicon for the domain """
-        u = urlparse(self.url)
-        domain = u.netloc
-        if os.path.isfile(os.path.join(MEDIA_ROOT, 'favicons/' + domain + '.png')):
-            # If file exists, don't re-download it
-            self.favicon = domain + '.png'
-            return
-        if os.path.isfile(os.path.join(MEDIA_ROOT, 'favicons/' + domain + '.ico')):
-            # If file exists, don't re-download it
-            self.favicon = domain + '.ico'
-            return
-        #self._set_favicon_with_iconsbetterideaorg(domain)
-        self._set_favicon_with_realfavicongenerator(domain)
-
-    def set_tags(self, newtags):
-        """ Set tags from `tags`, strip and sort them """
-        tags_split = newtags.split(',')
-        tags_clean = clean_tags(tags_split)
-        self.tags = ','.join(tags_clean)
-
-    def get_redirect_uri(self):
-        if self.redirect_uri:
-            return self.redirect_uri
-        if self.http_status == 301 or self.http_status == 302:
-            result = requests.head(self.url, allow_redirects=True, headers={'User-Agent': DIGIMARKS_USER_AGENT})
-            self.http_status = result.status_code
-            self.redirect_uri = result.url
-            return result.url
-        return None
-
-    def get_uri_domain(self):
-        parsed = urlparse(self.url)
-        return parsed.hostname
-
-    @classmethod
-    def strip_url_params(cls, url):
-        parsed = urlparse(url)
-        return urlunparse((parsed.scheme, parsed.netloc, parsed.path, parsed.params, '', parsed.fragment))
-
-    @property
-    def tags_list(self):
-        """ Get the tags as a list, iterable in template """
-        if self.tags:
-            return self.tags.split(',')
-        return []
-
-    def to_dict(self):
-        result = {
-            'title': self.title,
-            'url': self.url,
-            'created':  self.created_date.strftime('%Y-%m-%d %H:%M:%S'),
-            'url_hash': self.url_hash,
-            'tags': self.tags,
-        }
-        return result
-
-    @property
-    def serialize(self):
-        return self.to_dict()
-
-
-class PublicTag(BaseModel):
-    """ Publicly shared tag """
-    tagkey = CharField()
-    userkey = CharField()
-    tag = CharField()
-    created_date = DateTimeField(default=datetime.datetime.now)
-
-    def generate_key(self):
-        """ Generate hash-based key for publicly shared tag """
-        self.tagkey = binascii.hexlify(os.urandom(16))
-
-
-def get_tags_for_user(userkey):
-    """ Extract all tags from the bookmarks """
-    bookmarks = Bookmark.select().filter(Bookmark.userkey == userkey, Bookmark.status == Bookmark.VISIBLE)
-    tags = []
-    for bookmark in bookmarks:
-        tags += bookmark.tags_list
-    return clean_tags(tags)
-
-
 def get_cached_tags(userkey):
     """ Fail-safe way to get the cached tags for `userkey` """
     try:
diff --git a/digimarks/models.py b/digimarks/models.py
new file mode 100644
index 0000000..9c123ce
--- /dev/null
+++ b/digimarks/models.py
@@ -0,0 +1,319 @@
+"""digimarks data models and accompanying convenience functions"""
+import binascii
+import datetime
+import gzip
+import hashlib
+import os
+import shutil
+
+import bs4
+import requests
+from peewee import *  # noqa
+
+from . import themes
+
+try:
+    # Python 3
+    from urllib.parse import urljoin, urlparse, urlunparse
+except ImportError:
+    # Python 2
+    from urlparse import urljoin, urlparse, urlunparse
+
+
+# Directory holding bookmarks.db: $DIGIMARKS_DB_PATH when that is set,
+# otherwise the directory this module lives in
+DATABASE_PATH = os.environ.get('DIGIMARKS_DB_PATH', os.path.dirname(os.path.realpath(__file__)))
+database = SqliteDatabase(os.path.join(DATABASE_PATH, 'bookmarks.db'))
+
+
+def ifilterfalse(predicate, iterable):
+    """ Yield the items of `iterable` for which `predicate` is falsy (`bool` is used when it is None) """
+    # ifilterfalse(lambda x: x%2, range(10)) --> 0 2 4 6 8
+    reject = bool if predicate is None else predicate
+    for item in iterable:
+        if not reject(item):
+            yield item
+
+
+def unique_everseen(iterable, key=None):
+    """Yield unique elements, preserving order; with `key`, elements are compared by key(element)."""
+    # unique_everseen('AAAABBBCCDAABBB') --> A B C D
+    # unique_everseen('ABBCcAD', str.lower) --> A B C D
+    seen = set()
+    seen_add = seen.add  # bound method, reused for every element
+    if key is None:
+        for element in ifilterfalse(seen.__contains__, iterable):
+            seen_add(element)
+            yield element
+    else:
+        for element in iterable:
+            k = key(element)
+            if k not in seen:
+                seen_add(k)
+                yield element
+
+def clean_tags(tags_list):
+    """ Return the stripped, de-duplicated tags in sorted order, without the empty tag """
+    tags_res = sorted(unique_everseen(tag.strip() for tag in tags_list))
+    # After de-duplication and sorting an empty tag can only sit at index 0
+    if tags_res and tags_res[0] == '':
+        del tags_res[0]
+    return tags_res
+
+
+magic_dict = {  # leading "magic" bytes of a file -> short type name, used by file_type()
+    b"\x1f\x8b\x08": "gz",
+    b"\x42\x5a\x68": "bz2",
+    b"\x50\x4b\x03\x04": "zip"
+    }
+# Length of the longest signature, i.e. how many bytes file_type() has to read
+max_len = max(len(x) for x in magic_dict)
+
+def file_type(filename):
+    """ Return 'gz', 'bz2' or 'zip' based on the first bytes of `filename`, else 'no match' """
+    with open(filename, "rb") as handle:
+        header = handle.read(max_len)
+    return next(
+        (kind for magic, kind in magic_dict.items() if header.startswith(magic)),
+        "no match")
+
+
+class BaseModel(Model):  # common base class: binds every model to the module-level `database`
+    class Meta:
+        database = database
+
+
+class User(BaseModel):
+    """ User account """
+    username = CharField()
+    key = CharField()
+    theme = CharField(default=DEFAULT_THEME)  # NOTE(review): DEFAULT_THEME is not defined or imported in this module - confirm its source
+    created_date = DateTimeField(default=datetime.datetime.now)
+
+    def generate_key(self):
+        """ Generate userkey: 24 random bytes, hex-encoded; stores it in `key` and returns it """
+        self.key = binascii.hexlify(os.urandom(24))
+        return self.key
+
+
+class Bookmark(BaseModel):
+    """ Bookmark instance, connected to User """
+    # Key of the owning User; stored as a plain CharField rather than a ForeignKeyField
+    userkey = CharField()
+
+    title = CharField(default='')
+    url = CharField()
+    note = TextField(default='')
+    #image = CharField(default='')
+    url_hash = CharField(default='')  # MD5 hex digest of `url`, see set_hash()
+    tags = CharField(default='')  # comma-separated, see set_tags()
+    starred = BooleanField(default=False)
+
+    # Website (domain) favicon: file name below MEDIA_ROOT/favicons/, see set_favicon()
+    favicon = CharField(null=True)
+
+    # HTTP status codes; HTTP_CONNECTIONERROR (0) is not a real status, set_status_code() stores it when the connection fails
+    HTTP_CONNECTIONERROR = 0
+    HTTP_OK = 200
+    HTTP_ACCEPTED = 202
+    HTTP_MOVEDTEMPORARILY = 304  # NOTE(review): 304 is "Not Modified"; "Moved Temporarily"/Found is 302 - confirm intent
+    HTTP_NOTFOUND = 404
+
+    http_status = IntegerField(default=200)
+    redirect_uri = None  # plain attribute, not a database field: per-instance cache filled by get_redirect_uri()
+
+    created_date = DateTimeField(default=datetime.datetime.now)
+    modified_date = DateTimeField(null=True)
+    deleted_date = DateTimeField(null=True)
+
+    # Bookmark status; deleting doesn't remove from DB
+    VISIBLE = 0
+    DELETED = 1
+    status = IntegerField(default=VISIBLE)
+
+
+    class Meta:
+        ordering = (('created_date', 'desc'),)
+
+    def set_hash(self):
+        """ Store the MD5 hex digest of the UTF-8 encoded url in `url_hash` """
+        self.url_hash = hashlib.md5(self.url.encode('utf-8')).hexdigest()
+
+    def set_title_from_source(self):
+        """ Request the source url, store its HTTP status and set (and return) the title from its <title> """
+        try:
+            result = requests.get(self.url, headers={'User-Agent': DIGIMARKS_USER_AGENT})
+            self.http_status = result.status_code
+        except Exception:  # not bare: KeyboardInterrupt/SystemExit must still propagate
+            # Best effort, e.g. MissingSchema for url 'abc'. NOTE(review): also masks the NameError from DIGIMARKS_USER_AGENT, which this module does not define
+            self.http_status = self.HTTP_NOTFOUND
+        if self.http_status in (self.HTTP_OK, self.HTTP_ACCEPTED):
+            html = bs4.BeautifulSoup(result.text, 'html.parser')
+            try:
+                self.title = html.title.text.strip()
+            except AttributeError:
+                self.title = ''
+        return self.title
+
+    def set_status_code(self):
+        """ Check the HTTP status of the url with a HEAD request, as it might not exist for example; returns the status """
+        try:
+            result = requests.head(self.url, headers={'User-Agent': DIGIMARKS_USER_AGENT})
+            self.http_status = result.status_code
+        except requests.ConnectionError:  # other request errors propagate to the caller
+            self.http_status = self.HTTP_CONNECTIONERROR
+        return self.http_status
+
+    def _set_favicon_with_iconsbetterideaorg(self, domain):
+        """ Fetch favicon for the domain from icons.better-idea.org, save it below MEDIA_ROOT/favicons/ and set `self.favicon` """
+        fileextension = '.png'
+        icon_url = 'http://icons.better-idea.org/icon?size=60&url=' + domain
+        meta = requests.head(
+            icon_url,
+            allow_redirects=True,
+            headers={'User-Agent': DIGIMARKS_USER_AGENT}
+        )
+        if meta.url[-3:].lower() == 'ico':
+            fileextension = '.ico'
+        response = requests.get(
+            icon_url,
+            stream=True,
+            headers={'User-Agent': DIGIMARKS_USER_AGENT}
+        )
+        filename = os.path.join(MEDIA_ROOT, 'favicons/' + domain + fileextension)
+        with open(filename, 'wb') as out_file:
+            shutil.copyfileobj(response.raw, out_file)
+        del response
+        filetype = file_type(filename)
+        if filetype == 'gz':
+            # decompress in place; the `with` closes the archive even when reading fails
+            with gzip.GzipFile(filename, 'rb') as orig:
+                origcontent = orig.read()
+            os.remove(filename)
+            with open(filename, 'wb') as new:
+                new.write(origcontent)
+        self.favicon = domain + fileextension
+
+    def _set_favicon_with_realfavicongenerator(self, domain):
+        """ Fetch favicon for the domain (android_chrome icon, else desktop), save it below MEDIA_ROOT/favicons/ and set `self.favicon` """
+        # NOTE(review): `settings` and MEDIA_ROOT are not defined or imported in this module - confirm their source
+        response = requests.get(
+            'https://realfavicongenerator.p.mashape.com/favicon/icon?platform=android_chrome&site=' + domain,
+            stream=True,
+            headers={'User-Agent': DIGIMARKS_USER_AGENT, 'X-Mashape-Key': settings.MASHAPE_API_KEY}
+        )
+        if response.status_code == 404:
+            # Fall back to desktop favicon
+            response = requests.get(
+                'https://realfavicongenerator.p.mashape.com/favicon/icon?platform=desktop&site=' + domain,
+                stream=True,
+                headers={'User-Agent': DIGIMARKS_USER_AGENT, 'X-Mashape-Key': settings.MASHAPE_API_KEY}
+            )
+        # Debug for the moment
+        print(domain)
+        print(response.headers)
+        if response.headers.get('Content-Length') == '0':
+            # No favicon found, likely
+            print('Skipping this favicon, needs fallback')
+            return
+        # Pick the extension from the content type; a missing or unknown header means '.png'
+        content_type = response.headers.get('content-type')
+        fileextension = {
+            'image/jpeg': '.jpg',
+            'image/x-icon': '.ico',
+        }.get(content_type, '.png')
+        filename = os.path.join(MEDIA_ROOT, 'favicons/' + domain + fileextension)
+        with open(filename, 'wb') as out_file:
+            shutil.copyfileobj(response.raw, out_file)
+        del response
+        filetype = file_type(filename)
+        if filetype == 'gz':
+            # decompress in place; the `with` closes the archive even when reading fails
+            with gzip.GzipFile(filename, 'rb') as orig:
+                origcontent = orig.read()
+            os.remove(filename)
+            with open(filename, 'wb') as new:
+                new.write(origcontent)
+        self.favicon = domain + fileextension
+
+    def set_favicon(self):
+        """ Fetch favicon for the domain, reusing an earlier download when there is one """
+        u = urlparse(self.url)
+        domain = u.netloc
+        # '.jpg' is checked as well: _set_favicon_with_realfavicongenerator can
+        # store JPEG icons, which would otherwise be downloaded again on every call
+        for fileextension in ('.png', '.ico', '.jpg'):
+            cached = os.path.join(MEDIA_ROOT, 'favicons/' + domain + fileextension)
+            if os.path.isfile(cached):
+                # If file exists, don't re-download it
+                self.favicon = domain + fileextension
+                return
+        #self._set_favicon_with_iconsbetterideaorg(domain)
+        self._set_favicon_with_realfavicongenerator(domain)
+
+    def set_tags(self, newtags):
+        """ Set `tags` from the comma-separated string `newtags` """
+        # clean_tags() strips, de-duplicates and sorts, and drops the empty tag
+        cleaned = clean_tags(newtags.split(','))
+        self.tags = ','.join(cleaned)
+
+    def get_redirect_uri(self):
+        """ Return the final url when the stored status is 301/302 (cached in `redirect_uri`), else None """
+        if self.redirect_uri:
+            return self.redirect_uri
+        if self.http_status not in (301, 302):
+            return None
+        result = requests.head(self.url, allow_redirects=True, headers={'User-Agent': DIGIMARKS_USER_AGENT})
+        self.http_status, self.redirect_uri = result.status_code, result.url
+        return result.url
+
+    def get_uri_domain(self):
+        """ Return the hostname part of the bookmark url """
+        return urlparse(self.url).hostname
+
+    @classmethod
+    def strip_url_params(cls, url):
+        """ Return `url` with its query string removed; path params and fragment are kept """
+        return urlunparse(urlparse(url)._replace(query=''))
+
+    @property
+    def tags_list(self):
+        """ The comma-separated `tags` as a list (empty when no tags are set), iterable in template """
+        if not self.tags:
+            return []
+        return self.tags.split(',')
+
+    def to_dict(self):
+        """ Return title, url, creation time ('%Y-%m-%d %H:%M:%S'), url hash and tags as a plain dict """
+        return {
+            'title': self.title,
+            'url': self.url,
+            'created': self.created_date.strftime('%Y-%m-%d %H:%M:%S'),
+            'url_hash': self.url_hash,
+            'tags': self.tags,
+        }
+
+    @property
+    def serialize(self):  # property alias for to_dict()
+        return self.to_dict()
+
+
+class PublicTag(BaseModel):
+    """ Publicly shared tag """
+    tagkey = CharField()
+    userkey = CharField()
+    tag = CharField()
+    created_date = DateTimeField(default=datetime.datetime.now)
+
+    def generate_key(self):
+        """ Generate a random key (16 random bytes, hex-encoded) for the publicly shared tag; unlike User.generate_key() nothing is returned """
+        self.tagkey = binascii.hexlify(os.urandom(16))
+
+
+def get_tags_for_user(userkey):
+    """ Return the cleaned, sorted tags used on the visible bookmarks of `userkey` """
+    visible = Bookmark.select().filter(
+        Bookmark.userkey == userkey, Bookmark.status == Bookmark.VISIBLE)
+    # Flatten the per-bookmark tag lists before de-duplicating them
+    tags = [tag for bookmark in visible for tag in bookmark.tags_list]
+    return clean_tags(tags)