1
0
mirror of https://github.com/aquatix/digimarks.git synced 2025-12-06 22:05:09 +01:00

More robust HTTP status codes, tag sanitization

This commit is contained in:
2016-07-20 22:06:17 +02:00
parent 395115eef9
commit 12bfa2352b
2 changed files with 27 additions and 9 deletions

View File

@@ -5,6 +5,7 @@ import sys
import requests
import shutil
import bs4
from more_itertools import unique_everseen
from urlparse import urlparse
from utilkit import datetimeutil
@@ -92,29 +93,28 @@ class Bookmark(db.Model):
""" Generate hash """
self.url_hash = hashlib.md5(self.url).hexdigest()
def set_title_from_source(self):
    """ Request the title by requesting the source url

    Stores the HTTP status code of the response in `self.http_status`. Any
    failure to perform the request (for example an invalid URL) is recorded
    as a 404. The title is only replaced when the page was fetched with a
    200 status; when no title can be read from the page it becomes ''.

    Returns `self.title` (left untouched when the request did not succeed).
    """
    try:
        result = requests.get(self.url)
    except Exception:
        # For example 'MissingSchema: Invalid URL 'abc': No schema supplied. Perhaps you meant http://abc?'
        # Deliberately best-effort: whatever went wrong, treat the url as
        # not found. `Exception` instead of a bare except, so Ctrl-C and
        # interpreter shutdown are not swallowed.
        self.http_status = 404
        # Return here: `result` does not exist when the request failed.
        return self.title

    self.http_status = result.status_code
    if self.http_status == 200:
        html = bs4.BeautifulSoup(result.text, 'html.parser')
        try:
            self.title = html.title.text.strip()
        except AttributeError:
            # No usable title in the parsed document
            self.title = ''
    return self.title
def set_status_code(self):
    """ Check the HTTP status of the url, as it might not exist for example

    Performs a HEAD request and stores the status code of the response in
    `self.http_status`. When the request itself fails (for example an
    invalid URL), 404 is stored instead, the same fallback that
    `set_title_from_source` uses.

    Returns the stored status code.
    """
    try:
        result = requests.head(self.url)
    except Exception:
        # Best-effort: an url that cannot even be requested counts as not found
        self.http_status = 404
    else:
        self.http_status = result.status_code
    return self.http_status
def set_favicon(self):
""" Fetch favicon for the domain """
# http://codingclues.eu/2009/retrieve-the-favicon-for-any-url-thanks-to-google/
@@ -128,6 +128,21 @@ class Bookmark(db.Model):
del response
self.favicon = domain + '.png'
def set_tags(self, tags):
    """ Set tags from `tags`, strip and sort them

    `tags` is a comma-separated string. Every tag is stripped of surrounding
    whitespace, empty tags (as produced by 'a,,b' or a trailing comma) are
    dropped, duplicates are removed and the remainder is sorted. The result
    is stored comma-separated in `self.tags`; empty or missing input results
    in an empty string.
    """
    if not tags:
        self.tags = ''
        return
    stripped = (tag.strip() for tag in tags.split(','))
    # The tags get sorted anyway, so a set is enough to drop the duplicates
    unique_tags = set(tag for tag in stripped if tag)
    self.tags = ','.join(sorted(unique_tags))
def get_tags(self):
    """ Get the tags as a list

    Splits the comma-separated `self.tags`. Returns an empty list when no
    tags are set, instead of a list holding one empty tag.
    """
    if not self.tags:
        return []
    return self.tags.split(',')
def to_dict(self):
result = {
@@ -220,7 +235,8 @@ def addingbookmark(userkey):
starred = False
print starred
if url:
bookmark = Bookmark(url=url, title=title, tags=tags, starred=starred, userkey=userkey)
bookmark = Bookmark(url=url, title=title, starred=starred, userkey=userkey)
bookmark.set_tags(tags)
bookmark.set_hash()
#bookmark.fetch_image()
if not title:

View File

@@ -1,5 +1,7 @@
flask
peewee
flask-peewee
bs4
more_itertools
requests
utilkit