1
0
mirror of https://github.com/aquatix/digimarks.git synced 2025-12-06 22:05:09 +01:00

More robust HTTP status code handling, tag sanitization

This commit is contained in:
2016-07-20 22:06:17 +02:00
parent 395115eef9
commit 12bfa2352b
2 changed files with 27 additions and 9 deletions

View File

@@ -5,6 +5,7 @@ import sys
import requests import requests
import shutil import shutil
import bs4 import bs4
from more_itertools import unique_everseen
from urlparse import urlparse from urlparse import urlparse
from utilkit import datetimeutil from utilkit import datetimeutil
@@ -92,29 +93,28 @@ class Bookmark(db.Model):
""" Generate hash """ """ Generate hash """
self.url_hash = hashlib.md5(self.url).hexdigest() self.url_hash = hashlib.md5(self.url).hexdigest()
def set_title_from_source(self):
    """ Request the title by requesting the source url

    Records the response status in `self.http_status`. When the request
    itself cannot be made (invalid URL, connection problem, ...), 404 is
    recorded instead and the title is left untouched.
    On a 200 response the stripped text of the page's <title> element is
    stored in `self.title`, or '' when the page has no title element.

    Returns `self.title`.
    """
    try:
        result = requests.get(self.url)
    except requests.exceptions.RequestException:
        # For example 'MissingSchema: Invalid URL 'abc': No schema supplied.
        # Perhaps you meant http://abc?'
        # Only request-related failures are caught, so programming errors
        # and KeyboardInterrupt still propagate.
        self.http_status = 404
        return self.title

    self.http_status = result.status_code
    if self.http_status == 200:
        html = bs4.BeautifulSoup(result.text, 'html.parser')
        try:
            self.title = html.title.text.strip()
        except AttributeError:
            # html.title is None when the document has no <title>
            self.title = ''
    return self.title
def set_status_code(self):
    """ Check the HTTP status of the url, as it might not exist for example

    Issues a HEAD request to `self.url` and stores the response status in
    `self.http_status`. When the request cannot be made at all (invalid
    URL, connection problem, ...), 404 is stored, matching the fallback
    used by `set_title_from_source`.

    Returns the stored status code.
    """
    try:
        self.http_status = requests.head(self.url).status_code
    except requests.exceptions.RequestException:
        # For example a URL without schema raises MissingSchema
        self.http_status = 404
    return self.http_status
def set_favicon(self): def set_favicon(self):
""" Fetch favicon for the domain """ """ Fetch favicon for the domain """
# http://codingclues.eu/2009/retrieve-the-favicon-for-any-url-thanks-to-google/ # http://codingclues.eu/2009/retrieve-the-favicon-for-any-url-thanks-to-google/
@@ -128,6 +128,21 @@ class Bookmark(db.Model):
del response del response
self.favicon = domain + '.png' self.favicon = domain + '.png'
def set_tags(self, tags):
    """ Set tags from `tags`, strip, de-duplicate and sort them

    `tags` is a comma-separated string. Surrounding whitespace of every
    tag is removed, empty entries (e.g. from 'a,,b' or a trailing comma)
    are dropped, duplicates are collapsed and the result is stored in
    `self.tags` as a sorted, comma-joined string. `None` or an empty
    string results in ''.
    """
    stripped = (tag.strip() for tag in (tags or '').split(','))
    # The result is sorted anyway, so a plain set is enough to de-duplicate
    unique_tags = set(tag for tag in stripped if tag)
    self.tags = ','.join(sorted(unique_tags))
def get_tags(self):
    """ Return the tags of this bookmark as a list of strings

    `self.tags` is stored as a comma-separated string. An empty (or unset)
    value yields an empty list rather than [''], so callers never see a
    phantom empty tag.
    """
    if not self.tags:
        return []
    return [tag for tag in self.tags.split(',') if tag]
def to_dict(self): def to_dict(self):
result = { result = {
@@ -220,7 +235,8 @@ def addingbookmark(userkey):
starred = False starred = False
print starred print starred
if url: if url:
bookmark = Bookmark(url=url, title=title, tags=tags, starred=starred, userkey=userkey) bookmark = Bookmark(url=url, title=title, starred=starred, userkey=userkey)
bookmark.set_tags(tags)
bookmark.set_hash() bookmark.set_hash()
#bookmark.fetch_image() #bookmark.fetch_image()
if not title: if not title:

View File

@@ -1,5 +1,7 @@
flask flask
peewee peewee
flask-peewee flask-peewee
bs4
more_itertools
requests requests
utilkit utilkit