mirror of
https://github.com/aquatix/digimarks.git
synced 2025-12-06 22:05:09 +01:00
More robust HTTP status codes, tag sanitization
This commit is contained in:
32
digimarks.py
32
digimarks.py
@@ -5,6 +5,7 @@ import sys
|
||||
import requests
|
||||
import shutil
|
||||
import bs4
|
||||
from more_itertools import unique_everseen
|
||||
from urlparse import urlparse
|
||||
|
||||
from utilkit import datetimeutil
|
||||
@@ -92,29 +93,28 @@ class Bookmark(db.Model):
|
||||
""" Generate hash """
|
||||
self.url_hash = hashlib.md5(self.url).hexdigest()
|
||||
|
||||
|
||||
def set_title_from_source(self):
    """ Request the title by requesting the source url

    Fetches `self.url`, stores the resulting HTTP status code in
    `self.http_status` and, when the page was retrieved successfully (200),
    sets `self.title` to the stripped text of the page's <title> element.
    A page without a <title> element results in an empty title.

    Returns `self.title`; it is left untouched when the request failed or
    returned a non-200 status.
    """
    try:
        result = requests.get(self.url)
        self.http_status = result.status_code
    except requests.exceptions.RequestException:
        # For example 'MissingSchema: Invalid URL 'abc': No schema supplied. Perhaps you meant http://abc?'
        # Only request failures are treated as "not found"; a bare `except:`
        # would also swallow KeyboardInterrupt and SystemExit.
        self.http_status = 404

    if self.http_status == 200:
        # `result` is always bound here: status 200 is only set on success
        html = bs4.BeautifulSoup(result.text, 'html.parser')
        try:
            self.title = html.title.text.strip()
        except AttributeError:
            # The document has no <title> element (html.title is None)
            self.title = ''
    return self.title
|
||||
|
||||
|
||||
def set_status_code(self):
    """ Check the HTTP status of the url, as it might not exist for example

    Performs a HEAD request on `self.url` and stores the status code in
    `self.http_status`. When the request itself fails (invalid url, no
    connection, ...) the status is recorded as 404 instead of letting the
    exception escape, matching the fallback used by set_title_from_source.

    Returns the stored `self.http_status`.
    """
    try:
        result = requests.head(self.url)
        self.http_status = result.status_code
    except requests.exceptions.RequestException:
        # For example MissingSchema for a url without 'http://'
        self.http_status = 404
    return self.http_status
|
||||
|
||||
|
||||
def set_favicon(self):
|
||||
""" Fetch favicon for the domain """
|
||||
# http://codingclues.eu/2009/retrieve-the-favicon-for-any-url-thanks-to-google/
|
||||
@@ -128,6 +128,21 @@ class Bookmark(db.Model):
|
||||
del response
|
||||
self.favicon = domain + '.png'
|
||||
|
||||
def set_tags(self, tags):
    """ Set tags from `tags`, strip and sort them

    `tags` is a comma-separated string. Every tag is stripped of
    surrounding whitespace, empty entries and duplicates are dropped, and
    the remaining tags are stored alphabetically sorted as a
    comma-separated string in `self.tags`.

    A missing value (None or an empty string) results in an empty
    `self.tags`.
    """
    # `tags or ''` guards against a missing form field (None)
    unique_tags = set(tag.strip() for tag in (tags or '').split(','))
    # 'a,,b', a trailing comma or an empty string would otherwise store an
    # empty tag
    unique_tags.discard('')
    # The result is sorted anyway, so a plain set is enough to de-duplicate
    self.tags = ','.join(sorted(unique_tags))
|
||||
|
||||
def get_tags(self):
    """ Get the tags as a list

    Splits the comma-separated `self.tags` string into its separate tags.
    Returns an empty list when no tags are set, instead of a list
    containing a single empty tag.
    """
    if not self.tags:
        # ''.split(',') would yield [''], and None has no split() at all
        return []
    return self.tags.split(',')
|
||||
|
||||
|
||||
def to_dict(self):
|
||||
result = {
|
||||
@@ -220,7 +235,8 @@ def addingbookmark(userkey):
|
||||
starred = False
|
||||
print starred
|
||||
if url:
|
||||
bookmark = Bookmark(url=url, title=title, tags=tags, starred=starred, userkey=userkey)
|
||||
bookmark = Bookmark(url=url, title=title, starred=starred, userkey=userkey)
|
||||
bookmark.set_tags(tags)
|
||||
bookmark.set_hash()
|
||||
#bookmark.fetch_image()
|
||||
if not title:
|
||||
|
||||
@@ -1,5 +1,7 @@
|
||||
flask
|
||||
peewee
|
||||
flask-peewee
|
||||
bs4
|
||||
more_itertools
|
||||
requests
|
||||
utilkit
|
||||
|
||||
Reference in New Issue
Block a user