mirror of
https://github.com/aquatix/digimarks.git
synced 2025-12-06 23:05:10 +01:00
More robust HTTP status codes, tag sanitization
This commit is contained in:
34
digimarks.py
34
digimarks.py
@@ -5,6 +5,7 @@ import sys
|
|||||||
import requests
|
import requests
|
||||||
import shutil
|
import shutil
|
||||||
import bs4
|
import bs4
|
||||||
|
from more_itertools import unique_everseen
|
||||||
from urlparse import urlparse
|
from urlparse import urlparse
|
||||||
|
|
||||||
from utilkit import datetimeutil
|
from utilkit import datetimeutil
|
||||||
@@ -92,29 +93,28 @@ class Bookmark(db.Model):
|
|||||||
""" Generate hash """
|
""" Generate hash """
|
||||||
self.url_hash = hashlib.md5(self.url).hexdigest()
|
self.url_hash = hashlib.md5(self.url).hexdigest()
|
||||||
|
|
||||||
|
|
||||||
def set_title_from_source(self):
    """ Request the title by requesting the source url

    Performs a GET on `self.url` and stores the outcome in `self.http_status`:
    the response status code, or 404 when the request itself fails.
    On a 200 response `self.title` is set to the stripped text of the page's
    <title> element, or to '' when that cannot be read.

    Returns `self.title` (left untouched when the status is not 200).
    """
    try:
        result = requests.get(self.url)
    except requests.exceptions.RequestException:
        # For example 'MissingSchema: Invalid URL 'abc': No schema supplied. Perhaps you meant http://abc?'
        # Only failures of the request are mapped to 404; a bare `except:`
        # would also swallow KeyboardInterrupt/SystemExit and real bugs.
        self.http_status = 404
        return self.title

    self.http_status = result.status_code
    if self.http_status == 200:
        html = bs4.BeautifulSoup(result.text, 'html.parser')
        try:
            self.title = html.title.text.strip()
        except AttributeError:
            # No usable <title> was parsed (presumably `html.title` is None)
            self.title = ''
    return self.title
|
||||||
|
|
||||||
|
|
||||||
def set_status_code(self):
    """ Check the HTTP status of the url, as it might not exist for example

    Performs a HEAD request on `self.url` and stores the response status code
    in `self.http_status`. When the request itself fails (invalid url, host
    unreachable, ...) the status is recorded as 404 instead of letting the
    exception escape, matching what `set_title_from_source` does.

    Returns the stored `self.http_status`.
    """
    try:
        result = requests.head(self.url)
    except requests.exceptions.RequestException:
        # For example 'MissingSchema' for a url without 'http://'
        self.http_status = 404
    else:
        self.http_status = result.status_code
    return self.http_status
|
||||||
|
|
||||||
|
|
||||||
def set_favicon(self):
|
def set_favicon(self):
|
||||||
""" Fetch favicon for the domain """
|
""" Fetch favicon for the domain """
|
||||||
# http://codingclues.eu/2009/retrieve-the-favicon-for-any-url-thanks-to-google/
|
# http://codingclues.eu/2009/retrieve-the-favicon-for-any-url-thanks-to-google/
|
||||||
@@ -128,6 +128,21 @@ class Bookmark(db.Model):
|
|||||||
del response
|
del response
|
||||||
self.favicon = domain + '.png'
|
self.favicon = domain + '.png'
|
||||||
|
|
||||||
|
def set_tags(self, tags):
    """ Set tags from `tags`, strip, de-duplicate and sort them

    `tags` is a comma-separated string such as 'python, web,python'.
    Surrounding whitespace is removed from every tag, duplicates and empty
    entries (from '' or 'a,,b') are dropped, and the remaining tags are stored
    sorted in `self.tags` as one comma-separated string.
    `None` is treated like the empty string.
    """
    # The result is sorted afterwards, so first-seen order does not matter
    # and a plain set is enough for de-duplication.
    cleaned = set(tag.strip() for tag in (tags or '').split(','))
    # Splitting '' or 'a,,b' yields empty strings; those are not real tags
    cleaned.discard('')
    self.tags = ','.join(sorted(cleaned))
|
||||||
|
|
||||||
|
def get_tags(self):
    """ Get the tags as a list

    Splits the comma-separated `self.tags` string into its separate tags.
    Returns an empty list when no tags are set ('' or None), instead of a
    list holding one empty tag.
    """
    if not self.tags:
        return []
    return self.tags.split(',')
|
||||||
|
|
||||||
|
|
||||||
def to_dict(self):
|
def to_dict(self):
|
||||||
result = {
|
result = {
|
||||||
@@ -220,7 +235,8 @@ def addingbookmark(userkey):
|
|||||||
starred = False
|
starred = False
|
||||||
print starred
|
print starred
|
||||||
if url:
|
if url:
|
||||||
bookmark = Bookmark(url=url, title=title, tags=tags, starred=starred, userkey=userkey)
|
bookmark = Bookmark(url=url, title=title, starred=starred, userkey=userkey)
|
||||||
|
bookmark.set_tags(tags)
|
||||||
bookmark.set_hash()
|
bookmark.set_hash()
|
||||||
#bookmark.fetch_image()
|
#bookmark.fetch_image()
|
||||||
if not title:
|
if not title:
|
||||||
|
|||||||
@@ -1,5 +1,7 @@
|
|||||||
flask
|
flask
|
||||||
peewee
|
peewee
|
||||||
flask-peewee
|
flask-peewee
|
||||||
|
bs4
|
||||||
|
more_itertools
|
||||||
requests
|
requests
|
||||||
utilkit
|
utilkit
|
||||||
|
|||||||
Reference in New Issue
Block a user