From 6f49bd8f08fbd52c67e21477fc2d21553d8220d7 Mon Sep 17 00:00:00 2001
From: Thomas Pointhuber
Date: Mon, 17 Mar 2014 11:43:00 +0100
Subject: [PATCH 1/4] adding publish date to video-results and rss-feed
---
searx/templates/opensearch_response_rss.xml | 1 +
searx/templates/result_templates/videos.html | 1 +
searx/webapp.py | 2 ++
3 files changed, 4 insertions(+)
diff --git a/searx/templates/opensearch_response_rss.xml b/searx/templates/opensearch_response_rss.xml
index 417b195a3..5673eb2e1 100644
--- a/searx/templates/opensearch_response_rss.xml
+++ b/searx/templates/opensearch_response_rss.xml
@@ -16,6 +16,7 @@
{{ r.title }}
{{ r.url }}
{{ r.content }}
+ {% if r.pubdate %}{{ r.pubdate }}{% endif %}
{% endfor %}
diff --git a/searx/templates/result_templates/videos.html b/searx/templates/result_templates/videos.html
index d3391f0d3..ab869a6eb 100644
--- a/searx/templates/result_templates/videos.html
+++ b/searx/templates/result_templates/videos.html
@@ -5,6 +5,7 @@
+ {% if result.publishedDate %}{{ result.publishedDate }}
{% endif %}
{{ result.url }}
diff --git a/searx/webapp.py b/searx/webapp.py
index a52dd6289..d06c58f32 100644
--- a/searx/webapp.py
+++ b/searx/webapp.py
@@ -168,6 +168,8 @@ def index():
else:
result['publishedDate'] = gettext(u'{hours} hour(s), {minutes} minute(s) ago').format(hours=hours, minutes=minutes) # noqa
else:
+ # TODO using right timezone
+ result['pubdate'] = result['publishedDate'].strftime('%a, %d %b %Y %H:%M:%S +0000')
result['publishedDate'] = format_date(result['publishedDate'])
if search.request_data.get('format') == 'json':
From fd86bf8189683aee72b934c8dd7544aa362a0728 Mon Sep 17 00:00:00 2001
From: Thomas Pointhuber
Date: Tue, 18 Mar 2014 13:19:23 +0100
Subject: [PATCH 2/4] fix error when displaying a publishedDate that carries a timezone
---
searx/webapp.py | 7 +++----
1 file changed, 3 insertions(+), 4 deletions(-)
diff --git a/searx/webapp.py b/searx/webapp.py
index d06c58f32..a4c05a9e9 100644
--- a/searx/webapp.py
+++ b/searx/webapp.py
@@ -159,8 +159,8 @@ def index():
# TODO, check if timezone is calculated right
if 'publishedDate' in result:
- if result['publishedDate'] >= datetime.now() - timedelta(days=1):
- timedifference = datetime.now() - result['publishedDate']
+ if result['publishedDate'].replace(tzinfo=None) >= datetime.now() - timedelta(days=1):
+ timedifference = datetime.now() - result['publishedDate'].replace(tzinfo=None)
minutes = int((timedifference.seconds / 60) % 60)
hours = int(timedifference.seconds / 60 / 60)
if hours == 0:
@@ -168,8 +168,7 @@ def index():
else:
result['publishedDate'] = gettext(u'{hours} hour(s), {minutes} minute(s) ago').format(hours=hours, minutes=minutes) # noqa
else:
- # TODO using right timezone
- result['pubdate'] = result['publishedDate'].strftime('%a, %d %b %Y %H:%M:%S +0000')
+ result['pubdate'] = result['publishedDate'].strftime('%a, %d %b %Y %H:%M:%S %z')
result['publishedDate'] = format_date(result['publishedDate'])
if search.request_data.get('format') == 'json':
From 337bd6d907503176eb94290c3f386ce88167dea8 Mon Sep 17 00:00:00 2001
From: Thomas Pointhuber
Date: Tue, 18 Mar 2014 13:19:50 +0100
Subject: [PATCH 3/4] simplify datetime extraction
---
requirements.txt | 1 +
searx/engines/google_news.py | 14 ++++----------
searx/engines/yahoo_news.py | 5 ++---
searx/engines/youtube.py | 7 +++++++
setup.py | 1 +
5 files changed, 15 insertions(+), 13 deletions(-)
diff --git a/requirements.txt b/requirements.txt
index 0f69bc883..88c1bc715 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -3,3 +3,4 @@ flask-babel
grequests
lxml
pyyaml
+python-dateutil
diff --git a/searx/engines/google_news.py b/searx/engines/google_news.py
index 43ccaa3e3..b8a7be3ee 100644
--- a/searx/engines/google_news.py
+++ b/searx/engines/google_news.py
@@ -2,6 +2,7 @@
from urllib import urlencode
from json import loads
+from dateutil import parser
from datetime import datetime
categories = ['news']
@@ -32,16 +33,9 @@ def response(resp):
return []
for result in search_res['responseData']['results']:
-# S.149 (159), library.pdf
-# datetime.strptime("Mon, 10 Mar 2014 16:26:15 -0700",
-# "%a, %d %b %Y %H:%M:%S %z")
-# publishedDate = parse(result['publishedDate'])
- publishedDate = datetime.strptime(
- str.join(' ', result['publishedDate'].split(None)[0:5]),
- "%a, %d %b %Y %H:%M:%S")
- #utc_offset = timedelta(result['publishedDate'].split(None)[5])
- # local = utc + offset
- #publishedDate = publishedDate + utc_offset
+
+# example publishedDate value: Mon, 10 Mar 2014 16:26:15 -0700
+ publishedDate = parser.parse(result['publishedDate'])
results.append({'url': result['unescapedUrl'],
'title': result['titleNoFormatting'],
diff --git a/searx/engines/yahoo_news.py b/searx/engines/yahoo_news.py
index a1e9df59c..53c8b07a7 100644
--- a/searx/engines/yahoo_news.py
+++ b/searx/engines/yahoo_news.py
@@ -6,6 +6,7 @@ from searx.engines.xpath import extract_text, extract_url
from searx.engines.yahoo import parse_url
from datetime import datetime, timedelta
import re
+from dateutil import parser
categories = ['news']
search_url = 'http://news.search.yahoo.com/search?{query}&b={offset}'
@@ -52,9 +53,7 @@ def response(resp):
- timedelta(hours=int(timeNumbers[0]))\
- timedelta(minutes=int(timeNumbers[1]))
else:
- # TODO year in string possible?
- publishedDate = datetime.strptime(publishedDate,
- "%b %d %H:%M%p")
+ publishedDate =parser.parse(publishedDate)
if publishedDate.year == 1900:
publishedDate = publishedDate.replace(year=datetime.now().year)
diff --git a/searx/engines/youtube.py b/searx/engines/youtube.py
index 5b04f3513..f6b08b330 100644
--- a/searx/engines/youtube.py
+++ b/searx/engines/youtube.py
@@ -1,5 +1,7 @@
from json import loads
from urllib import urlencode
+from dateutil import parser
+from datetime import datetime
categories = ['videos']
@@ -35,6 +37,10 @@ def response(resp):
content = ''
thumbnail = ''
+# example 'published' value: "2013-12-31T15:22:51.000Z"
+ pubdate = result['published']['$t']
+ publishedDate = parser.parse(pubdate)
+
if result['media$group']['media$thumbnail']:
thumbnail = result['media$group']['media$thumbnail'][0]['url']
content += ''.format(url, thumbnail) # noqa
@@ -48,6 +54,7 @@ def response(resp):
'title': title,
'content': content,
'template': 'videos.html',
+ 'publishedDate': publishedDate,
'thumbnail': thumbnail})
return results
diff --git a/setup.py b/setup.py
index fff709f2f..79f2acc42 100644
--- a/setup.py
+++ b/setup.py
@@ -35,6 +35,7 @@ setup(
'lxml',
'pyyaml',
'setuptools',
+ 'python-dateutil',
],
extras_require={
'test': [
From 993271bed30e24c7ae1e0f63b64e030829206f27 Mon Sep 17 00:00:00 2001
From: Thomas Pointhuber
Date: Tue, 18 Mar 2014 15:56:22 +0100
Subject: [PATCH 4/4] extract publishDate from vimeo
---
searx/engines/vimeo.py | 6 ++++++
searx/engines/yahoo_news.py | 2 +-
2 files changed, 7 insertions(+), 1 deletion(-)
diff --git a/searx/engines/vimeo.py b/searx/engines/vimeo.py
index a95c75b49..d2d2a4dd0 100644
--- a/searx/engines/vimeo.py
+++ b/searx/engines/vimeo.py
@@ -2,6 +2,8 @@ from urllib import urlencode
from HTMLParser import HTMLParser
from lxml import html
from xpath import extract_text
+from datetime import datetime
+from dateutil import parser
base_url = 'http://vimeo.com'
search_url = base_url + '/search?{query}'
@@ -10,6 +12,7 @@ content_xpath = None
title_xpath = None
results_xpath = ''
content_tpl = ' '
+publishedDate_xpath = './/p[@class="meta"]//attribute::datetime'
# the cookie set by vimeo contains all the following values,
# but only __utma seems to be requiered
@@ -40,9 +43,12 @@ def response(resp):
url = base_url + result.xpath(url_xpath)[0]
title = p.unescape(extract_text(result.xpath(title_xpath)))
thumbnail = extract_text(result.xpath(content_xpath)[0])
+ publishedDate = parser.parse(extract_text(result.xpath(publishedDate_xpath)[0]))
+
results.append({'url': url,
'title': title,
'content': content_tpl.format(url, title, thumbnail),
'template': 'videos.html',
+ 'publishedDate': publishedDate,
'thumbnail': thumbnail})
return results
diff --git a/searx/engines/yahoo_news.py b/searx/engines/yahoo_news.py
index 53c8b07a7..43da93ede 100644
--- a/searx/engines/yahoo_news.py
+++ b/searx/engines/yahoo_news.py
@@ -53,7 +53,7 @@ def response(resp):
- timedelta(hours=int(timeNumbers[0]))\
- timedelta(minutes=int(timeNumbers[1]))
else:
- publishedDate =parser.parse(publishedDate)
+ publishedDate = parser.parse(publishedDate)
if publishedDate.year == 1900:
publishedDate = publishedDate.replace(year=datetime.now().year)