mirror of
https://github.com/searxng/searxng.git
synced 2024-11-19 02:40:11 +01:00
Adds two engines: YouTube with or without the API.
The API engine needs an API key; the no-API engine doesn't provide published dates.
This commit is contained in:
parent
aac8d3a7bf
commit
f965c97822
83
searx/engines/youtube_api.py
Normal file
83
searx/engines/youtube_api.py
Normal file
@ -0,0 +1,83 @@
|
|||||||
|
# Youtube (Videos)
|
||||||
|
#
|
||||||
|
# @website https://www.youtube.com/
|
||||||
|
# @provide-api yes (https://developers.google.com/apis-explorer/#p/youtube/v3/youtube.search.list)
|
||||||
|
#
|
||||||
|
# @using-api yes
|
||||||
|
# @results JSON
|
||||||
|
# @stable yes
|
||||||
|
# @parse url, title, content, publishedDate, thumbnail, embedded
|
||||||
|
|
||||||
|
from json import loads
|
||||||
|
from urllib import urlencode
|
||||||
|
from dateutil import parser
|
||||||
|
|
||||||
|
# engine dependent config
categories = ['videos', 'music']
# request() below never reads a page number, so paging is disabled
paging = False
# request() appends a relevanceLanguage parameter when a language is selected
language_support = True
# placeholder; the value is interpolated verbatim into search_url by request()
api_key = None

# search-url
base_url = 'https://www.googleapis.com/youtube/v3/search'
search_url = base_url + '?part=snippet&{query}&maxResults=20&key={api_key}'

# iframe snippet attached to every result; {videoid} is filled in by response()
embedded_url = ('<iframe width="540" height="304" '
                'data-src="//www.youtube-nocookie.com/embed/{videoid}" '
                'frameborder="0" allowfullscreen></iframe>')

# prefix of the watch-page link; the video id is appended by response()
base_youtube_url = 'https://www.youtube.com/watch?v='
|
||||||
|
|
||||||
|
|
||||||
|
# do search-request
|
||||||
|
def request(query, params):
    """Fill ``params['url']`` with the API search URL for ``query``.

    The query is url-encoded as the ``q`` parameter and the module-level
    ``api_key`` is interpolated into ``search_url``.  Unless
    ``params['language']`` is ``'all'``, the part of the language code
    before the first underscore is appended as ``relevanceLanguage``.

    Returns the same ``params`` mapping that was passed in.
    """
    url_fields = {'query': urlencode({'q': query}), 'api_key': api_key}
    params['url'] = search_url.format(**url_fields)

    # add language tag if specified
    language = params['language']
    if language == 'all':
        return params

    params['url'] += '&relevanceLanguage=' + language.split('_')[0]
    return params
|
||||||
|
|
||||||
|
|
||||||
|
# get response from search-request
|
||||||
|
def response(resp):
    """Convert a YouTube Data API search response into a list of video results.

    ``resp`` must expose the JSON body as ``resp.text``.  Each returned dict
    carries the keys ``url``, ``title``, ``content``, ``template``,
    ``publishedDate``, ``embedded`` and ``thumbnail``.

    Items without an ``id.videoId`` are skipped: the request does not
    restrict the search to videos, so other result kinds can appear and
    used to abort the whole page with a ``KeyError``.

    Returns an empty list when the body has no ``items`` key.
    """
    search_results = loads(resp.text)

    # return empty array if there are no results
    if 'items' not in search_results:
        return []

    results = []

    # parse results
    for result in search_results['items']:
        # only items that identify a video can be linked and embedded
        videoid = result.get('id', {}).get('videoId')
        if not videoid:
            continue

        snippet = result['snippet']

        title = snippet['title']
        publishedDate = parser.parse(snippet['publishedAt'])

        # description and thumbnail are optional extras: fall back to an
        # empty string instead of dropping the result
        content = snippet.get('description', '')
        thumbnail = snippet.get('thumbnails', {}).get('high', {}).get('url', '')

        url = base_youtube_url + videoid
        embedded = embedded_url.format(videoid=videoid)

        # append result
        results.append({'url': url,
                        'title': title,
                        'content': content,
                        'template': 'videos.html',
                        'publishedDate': publishedDate,
                        'embedded': embedded,
                        'thumbnail': thumbnail})

    # return results
    return results
|
72
searx/engines/youtube_noapi.py
Normal file
72
searx/engines/youtube_noapi.py
Normal file
@ -0,0 +1,72 @@
|
|||||||
|
# Youtube (Videos)
|
||||||
|
#
|
||||||
|
# @website https://www.youtube.com/
|
||||||
|
# @provide-api yes (https://developers.google.com/apis-explorer/#p/youtube/v3/youtube.search.list)
|
||||||
|
#
|
||||||
|
# @using-api no
|
||||||
|
# @results HTML
|
||||||
|
# @stable no
|
||||||
|
# @parse url, title, content, publishedDate, thumbnail, embedded
|
||||||
|
|
||||||
|
from urllib import quote_plus
|
||||||
|
from lxml import html
|
||||||
|
from searx.engines.xpath import extract_text
|
||||||
|
|
||||||
|
# engine dependent config
categories = ['videos', 'music']
# request() forwards params['pageno'] as the page query parameter
paging = True
# request() never reads a language setting
language_support = False

# search-url
base_url = 'https://www.youtube.com/results'
search_url = base_url + '?search_query={query}&page={page}'

# iframe snippet attached to every result; {videoid} is filled in by response()
embedded_url = ('<iframe width="540" height="304" '
                'data-src="//www.youtube-nocookie.com/embed/{videoid}" '
                'frameborder="0" allowfullscreen></iframe>')

# prefix of the watch-page link; the video id is appended by response()
base_youtube_url = 'https://www.youtube.com/watch?v='

# specific xpath variables
# one match per video tile on the results page
results_xpath = ("//ol/li/div[contains(@class, "
                 "'yt-lockup yt-lockup-tile yt-lockup-video vve-check')]")
# NOTE(review): not referenced by request()/response() as visible here
url_xpath = './/h3/a/@href'
# the following paths are evaluated relative to a single tile
title_xpath = './/div[@class="yt-lockup-content"]/h3/a'
content_xpath = ('.//div[@class="yt-lockup-content"]'
                 '/div[@class="yt-lockup-description yt-ui-ellipsis yt-ui-ellipsis-2"]')
|
||||||
|
|
||||||
|
|
||||||
|
# do search-request
|
||||||
|
def request(query, params):
    """Fill ``params['url']`` with the results-page URL for ``query``.

    The query is escaped with ``quote_plus`` and ``params['pageno']`` is
    passed through unchanged as the ``page`` parameter.

    Returns the same ``params`` mapping that was passed in.
    """
    url_fields = {'query': quote_plus(query), 'page': params['pageno']}
    params['url'] = search_url.format(**url_fields)
    return params
|
||||||
|
|
||||||
|
|
||||||
|
# get response from search-request
|
||||||
|
def response(resp):
    """Scrape video results out of a YouTube results page.

    ``resp`` must expose the HTML body as ``resp.text``.  Each returned dict
    carries the keys ``url``, ``title``, ``content``, ``template``,
    ``embedded`` and ``thumbnail``; no publication date is extracted.

    A tile without a ``data-context-item-id`` attribute or without a title
    link is skipped, and a missing description yields an empty ``content``.
    Previously either case raised ``IndexError`` and discarded every result
    on the page.
    """
    results = []

    dom = html.fromstring(resp.text)

    # parse results
    for result in dom.xpath(results_xpath):
        video_ids = result.xpath('@data-context-item-id')
        title_nodes = result.xpath(title_xpath)

        # without an id or a title there is nothing usable to show
        if not video_ids or not title_nodes:
            continue

        videoid = video_ids[0]

        url = base_youtube_url + videoid
        thumbnail = 'https://i.ytimg.com/vi/' + videoid + '/hqdefault.jpg'

        title = extract_text(title_nodes[0])

        # a tile may have no description block; keep the result anyway
        content_nodes = result.xpath(content_xpath)
        content = extract_text(content_nodes[0]) if content_nodes else ''

        embedded = embedded_url.format(videoid=videoid)

        # append result
        results.append({'url': url,
                        'title': title,
                        'content': content,
                        'template': 'videos.html',
                        'embedded': embedded,
                        'thumbnail': thumbnail})

    # return results
    return results
|
@ -242,8 +242,13 @@ engines:
|
|||||||
shortcut : yhn
|
shortcut : yhn
|
||||||
|
|
||||||
- name : youtube
|
- name : youtube
|
||||||
engine : youtube
|
|
||||||
shortcut : yt
|
shortcut : yt
|
||||||
|
# You can use the engine using the official stable API, but you need an API key
|
||||||
|
# See : https://console.developers.google.com/project
|
||||||
|
# engine : youtube_api
|
||||||
|
# api_key: 'apikey' # required!
|
||||||
|
# Or you can use the html non-stable engine, activated by default
|
||||||
|
engine : youtube_noapi
|
||||||
|
|
||||||
- name : dailymotion
|
- name : dailymotion
|
||||||
engine : dailymotion
|
engine : dailymotion
|
||||||
|
111
searx/tests/engines/test_youtube_api.py
Normal file
111
searx/tests/engines/test_youtube_api.py
Normal file
@ -0,0 +1,111 @@
|
|||||||
|
from collections import defaultdict
|
||||||
|
import mock
|
||||||
|
from searx.engines import youtube_api
|
||||||
|
from searx.testing import SearxTestCase
|
||||||
|
|
||||||
|
|
||||||
|
class TestYoutubeAPIEngine(SearxTestCase):
    """Tests for the request and response hooks of the ``youtube_api`` engine."""

    def test_request(self):
        """The URL contains the query, the API host and the language only when one is set."""
        query = 'test_query'
        request_params = defaultdict(dict)
        request_params['pageno'] = 0
        request_params['language'] = 'fr_FR'

        params = youtube_api.request(query, request_params)
        self.assertIn('url', params)
        for expected in (query, 'googleapis.com', 'youtube', 'fr'):
            self.assertIn(expected, params['url'])

        # with no specific language, no language tag may be appended
        request_params['language'] = 'all'
        params = youtube_api.request(query, request_params)
        self.assertNotIn('fr', params['url'])

    def test_response(self):
        """Bad inputs raise, item-less payloads give [], a video item is fully mapped."""
        # none of these objects has a ``text`` attribute
        for not_a_response in (None, [], '', '[]'):
            self.assertRaises(AttributeError, youtube_api.response, not_a_response)

        # payloads without an "items" key produce no results
        for body in ('{}', '{"data": []}'):
            self.assertEqual(youtube_api.response(mock.Mock(text=body)), [])

        # a single video item
        payload = """
{
"kind": "youtube#searchListResponse",
"etag": "xmg9xJZuZD438sF4hb-VcBBREXc/YJQDcTBCDcaBvl-sRZJoXdvy1ME",
"nextPageToken": "CAUQAA",
"pageInfo": {
"totalResults": 1000000,
"resultsPerPage": 20
},
"items": [
{
"kind": "youtube#searchResult",
"etag": "xmg9xJZuZD438sF4hb-VcBBREXc/IbLO64BMhbHIgWLwLw7MDYe7Hs4",
"id": {
"kind": "youtube#video",
"videoId": "DIVZCPfAOeM"
},
"snippet": {
"publishedAt": "2015-05-29T22:41:04.000Z",
"channelId": "UCNodmx1ERIjKqvcJLtdzH5Q",
"title": "Title",
"description": "Description",
"thumbnails": {
"default": {
"url": "https://i.ytimg.com/vi/DIVZCPfAOeM/default.jpg"
},
"medium": {
"url": "https://i.ytimg.com/vi/DIVZCPfAOeM/mqdefault.jpg"
},
"high": {
"url": "https://i.ytimg.com/vi/DIVZCPfAOeM/hqdefault.jpg"
}
},
"channelTitle": "MinecraftUniverse",
"liveBroadcastContent": "none"
}
}
]
}
"""
        results = youtube_api.response(mock.Mock(text=payload))
        self.assertEqual(type(results), list)
        self.assertEqual(len(results), 1)
        first = results[0]
        self.assertEqual(first['title'], 'Title')
        self.assertEqual(first['url'], 'https://www.youtube.com/watch?v=DIVZCPfAOeM')
        self.assertEqual(first['content'], 'Description')
        self.assertEqual(first['thumbnail'], 'https://i.ytimg.com/vi/DIVZCPfAOeM/hqdefault.jpg')
        self.assertIn('DIVZCPfAOeM', first['embedded'])

        # a well-formed API answer that simply has no "items"
        payload = """
{
"kind": "youtube#searchListResponse",
"etag": "xmg9xJZuZD438sF4hb-VcBBREXc/YJQDcTBCDcaBvl-sRZJoXdvy1ME",
"nextPageToken": "CAUQAA",
"pageInfo": {
"totalResults": 1000000,
"resultsPerPage": 20
}
}
"""
        results = youtube_api.response(mock.Mock(text=payload))
        self.assertEqual(type(results), list)
        self.assertEqual(len(results), 0)

        # unrelated JSON
        payload = """
{"toto":{"entry":[]
}
}
"""
        results = youtube_api.response(mock.Mock(text=payload))
        self.assertEqual(type(results), list)
        self.assertEqual(len(results), 0)
|
103
searx/tests/engines/test_youtube_noapi.py
Normal file
103
searx/tests/engines/test_youtube_noapi.py
Normal file
@ -0,0 +1,103 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
from collections import defaultdict
|
||||||
|
import mock
|
||||||
|
from searx.engines import youtube_noapi
|
||||||
|
from searx.testing import SearxTestCase
|
||||||
|
|
||||||
|
|
||||||
|
class TestYoutubeNoAPIEngine(SearxTestCase):
    """Tests for the request and response hooks of the ``youtube_noapi`` engine."""

    def test_request(self):
        """The URL contains the query and points at youtube.com."""
        query = 'test_query'
        request_params = defaultdict(dict)
        request_params['pageno'] = 0

        params = youtube_noapi.request(query, request_params)
        self.assertIn('url', params)
        for expected in (query, 'youtube.com'):
            self.assertIn(expected, params['url'])

    def test_response(self):
        """Bad inputs raise, pages without video tiles give [], one tile is fully mapped."""
        # none of these objects has a ``text`` attribute
        for not_a_response in (None, [], '', '[]'):
            self.assertRaises(AttributeError, youtube_noapi.response, not_a_response)

        empty_page = mock.Mock(text='<html></html>')
        self.assertEqual(youtube_noapi.response(empty_page), [])

        # a results list holding a single video tile
        page = """
<ol id="item-section-063864" class="item-section">
<li>
<div class="yt-lockup yt-lockup-tile yt-lockup-video vve-check clearfix yt-uix-tile"
data-context-item-id="DIVZCPfAOeM"
data-visibility-tracking="CBgQ3DAYACITCPGXnYau6sUCFZEIHAod-VQASCj0JECx_-GK5uqMpcIB">
<div class="yt-lockup-dismissable"><div class="yt-lockup-thumbnail contains-addto">
<a aria-hidden="true" href="/watch?v=DIVZCPfAOeM" class=" yt-uix-sessionlink pf-link"
data-sessionlink="itct=CBgQ3DAYACITCPGXnYau6sUCFZEIHAod-VQASCj0JFIEdGVzdA">
<div class="yt-thumb video-thumb"><img src="//i.ytimg.com/vi/DIVZCPfAOeM/mqdefault.jpg"
width="196" height="110"/></div><span class="video-time" aria-hidden="true">11:35</span></a>
<span class="thumb-menu dark-overflow-action-menu video-actions">
</span>
</div>
<div class="yt-lockup-content">
<h3 class="yt-lockup-title">
<a href="/watch?v=DIVZCPfAOeM"
class="yt-uix-tile-link yt-ui-ellipsis yt-ui-ellipsis-2 yt-uix-sessionlink spf-link"
data-sessionlink="itct=CBgQ3DAYACITCPGXnYau6sUCFZEIHAod-VQASCj0JFIEdGVzdA"
title="Top Speed Test Kawasaki Ninja H2 (Thailand) By. MEHAY SUPERBIKE"
aria-describedby="description-id-259079" rel="spf-prefetch" dir="ltr">
Title
</a>
<span class="accessible-description" id="description-id-259079"> - Durée : 11:35.</span>
</h3>
<div class="yt-lockup-byline">de
<a href="/user/mheejapan" class=" yt-uix-sessionlink spf-link g-hovercard"
data-sessionlink="itct=CBgQ3DAYACITCPGXnYau6sUCFZEIHAod-VQASCj0JA" data-ytid="UCzEesu54Hjs0uRKmpy66qeA"
data-name="">MEHAY SUPERBIKE</a></div><div class="yt-lockup-meta">
<ul class="yt-lockup-meta-info">
<li>il y a 20 heures</li>
<li>8 424 vues</li>
</ul>
</div>
<div class="yt-lockup-description yt-ui-ellipsis yt-ui-ellipsis-2" dir="ltr">
Description
</div>
<div class="yt-lockup-badges">
<ul class="yt-badge-list ">
<li class="yt-badge-item" >
<span class="yt-badge">Nouveauté</span>
</li>
<li class="yt-badge-item" ><span class="yt-badge " >HD</span></li>
</ul>
</div>
<div class="yt-lockup-action-menu yt-uix-menu-container">
<div class="yt-uix-menu yt-uix-videoactionmenu hide-until-delayloaded"
data-video-id="DIVZCPfAOeM" data-menu-content-id="yt-uix-videoactionmenu-menu">
</div>
</div>
</div>
</div>
</div>
</li>
</ol>
"""
        results = youtube_noapi.response(mock.Mock(text=page))
        self.assertEqual(type(results), list)
        self.assertEqual(len(results), 1)
        first = results[0]
        self.assertEqual(first['title'], 'Title')
        self.assertEqual(first['url'], 'https://www.youtube.com/watch?v=DIVZCPfAOeM')
        self.assertEqual(first['content'], 'Description')
        self.assertEqual(first['thumbnail'], 'https://i.ytimg.com/vi/DIVZCPfAOeM/hqdefault.jpg')
        self.assertIn('DIVZCPfAOeM', first['embedded'])

        # a results list whose only entry holds no video tile
        page = """
<ol id="item-section-063864" class="item-section">
<li>
</li>
</ol>
"""
        results = youtube_noapi.response(mock.Mock(text=page))
        self.assertEqual(type(results), list)
        self.assertEqual(len(results), 0)
|
@ -39,4 +39,6 @@ from searx.tests.engines.test_www500px import * # noqa
|
|||||||
from searx.tests.engines.test_yacy import * # noqa
|
from searx.tests.engines.test_yacy import * # noqa
|
||||||
from searx.tests.engines.test_yahoo import * # noqa
|
from searx.tests.engines.test_yahoo import * # noqa
|
||||||
from searx.tests.engines.test_youtube import * # noqa
|
from searx.tests.engines.test_youtube import * # noqa
|
||||||
|
from searx.tests.engines.test_youtube_api import * # noqa
|
||||||
|
from searx.tests.engines.test_youtube_noapi import * # noqa
|
||||||
from searx.tests.engines.test_yahoo_news import * # noqa
|
from searx.tests.engines.test_yahoo_news import * # noqa
|
||||||
|
Loading…
Reference in New Issue
Block a user