1
0
mirror of https://github.com/searxng/searxng.git synced 2024-10-01 09:00:15 +02:00

Merge pull request #900 from return42/fix-883

[fix] bandcamp: fix itemtype (album|track) and exceptions
This commit is contained in:
Alexandre Flament 2022-02-19 13:42:53 +01:00 committed by GitHub
commit ace5401632
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 47 additions and 21 deletions

View File

@ -1,16 +1,23 @@
"""
Bandcamp (Music)
# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
"""Bandcamp (Music)
@website https://bandcamp.com/
@provide-api no
@results HTML
@parse url, title, content, publishedDate, iframe_src, thumbnail
"""
from urllib.parse import urlencode, urlparse, parse_qs
from dateutil.parser import parse as dateparse
from lxml import html
from searx.utils import extract_text
from searx.utils import (
eval_xpath_getindex,
eval_xpath_list,
extract_text,
)
# about
about = {
@ -26,12 +33,13 @@ categories = ['music']
paging = True
base_url = "https://bandcamp.com/"
search_string = search_string = 'search?{query}&page={page}'
iframe_src = "https://bandcamp.com/EmbeddedPlayer/{type}={result_id}/size=large/bgcol=ffffff/linkcol=0687f5/tracklist=false/artwork=small/transparent=true/"
search_string = 'search?{query}&page={page}'
iframe_src = "https://bandcamp.com/EmbeddedPlayer/{type}={result_id}/size=large/bgcol=000/linkcol=fff/artwork=small"
def request(query, params):
'''pre-request callback
params<dict>:
method : POST/GET
headers : {}
@ -42,37 +50,45 @@ def request(query, params):
'''
search_path = search_string.format(query=urlencode({'q': query}), page=params['pageno'])
params['url'] = base_url + search_path
return params
def response(resp):
'''post-response callback
resp: requests response object
'''
results = []
tree = html.fromstring(resp.text)
search_results = tree.xpath('//li[contains(@class, "searchresult")]')
for result in search_results:
link = result.xpath('.//div[@class="itemurl"]/a')[0]
result_id = parse_qs(urlparse(link.get('href')).query)["search_item_id"][0]
dom = html.fromstring(resp.text)
for result in eval_xpath_list(dom, '//li[contains(@class, "searchresult")]'):
link = eval_xpath_getindex(result, './/div[@class="itemurl"]/a', 0, default=None)
if link is None:
continue
title = result.xpath('.//div[@class="heading"]/a/text()')
date = dateparse(result.xpath('//div[@class="released"]/text()')[0].replace("released ", ""))
content = result.xpath('.//div[@class="subhead"]/text()')
new_result = {
"url": extract_text(link),
"title": extract_text(title),
"content": extract_text(content),
"publishedDate": date,
}
date = eval_xpath_getindex(result, '//div[@class="released"]/text()', 0, default=None)
if date:
new_result["publishedDate"] = dateparse(date.replace("released ", ""))
thumbnail = result.xpath('.//div[@class="art"]/img/@src')
if thumbnail:
new_result['thumbnail'] = thumbnail[0]
if "album" in result.classes:
result_id = parse_qs(urlparse(link.get('href')).query)["search_item_id"][0]
itemtype = extract_text(result.xpath('.//div[@class="itemtype"]')).lower()
if "album" == itemtype:
new_result["iframe_src"] = iframe_src.format(type='album', result_id=result_id)
elif "track" in result.classes:
elif "track" == itemtype:
new_result["iframe_src"] = iframe_src.format(type='track', result_id=result_id)
results.append(new_result)

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@ -17,3 +17,13 @@ iframe[src^="https://www.mixcloud.com"] {
// 200px, sometimes 250px.
height: 250px;
}
iframe[src^="https://bandcamp.com/EmbeddedPlayer"] {
// show playlist
height: 350px;
}
iframe[src^="https://bandcamp.com/EmbeddedPlayer/track"] {
// hide playlist
height: 120px;
}