From 05c105b8371e3766dba35e815601881d83ef6383 Mon Sep 17 00:00:00 2001 From: Markus Heiser Date: Thu, 17 Feb 2022 22:10:34 +0100 Subject: [PATCH] [fix] bandcamp: fix itemtype (album|track) and exceptions BTW: polish implementation and show tracklist for albums Closes: https://github.com/searxng/searxng/issues/883 Signed-off-by: Markus Heiser --- searx/engines/bandcamp.py | 48 ++++++++++++------- .../themes/simple/src/less/embedded.less | 10 ++++ 2 files changed, 42 insertions(+), 16 deletions(-) diff --git a/searx/engines/bandcamp.py b/searx/engines/bandcamp.py index f868b44ed..f83ca6d4f 100644 --- a/searx/engines/bandcamp.py +++ b/searx/engines/bandcamp.py @@ -1,16 +1,23 @@ -""" -Bandcamp (Music) +# SPDX-License-Identifier: AGPL-3.0-or-later +# lint: pylint +"""Bandcamp (Music) @website https://bandcamp.com/ @provide-api no @results HTML @parse url, title, content, publishedDate, iframe_src, thumbnail + """ from urllib.parse import urlencode, urlparse, parse_qs from dateutil.parser import parse as dateparse from lxml import html -from searx.utils import extract_text + +from searx.utils import ( + eval_xpath_getindex, + eval_xpath_list, + extract_text, +) # about about = { @@ -26,12 +33,13 @@ categories = ['music'] paging = True base_url = "https://bandcamp.com/" -search_string = search_string = 'search?{query}&page={page}' -iframe_src = "https://bandcamp.com/EmbeddedPlayer/{type}={result_id}/size=large/bgcol=ffffff/linkcol=0687f5/tracklist=false/artwork=small/transparent=true/" +search_string = 'search?{query}&page={page}' +iframe_src = "https://bandcamp.com/EmbeddedPlayer/{type}={result_id}/size=large/bgcol=000/linkcol=fff/artwork=small" def request(query, params): '''pre-request callback + params: method : POST/GET headers : {} @@ -42,37 +50,45 @@ def request(query, params): ''' search_path = search_string.format(query=urlencode({'q': query}), page=params['pageno']) - params['url'] = base_url + search_path - return params def response(resp): '''post-response callback + resp: requests response object ''' results = [] - tree = html.fromstring(resp.text) - search_results = tree.xpath('//li[contains(@class, "searchresult")]') - for result in search_results: - link = result.xpath('.//div[@class="itemurl"]/a')[0] - result_id = parse_qs(urlparse(link.get('href')).query)["search_item_id"][0] + dom = html.fromstring(resp.text) + + for result in eval_xpath_list(dom, '//li[contains(@class, "searchresult")]'): + + link = eval_xpath_getindex(result, './/div[@class="itemurl"]/a', 0, default=None) + if link is None: + continue + title = result.xpath('.//div[@class="heading"]/a/text()') - date = dateparse(result.xpath('//div[@class="released"]/text()')[0].replace("released ", "")) content = result.xpath('.//div[@class="subhead"]/text()') new_result = { "url": extract_text(link), "title": extract_text(title), "content": extract_text(content), - "publishedDate": date, } + + date = eval_xpath_getindex(result, '//div[@class="released"]/text()', 0, default=None) + if date: + new_result["publishedDate"] = dateparse(date.replace("released ", "")) + thumbnail = result.xpath('.//div[@class="art"]/img/@src') if thumbnail: new_result['thumbnail'] = thumbnail[0] - if "album" in result.classes: + + result_id = parse_qs(urlparse(link.get('href')).query)["search_item_id"][0] + itemtype = extract_text(result.xpath('.//div[@class="itemtype"]')).lower() + if "album" == itemtype: new_result["iframe_src"] = iframe_src.format(type='album', result_id=result_id) - elif "track" in result.classes: + elif "track" == itemtype: new_result["iframe_src"] = iframe_src.format(type='track', result_id=result_id) results.append(new_result) diff --git a/searx/static/themes/simple/src/less/embedded.less b/searx/static/themes/simple/src/less/embedded.less index 4a43ea78d..1074802f2 100644 --- a/searx/static/themes/simple/src/less/embedded.less +++ b/searx/static/themes/simple/src/less/embedded.less @@ -17,3 +17,13 @@ iframe[src^="https://www.mixcloud.com"] { // 200px, somtimes 250px. height: 250px; } + +iframe[src^="https://bandcamp.com/EmbeddedPlayer"] { + // show playlist + height: 350px; +} + +iframe[src^="https://bandcamp.com/EmbeddedPlayer/track"] { + // hide playlist + height: 120px; +}