
Merge pull request #2460 from dalf/engine-about

[enh] engines: add about variable
Alexandre Flament 2021-01-16 19:05:17 +01:00 committed by GitHub
commit 1d13ad8452
90 changed files with 1421 additions and 725 deletions
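As the diffs below show, each engine module replaces the old free-text docstring fields (@website, @provide-api, @using-api, @results, @stable, @parse) with a structured module-level "about" variable. A minimal sketch of the new schema, with illustrative placeholder values rather than any single engine's data:

# about: structured engine metadata (values below are placeholders)
about = {
    "website": 'https://example.org/',   # public site of the engine, or None
    "wikidata_id": 'Q42',                # Wikidata item id, or None
    "official_api_documentation": None,  # URL, None, or a dict {'url': ..., 'comment': ...}
    "use_official_api": False,           # True when the engine calls an official API
    "require_api_key": False,            # True when an API key must be configured
    "results": 'HTML',                   # result format: 'HTML', 'JSON', 'XML', 'RSS', ...
}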

View File

@ -1,7 +1,21 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
"""
1337x
"""
from urllib.parse import quote, urljoin
from lxml import html
from searx.utils import extract_text, get_torrent_size, eval_xpath, eval_xpath_list, eval_xpath_getindex
# about
about = {
"website": 'https://1337x.to/',
"wikidata_id": 'Q28134166',
"official_api_documentation": None,
"use_official_api": False,
"require_api_key": False,
"results": 'HTML',
}
url = 'https://1337x.to/'
search_url = url + 'search/{search_term}/{pageno}/'

View File

@ -1,18 +1,22 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
""" """
Acgsou (Japanese Animation/Music/Comics Bittorrent tracker) Acgsou (Japanese Animation/Music/Comics Bittorrent tracker)
@website https://www.acgsou.com/
@provide-api no
@using-api no
@results HTML
@stable no (HTML can change)
@parse url, title, content, seed, leech, torrentfile
""" """
from urllib.parse import urlencode from urllib.parse import urlencode
from lxml import html from lxml import html
from searx.utils import extract_text, get_torrent_size, eval_xpath_list, eval_xpath_getindex from searx.utils import extract_text, get_torrent_size, eval_xpath_list, eval_xpath_getindex
# about
about = {
"website": 'https://www.acgsou.com/',
"wikidata_id": None,
"official_api_documentation": None,
"use_official_api": False,
"require_api_key": False,
"results": 'HTML',
}
# engine dependent config
categories = ['files', 'images', 'videos', 'music']
paging = True

View File

@ -1,19 +1,22 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
""" """
Ahmia (Onions) Ahmia (Onions)
@website http://msydqstlz2kzerdg.onion
@provides-api no
@using-api no
@results HTML
@stable no
@parse url, title, content
""" """
from urllib.parse import urlencode, urlparse, parse_qs from urllib.parse import urlencode, urlparse, parse_qs
from lxml.html import fromstring from lxml.html import fromstring
from searx.engines.xpath import extract_url, extract_text, eval_xpath_list, eval_xpath from searx.engines.xpath import extract_url, extract_text, eval_xpath_list, eval_xpath
# about
about = {
"website": 'http://msydqstlz2kzerdg.onion',
"wikidata_id": 'Q18693938',
"official_api_documentation": None,
"use_official_api": False,
"require_api_key": False,
"results": 'HTML',
}
# engine config
categories = ['onions']
paging = True

View File

@ -1,18 +1,21 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
""" """
APK Mirror APK Mirror
@website https://www.apkmirror.com
@using-api no
@results HTML
@stable no (HTML can change)
@parse url, title, thumbnail_src
""" """
from urllib.parse import urlencode from urllib.parse import urlencode
from lxml import html from lxml import html
from searx.utils import extract_text, eval_xpath_list, eval_xpath_getindex from searx.utils import extract_text, eval_xpath_list, eval_xpath_getindex
# about
about = {
"website": 'https://www.apkmirror.com',
"wikidata_id": None,
"official_api_documentation": None,
"use_official_api": False,
"require_api_key": False,
"results": 'HTML',
}
# engine dependent config
categories = ['it']

View File

@ -1,20 +1,24 @@
# -*- coding: utf-8 -*-
# SPDX-License-Identifier: AGPL-3.0-or-later
"""
Arch Linux Wiki
@website https://wiki.archlinux.org
API: Mediawiki provides API, but Arch Wiki blocks access to it
@provide-api no (Mediawiki provides API, but Arch Wiki blocks access to it
@using-api no
@results HTML
@stable no (HTML can change)
@parse url, title
""" """
from urllib.parse import urlencode, urljoin from urllib.parse import urlencode, urljoin
from lxml import html from lxml import html
from searx.utils import extract_text, eval_xpath_list, eval_xpath_getindex from searx.utils import extract_text, eval_xpath_list, eval_xpath_getindex
# about
about = {
"website": 'https://wiki.archlinux.org/',
"wikidata_id": 'Q101445877',
"official_api_documentation": None,
"use_official_api": False,
"require_api_key": False,
"results": 'HTML',
}
# engine dependent config
categories = ['it']
language_support = True

View File

@ -1,20 +1,21 @@
#!/usr/bin/env python
# SPDX-License-Identifier: AGPL-3.0-or-later
"""
ArXiV (Scientific preprints)
@website https://arxiv.org
@provide-api yes (export.arxiv.org/api/query)
@using-api yes
@results XML-RSS
@stable yes
@parse url, title, publishedDate, content
More info on api: https://arxiv.org/help/api/user-manual
""" """
from lxml import html from lxml import html
from datetime import datetime from datetime import datetime
from searx.utils import eval_xpath_list, eval_xpath_getindex from searx.utils import eval_xpath_list, eval_xpath_getindex
# about
about = {
"website": 'https://arxiv.org',
"wikidata_id": 'Q118398',
"official_api_documentation": 'https://arxiv.org/help/api',
"use_official_api": True,
"require_api_key": False,
"results": 'XML-RSS',
}
categories = ['science']
paging = True

View File

@ -1,16 +1,6 @@
#!/usr/bin/env python
# SPDX-License-Identifier: AGPL-3.0-or-later
"""
BASE (Scholar publications)
@website https://base-search.net
@provide-api yes with authorization (https://api.base-search.net/)
@using-api yes
@results XML
@stable ?
@parse url, title, publishedDate, content
More info on api: http://base-search.net/about/download/base_interface.pdf
""" """
from urllib.parse import urlencode from urllib.parse import urlencode
@ -19,6 +9,15 @@ from datetime import datetime
import re
from searx.utils import searx_useragent
# about
about = {
"website": 'https://base-search.net',
"wikidata_id": 'Q448335',
"official_api_documentation": 'https://api.base-search.net/',
"use_official_api": True,
"require_api_key": False,
"results": 'XML',
}
categories = ['science']

View File

@ -1,16 +1,6 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
""" """
Bing (Web) Bing (Web)
@website https://www.bing.com
@provide-api yes (http://datamarket.azure.com/dataset/bing/search),
max. 5000 query/month
@using-api no (because of query limit)
@results HTML (using search portal)
@stable no (HTML can change)
@parse url, title, content
@todo publishedDate
""" """
import re import re
@ -21,6 +11,16 @@ from searx.utils import eval_xpath, extract_text, match_language
logger = logger.getChild('bing engine')
# about
about = {
"website": 'https://www.bing.com',
"wikidata_id": 'Q182496',
"official_api_documentation": 'https://www.microsoft.com/en-us/bing/apis/bing-web-search-api',
"use_official_api": False,
"require_api_key": False,
"results": 'HTML',
}
# engine dependent config
categories = ['general']
paging = True

View File

@ -1,15 +1,6 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
""" """
Bing (Images) Bing (Images)
@website https://www.bing.com/images
@provide-api yes (http://datamarket.azure.com/dataset/bing/search),
max. 5000 query/month
@using-api no (because of query limit)
@results HTML (using search portal)
@stable no (HTML can change)
@parse url, title, img_src
""" """
from urllib.parse import urlencode from urllib.parse import urlencode
@ -20,6 +11,16 @@ from searx.utils import match_language
from searx.engines.bing import language_aliases
from searx.engines.bing import _fetch_supported_languages, supported_languages_url # NOQA # pylint: disable=unused-import
# about
about = {
"website": 'https://www.bing.com/images',
"wikidata_id": 'Q182496',
"official_api_documentation": 'https://www.microsoft.com/en-us/bing/apis/bing-image-search-api',
"use_official_api": False,
"require_api_key": False,
"results": 'HTML',
}
# engine dependent config
categories = ['images']
paging = True

View File

@ -1,14 +1,6 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
""" """
Bing (News) Bing (News)
@website https://www.bing.com/news
@provide-api yes (http://datamarket.azure.com/dataset/bing/search),
max. 5000 query/month
@using-api no (because of query limit)
@results RSS (using search portal)
@stable yes (except perhaps for the images)
@parse url, title, content, publishedDate, thumbnail
""" """
from datetime import datetime from datetime import datetime
@ -20,6 +12,16 @@ from searx.utils import match_language, eval_xpath_getindex
from searx.engines.bing import language_aliases
from searx.engines.bing import _fetch_supported_languages, supported_languages_url # NOQA # pylint: disable=unused-import
# about
about = {
"website": 'https://www.bing.com/news',
"wikidata_id": 'Q2878637',
"official_api_documentation": 'https://www.microsoft.com/en-us/bing/apis/bing-news-search-api',
"use_official_api": False,
"require_api_key": False,
"results": 'RSS',
}
# engine dependent config
categories = ['news']
paging = True

View File

@ -1,13 +1,6 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
""" """
Bing (Videos) Bing (Videos)
@website https://www.bing.com/videos
@provide-api yes (http://datamarket.azure.com/dataset/bing/search)
@using-api no
@results HTML
@stable no
@parse url, title, content, thumbnail
""" """
from json import loads from json import loads
@ -18,6 +11,16 @@ from searx.utils import match_language
from searx.engines.bing import language_aliases
from searx.engines.bing import _fetch_supported_languages, supported_languages_url # NOQA # pylint: disable=unused-import
# about
about = {
"website": 'https://www.bing.com/videos',
"wikidata_id": 'Q4914152',
"official_api_documentation": 'https://www.microsoft.com/en-us/bing/apis/bing-video-search-api',
"use_official_api": False,
"require_api_key": False,
"results": 'HTML',
}
categories = ['videos']
paging = True
safesearch = True

View File

@ -1,19 +1,25 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
""" """
BTDigg (Videos, Music, Files) BTDigg (Videos, Music, Files)
@website https://btdig.com
@provide-api yes (on demand)
@using-api no
@results HTML (using search portal)
@stable no (HTML can change)
@parse url, title, content, seed, leech, magnetlink
""" """
from lxml import html from lxml import html
from urllib.parse import quote, urljoin from urllib.parse import quote, urljoin
from searx.utils import extract_text, get_torrent_size from searx.utils import extract_text, get_torrent_size
# about
about = {
"website": 'https://btdig.com',
"wikidata_id": 'Q4836698',
"official_api_documentation": {
'url': 'https://btdig.com/contacts',
'comment': 'on demand'
},
"use_official_api": False,
"require_api_key": False,
"results": 'HTML',
}
# engine dependent config
categories = ['videos', 'music', 'files']
paging = True

View File

@ -1,18 +1,7 @@
'''
searx is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
searx is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with searx. If not, see < http://www.gnu.org/licenses/ >.
'''
# SPDX-License-Identifier: AGPL-3.0-or-later
"""
Command (offline)
"""
import re
from os.path import expanduser, isabs, realpath, commonprefix

View File

@ -1,5 +1,19 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
"""
currency convert (DuckDuckGo)
"""
import json
# about
about = {
"website": 'https://duckduckgo.com/',
"wikidata_id": 'Q12805',
"official_api_documentation": 'https://duckduckgo.com/api',
"use_official_api": False,
"require_api_key": False,
"results": 'JSONP',
}
engine_type = 'online_currency'
categories = []

View File

@ -1,15 +1,6 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
""" """
Dailymotion (Videos) Dailymotion (Videos)
@website https://www.dailymotion.com
@provide-api yes (http://www.dailymotion.com/developer)
@using-api yes
@results JSON
@stable yes
@parse url, title, thumbnail, publishedDate, embedded
@todo set content-parameter with correct data
""" """
from json import loads from json import loads
@ -17,6 +8,16 @@ from datetime import datetime
from urllib.parse import urlencode
from searx.utils import match_language, html_to_text
# about
about = {
"website": 'https://www.dailymotion.com',
"wikidata_id": 'Q769222',
"official_api_documentation": 'https://www.dailymotion.com/developer',
"use_official_api": True,
"require_api_key": False,
"results": 'JSON',
}
# engine dependent config
categories = ['videos']
paging = True

View File

@ -1,18 +1,21 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
""" """
Deezer (Music) Deezer (Music)
@website https://deezer.com
@provide-api yes (http://developers.deezer.com/api/)
@using-api yes
@results JSON
@stable yes
@parse url, title, content, embedded
""" """
from json import loads from json import loads
from urllib.parse import urlencode from urllib.parse import urlencode
# about
about = {
"website": 'https://deezer.com',
"wikidata_id": 'Q602243',
"official_api_documentation": 'https://developers.deezer.com/',
"use_official_api": True,
"require_api_key": False,
"results": 'JSON',
}
# engine dependent config
categories = ['music']
paging = True

View File

@ -1,21 +1,22 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
""" """
Deviantart (Images) Deviantart (Images)
@website https://www.deviantart.com/
@provide-api yes (https://www.deviantart.com/developers/) (RSS)
@using-api no (TODO, rewrite to api)
@results HTML
@stable no (HTML can change)
@parse url, title, img_src
@todo rewrite to api
""" """
# pylint: disable=missing-function-docstring # pylint: disable=missing-function-docstring
from urllib.parse import urlencode from urllib.parse import urlencode
from lxml import html from lxml import html
# about
about = {
"website": 'https://www.deviantart.com/',
"wikidata_id": 'Q46523',
"official_api_documentation": 'https://www.deviantart.com/developers/',
"use_official_api": False,
"require_api_key": False,
"results": 'HTML',
}
# engine dependent config
categories = ['images']
paging = True

View File

@ -1,18 +1,21 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
""" """
Dictzone Dictzone
@website https://dictzone.com/
@provide-api no
@using-api no
@results HTML (using search portal)
@stable no (HTML can change)
@parse url, title, content
""" """
from urllib.parse import urljoin from urllib.parse import urljoin
from lxml import html from lxml import html
from searx.utils import eval_xpath from searx.utils import eval_xpath
# about
about = {
"website": 'https://dictzone.com/',
"wikidata_id": None,
"official_api_documentation": None,
"use_official_api": False,
"require_api_key": False,
"results": 'HTML',
}
engine_type = 'online_dictionnary'
categories = ['general']

View File

@ -1,19 +1,21 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
""" """
DigBT (Videos, Music, Files) DigBT (Videos, Music, Files)
@website https://digbt.org
@provide-api no
@using-api no
@results HTML (using search portal)
@stable no (HTML can change)
@parse url, title, content, magnetlink
""" """
from urllib.parse import urljoin from urllib.parse import urljoin
from lxml import html from lxml import html
from searx.utils import extract_text, get_torrent_size from searx.utils import extract_text, get_torrent_size
# about
about = {
"website": 'https://digbt.org',
"wikidata_id": None,
"official_api_documentation": None,
"use_official_api": False,
"require_api_key": False,
"results": 'HTML',
}
categories = ['videos', 'music', 'files']
paging = True

View File

@ -1,13 +1,6 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
""" """
Digg (News, Social media) Digg (News, Social media)
@website https://digg.com
@provide-api no
@using-api no
@results HTML (using search portal)
@stable no (HTML can change)
@parse url, title, content, publishedDate, thumbnail
""" """
# pylint: disable=missing-function-docstring # pylint: disable=missing-function-docstring
@ -17,6 +10,16 @@ from datetime import datetime
from lxml import html
# about
about = {
"website": 'https://digg.com',
"wikidata_id": 'Q270478',
"official_api_documentation": None,
"use_official_api": False,
"require_api_key": False,
"results": 'HTML',
}
# engine dependent config
categories = ['news', 'social media']
paging = True

View File

@ -1,18 +1,22 @@
# Doku Wiki
#
# @website https://www.dokuwiki.org/
# @provide-api yes
# (https://www.dokuwiki.org/devel:xmlrpc)
#
# @using-api no
# @results HTML
# @stable yes
# @parse (general) url, title, content
# SPDX-License-Identifier: AGPL-3.0-or-later
"""
Doku Wiki
"""
from urllib.parse import urlencode
from lxml.html import fromstring
from searx.utils import extract_text, eval_xpath
# about
about = {
"website": 'https://www.dokuwiki.org/',
"wikidata_id": 'Q851864',
"official_api_documentation": 'https://www.dokuwiki.org/devel:xmlrpc',
"use_official_api": False,
"require_api_key": False,
"results": 'HTML',
}
# engine dependent config
categories = ['general'] # TODO , 'images', 'music', 'videos', 'files'
paging = False

View File

@ -1,22 +1,22 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
""" """
DuckDuckGo (Web) DuckDuckGo (Web)
@website https://duckduckgo.com/
@provide-api yes (https://duckduckgo.com/api),
but not all results from search-site
@using-api no
@results HTML (using search portal)
@stable no (HTML can change)
@parse url, title, content
@todo rewrite to api
""" """
from lxml.html import fromstring from lxml.html import fromstring
from json import loads from json import loads
from searx.utils import extract_text, match_language, eval_xpath from searx.utils import extract_text, match_language, eval_xpath
# about
about = {
"website": 'https://duckduckgo.com/',
"wikidata_id": 'Q12805',
"official_api_documentation": 'https://duckduckgo.com/api',
"use_official_api": False,
"require_api_key": False,
"results": 'HTML',
}
# engine dependent config
categories = ['general']
paging = False

View File

@ -1,12 +1,6 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
""" """
DuckDuckGo (definitions) DuckDuckGo (Instant Answer API)
- `Instant Answer API`_
- `DuckDuckGo query`_
.. _Instant Answer API: https://duckduckgo.com/api
.. _DuckDuckGo query: https://api.duckduckgo.com/?q=DuckDuckGo&format=json&pretty=1
""" """
import json import json
@ -22,6 +16,16 @@ from searx.external_urls import get_external_url, get_earth_coordinates_url, are
logger = logger.getChild('duckduckgo_definitions')
# about
about = {
"website": 'https://duckduckgo.com/',
"wikidata_id": 'Q12805',
"official_api_documentation": 'https://duckduckgo.com/api',
"use_official_api": True,
"require_api_key": False,
"results": 'JSON',
}
URL = 'https://api.duckduckgo.com/'\
    + '?{query}&format=json&pretty=0&no_redirect=1&d=1'

View File

@ -1,16 +1,6 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
""" """
DuckDuckGo (Images) DuckDuckGo (Images)
@website https://duckduckgo.com/
@provide-api yes (https://duckduckgo.com/api),
but images are not supported
@using-api no
@results JSON (site requires js to get images)
@stable no (JSON can change)
@parse url, title, img_src
@todo avoid extra request
""" """
from json import loads from json import loads
@ -20,6 +10,19 @@ from searx.engines.duckduckgo import get_region_code
from searx.engines.duckduckgo import _fetch_supported_languages, supported_languages_url # NOQA # pylint: disable=unused-import
from searx.poolrequests import get
# about
about = {
"website": 'https://duckduckgo.com/',
"wikidata_id": 'Q12805',
"official_api_documentation": {
'url': 'https://duckduckgo.com/api',
'comment': 'but images are not supported',
},
"use_official_api": False,
"require_api_key": False,
"results": 'JSON (site requires js to get images)',
}
# engine dependent config
categories = ['images']
paging = True

View File

@ -1,11 +1,6 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
""" """
Duden Duden
@website https://www.duden.de
@provide-api no
@using-api no
@results HTML (using search portal)
@stable no (HTML can change)
@parse url, title, content
""" """
import re import re
@ -13,6 +8,16 @@ from urllib.parse import quote, urljoin
from lxml import html
from searx.utils import extract_text, eval_xpath, eval_xpath_list, eval_xpath_getindex
# about
about = {
"website": 'https://www.duden.de',
"wikidata_id": 'Q73624591',
"official_api_documentation": None,
"use_official_api": False,
"require_api_key": False,
"results": 'HTML',
}
categories = ['general']
paging = True
language_support = False

View File

@ -1,11 +1,19 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
""" """
Dummy Offline Dummy Offline
@results one result
@stable yes
""" """
# about
about = {
"wikidata_id": None,
"official_api_documentation": None,
"use_official_api": False,
"require_api_key": False,
"results": 'HTML',
}
def search(query, request_params):
    return [{
        'result': 'this is what you get',

View File

@ -1,10 +1,18 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
""" """
Dummy Dummy
@results empty array
@stable yes
""" """
# about
about = {
"website": None,
"wikidata_id": None,
"official_api_documentation": None,
"use_official_api": False,
"require_api_key": False,
"results": 'empty array',
}
# do search-request
def request(query, params):

View File

@ -1,17 +1,22 @@
# Ebay (Videos, Music, Files)
#
# @website https://www.ebay.com
# @provide-api no (nothing found)
#
# @using-api no
# @results HTML (using search portal)
# @stable yes (HTML can change)
# @parse url, title, content, price, shipping, source
# SPDX-License-Identifier: AGPL-3.0-or-later
"""
Ebay (Videos, Music, Files)
"""
from lxml import html
from searx.engines.xpath import extract_text
from urllib.parse import quote
# about
about = {
"website": 'https://www.ebay.com',
"wikidata_id": 'Q58024',
"official_api_documentation": 'https://developer.ebay.com/',
"use_official_api": False,
"require_api_key": False,
"results": 'HTML',
}
categories = ['shopping']
paging = True

View File

@ -1,3 +1,8 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
"""
Elasticsearch
"""
from json import loads, dumps
from requests.auth import HTTPBasicAuth
from searx.exceptions import SearxEngineAPIException

View File

@ -1,18 +1,22 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
""" """
eTools (Web) eTools (Web)
@website https://www.etools.ch
@provide-api no
@using-api no
@results HTML
@stable no (HTML can change)
@parse url, title, content
""" """
from lxml import html from lxml import html
from urllib.parse import quote from urllib.parse import quote
from searx.utils import extract_text, eval_xpath from searx.utils import extract_text, eval_xpath
# about
about = {
"website": 'https://www.etools.ch',
"wikidata_id": None,
"official_api_documentation": None,
"use_official_api": False,
"require_api_key": False,
"results": 'HTML',
}
categories = ['general']
paging = False
language_support = False

View File

@ -1,18 +1,22 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
""" """
F-Droid (a repository of FOSS applications for Android) F-Droid (a repository of FOSS applications for Android)
@website https://f-droid.org/
@provide-api no
@using-api no
@results HTML
@stable no (HTML can change)
@parse url, title, content
""" """
from urllib.parse import urlencode from urllib.parse import urlencode
from lxml import html from lxml import html
from searx.utils import extract_text from searx.utils import extract_text
# about
about = {
"website": 'https://f-droid.org/',
"wikidata_id": 'Q1386210',
"official_api_documentation": None,
"use_official_api": False,
"require_api_key": False,
"results": 'HTML',
}
# engine dependent config
categories = ['files']
paging = True

View File

@ -1,21 +1,23 @@
#!/usr/bin/env python
# SPDX-License-Identifier: AGPL-3.0-or-later
"""
Flickr (Images)
@website https://www.flickr.com
@provide-api yes (https://secure.flickr.com/services/api/flickr.photos.search.html)
@using-api yes
@results JSON
@stable yes
@parse url, title, thumbnail, img_src
More info on api-key : https://www.flickr.com/services/apps/create/
"""
from json import loads
from urllib.parse import urlencode
# about
about = {
"website": 'https://www.flickr.com',
"wikidata_id": 'Q103204',
"official_api_documentation": 'https://secure.flickr.com/services/api/flickr.photos.search.html',
"use_official_api": True,
"require_api_key": True,
"results": 'JSON',
}
categories = ['images']
nb_per_page = 15

View File

@ -1,15 +1,6 @@
#!/usr/bin/env python
# SPDX-License-Identifier: AGPL-3.0-or-later
"""
Flickr (Images)
@website https://www.flickr.com
@provide-api yes (https://secure.flickr.com/services/api/flickr.photos.search.html)
@using-api no
@results HTML
@stable no
@parse url, title, thumbnail, img_src
""" """
from json import loads from json import loads
@ -21,6 +12,16 @@ from searx.utils import ecma_unescape, html_to_text
logger = logger.getChild('flickr-noapi')
# about
about = {
"website": 'https://www.flickr.com',
"wikidata_id": 'Q103204',
"official_api_documentation": 'https://secure.flickr.com/services/api/flickr.photos.search.html',
"use_official_api": False,
"require_api_key": False,
"results": 'HTML',
}
categories = ['images']
url = 'https://www.flickr.com/'

View File

@ -1,13 +1,6 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
""" """
FramaLibre (It) FramaLibre (It)
@website https://framalibre.org/
@provide-api no
@using-api no
@results HTML
@stable no (HTML can change)
@parse url, title, content, thumbnail, img_src
""" """
from html import escape from html import escape
@ -15,6 +8,16 @@ from urllib.parse import urljoin, urlencode
from lxml import html
from searx.utils import extract_text
# about
about = {
"website": 'https://framalibre.org/',
"wikidata_id": 'Q30213882',
"official_api_documentation": None,
"use_official_api": False,
"require_api_key": False,
"results": 'HTML',
}
# engine dependent config
categories = ['it']
paging = True

View File

@ -1,17 +1,24 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
""" """
Frinkiac (Images) Frinkiac (Images)
@website https://www.frinkiac.com
@provide-api no
@using-api no
@results JSON
@stable no
@parse url, title, img_src
""" """
from json import loads from json import loads
from urllib.parse import urlencode from urllib.parse import urlencode
# about
about = {
"website": 'https://frinkiac.com',
"wikidata_id": 'Q24882614',
"official_api_documentation": {
'url': None,
'comment': 'see https://github.com/MitchellAW/CompuGlobal'
},
"use_official_api": False,
"require_api_key": False,
"results": 'JSON',
}
categories = ['images']
BASE = 'https://frinkiac.com/'

View File

@ -1,19 +1,22 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
""" """
Genius Genius
@website https://www.genius.com/
@provide-api yes (https://docs.genius.com/)
@using-api yes
@results JSON
@stable yes
@parse url, title, content, thumbnail, publishedDate
""" """
from json import loads from json import loads
from urllib.parse import urlencode from urllib.parse import urlencode
from datetime import datetime from datetime import datetime
# about
about = {
"website": 'https://genius.com/',
"wikidata_id": 'Q3419343',
"official_api_documentation": 'https://docs.genius.com/',
"use_official_api": True,
"require_api_key": False,
"results": 'JSON',
}
# engine dependent config
categories = ['music']
paging = True

View File

@ -1,20 +1,22 @@
# -*- coding: utf-8 -*-
# SPDX-License-Identifier: AGPL-3.0-or-later
"""
Gentoo Wiki
@website https://wiki.gentoo.org
@provide-api yes
@using-api no
@results HTML
@stable no (HTML can change)
@parse url, title
""" """
from urllib.parse import urlencode, urljoin from urllib.parse import urlencode, urljoin
from lxml import html from lxml import html
from searx.utils import extract_text from searx.utils import extract_text
# about
about = {
"website": 'https://wiki.gentoo.org/',
"wikidata_id": 'Q1050637',
"official_api_documentation": 'https://wiki.gentoo.org/api.php',
"use_official_api": False,
"require_api_key": False,
"results": 'HTML',
}
# engine dependent config
categories = ['it']
language_support = True

View File

@ -1,14 +1,6 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
"""
Gigablast (Web)
@website https://gigablast.com
@provide-api yes (https://gigablast.com/api.html)
@using-api yes
@results XML
@stable yes
@parse url, title, content
""" """
# pylint: disable=missing-function-docstring, invalid-name # pylint: disable=missing-function-docstring, invalid-name
@ -18,6 +10,16 @@ from urllib.parse import urlencode
# from searx import logger
from searx.poolrequests import get
# about
about = {
"website": 'https://www.gigablast.com',
"wikidata_id": 'Q3105449',
"official_api_documentation": 'https://gigablast.com/api.html',
"use_official_api": True,
"require_api_key": False,
"results": 'JSON',
}
# engine dependent config
categories = ['general']
# gigablast's pagination is totally damaged, don't use it

View File

@ -1,18 +1,21 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
""" """
Github (It) Github (IT)
@website https://github.com/
@provide-api yes (https://developer.github.com/v3/)
@using-api yes
@results JSON
@stable yes (using api)
@parse url, title, content
""" """
from json import loads from json import loads
from urllib.parse import urlencode from urllib.parse import urlencode
# about
about = {
"website": 'https://github.com/',
"wikidata_id": 'Q364',
"official_api_documentation": 'https://developer.github.com/v3/',
"use_official_api": True,
"require_api_key": False,
"results": 'JSON',
}
# engine dependent config
categories = ['it']

View File

@ -1,19 +1,11 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
"""Google (Web)
:website: https://www.google.com
:provide-api: yes (https://developers.google.com/custom-search/)
:using-api: not the offical, since it needs registration to another service
:results: HTML
:stable: no
:parse: url, title, content, number_of_results, answer, suggestion, correction
For detailed description of the *REST-full* API see: `Query Parameter
Definitions`_.
.. _Query Parameter Definitions:
    https://developers.google.com/custom-search/docs/xml_results#WebSearch_Query_Parameter_Definitions
"""
# pylint: disable=invalid-name, missing-function-docstring
@ -27,6 +19,16 @@ from searx.exceptions import SearxEngineCaptchaException
logger = logger.getChild('google engine')
# about
about = {
"website": 'https://www.google.com',
"wikidata_id": 'Q9366',
"official_api_documentation": 'https://developers.google.com/custom-search/',
"use_official_api": False,
"require_api_key": False,
"results": 'HTML',
}
# engine dependent config
categories = ['general']
paging = True

View File

@ -1,14 +1,6 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
"""Google (Images)
:website: https://images.google.com (redirected to subdomain www.)
:provide-api: yes (https://developers.google.com/custom-search/)
:using-api: not the offical, since it needs registration to another service
:results: HTML
:stable: no
:template: images.html
:parse: url, title, content, source, thumbnail_src, img_src
For detailed description of the *REST-full* API see: `Query Parameter
Definitions`_.
@ -18,10 +10,6 @@ Definitions`_.
``data:` scheme).::
    Header set Content-Security-Policy "img-src 'self' data: ;"
.. _Query Parameter Definitions:
https://developers.google.com/custom-search/docs/xml_results#WebSearch_Query_Parameter_Definitions
""" """
from urllib.parse import urlencode, urlparse, unquote from urllib.parse import urlencode, urlparse, unquote
@ -39,6 +27,16 @@ from searx.engines.google import (
logger = logger.getChild('google images')
# about
about = {
"website": 'https://images.google.com/',
"wikidata_id": 'Q521550',
"official_api_documentation": 'https://developers.google.com/custom-search/docs/xml_results#WebSearch_Query_Parameter_Definitions', # NOQA
"use_official_api": False,
"require_api_key": False,
"results": 'HTML',
}
# engine dependent config
categories = ['images']

View File

@ -1,13 +1,6 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
""" """
Google (News) Google (News)
@website https://news.google.com
@provide-api no
@using-api no
@results HTML
@stable no
@parse url, title, content, publishedDate
""" """
from urllib.parse import urlencode from urllib.parse import urlencode
@ -15,6 +8,16 @@ from lxml import html
from searx.utils import match_language
from searx.engines.google import _fetch_supported_languages, supported_languages_url # NOQA # pylint: disable=unused-import
# about
about = {
"website": 'https://news.google.com',
"wikidata_id": 'Q12020',
"official_api_documentation": None,
"use_official_api": False,
"require_api_key": False,
"results": 'HTML',
}
# search-url
categories = ['news']
paging = True

View File

@ -1,13 +1,6 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
""" """
Google (Videos) Google (Videos)
@website https://www.google.com
@provide-api yes (https://developers.google.com/custom-search/)
@using-api no
@results HTML
@stable no
@parse url, title, content, thumbnail
""" """
from datetime import date, timedelta from datetime import date, timedelta
@ -16,6 +9,16 @@ from lxml import html
from searx.utils import extract_text, eval_xpath, eval_xpath_list, eval_xpath_getindex
import re
# about
about = {
"website": 'https://www.google.com',
"wikidata_id": 'Q219885',
"official_api_documentation": 'https://developers.google.com/custom-search/',
"use_official_api": False,
"require_api_key": False,
"results": 'HTML',
}
# engine dependent config
categories = ['videos']
paging = True

View File

@ -1,15 +1,7 @@
# INA (Videos)
#
# @website https://www.ina.fr/
# @provide-api no
#
# @using-api no
# @results HTML (using search portal)
# @stable no (HTML can change)
# @parse url, title, content, publishedDate, thumbnail
#
# @todo set content-parameter with correct data
# @todo embedded (needs some md5 from video page)
# SPDX-License-Identifier: AGPL-3.0-or-later
"""
INA (Videos)
"""
from json import loads
from html import unescape
@ -18,6 +10,15 @@ from lxml import html
from dateutil import parser
from searx.utils import extract_text
# about
about = {
"website": 'https://www.ina.fr/',
"wikidata_id": 'Q1665109',
"official_api_documentation": None,
"use_official_api": False,
"require_api_key": False,
"results": 'HTML',
}
# engine dependent config
categories = ['videos']

View File

@ -1,17 +1,22 @@
# Invidious (Videos)
#
# @website https://invidio.us/
# @provide-api yes (https://github.com/omarroth/invidious/wiki/API)
#
# @using-api yes
# @results JSON
# @stable yes
# @parse url, title, content, publishedDate, thumbnail, embedded, author, length
# SPDX-License-Identifier: AGPL-3.0-or-later
"""
Invidious (Videos)
"""
from urllib.parse import quote_plus
from dateutil import parser
import time
# about
about = {
"website": 'https://instances.invidio.us/',
"wikidata_id": 'Q79343316',
"official_api_documentation": 'https://github.com/omarroth/invidious/wiki/API',
"use_official_api": True,
"require_api_key": False,
"results": 'JSON',
}
# engine dependent config
categories = ["videos", "music"]
paging = True

View File

@ -1,3 +1,5 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
from collections.abc import Iterable
from json import loads
from urllib.parse import urlencode

View File

@ -1,13 +1,6 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
""" """
Kickass Torrent (Videos, Music, Files) Kickass Torrent (Videos, Music, Files)
@website https://kickass.so
@provide-api no (nothing found)
@using-api no
@results HTML (using search portal)
@stable yes (HTML can change)
@parse url, title, content, seed, leech, magnetlink
""" """
from lxml import html from lxml import html
@ -15,6 +8,16 @@ from operator import itemgetter
from urllib.parse import quote, urljoin
from searx.utils import extract_text, get_torrent_size, convert_str_to_int
# about
about = {
"website": 'https://kickass.so',
"wikidata_id": 'Q17062285',
"official_api_documentation": None,
"use_official_api": False,
"require_api_key": False,
"results": 'HTML',
}
# engine dependent config
categories = ['videos', 'music', 'files']
paging = True

View File

@ -1,21 +1,22 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
""" """
general mediawiki-engine (Web) General mediawiki-engine (Web)
@website websites built on mediawiki (https://www.mediawiki.org)
@provide-api yes (http://www.mediawiki.org/wiki/API:Search)
@using-api yes
@results JSON
@stable yes
@parse url, title
@todo content
""" """
from json import loads from json import loads
from string import Formatter from string import Formatter
from urllib.parse import urlencode, quote from urllib.parse import urlencode, quote
# about
about = {
"website": None,
"wikidata_id": None,
"official_api_documentation": 'http://www.mediawiki.org/wiki/API:Search',
"use_official_api": True,
"require_api_key": False,
"results": 'JSON',
}
# engine dependent config
categories = ['general']
language_support = True

View File

@ -1,12 +1,6 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
""" """
Microsoft Academic (Science) Microsoft Academic (Science)
@website https://academic.microsoft.com
@provide-api yes
@using-api no
@results JSON
@stable no
@parse url, title, content
""" """
from datetime import datetime from datetime import datetime
@ -15,6 +9,16 @@ from uuid import uuid4
from urllib.parse import urlencode
from searx.utils import html_to_text
# about
about = {
"website": 'https://academic.microsoft.com',
"wikidata_id": 'Q28136779',
"official_api_documentation": 'http://ma-graph.org/',
"use_official_api": False,
"require_api_key": False,
"results": 'JSON',
}
categories = ['images']
paging = True
result_url = 'https://academic.microsoft.com/api/search/GetEntityResults?{query}'

View File

@ -1,19 +1,22 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
""" """
Mixcloud (Music) Mixcloud (Music)
@website https://http://www.mixcloud.com/
@provide-api yes (http://www.mixcloud.com/developers/
@using-api yes
@results JSON
@stable yes
@parse url, title, content, embedded, publishedDate
""" """
from json import loads from json import loads
from dateutil import parser from dateutil import parser
from urllib.parse import urlencode from urllib.parse import urlencode
# about
about = {
"website": 'https://www.mixcloud.com/',
"wikidata_id": 'Q6883832',
"official_api_documentation": 'http://www.mixcloud.com/developers/',
"use_official_api": True,
"require_api_key": False,
"results": 'JSON',
}
# engine dependent config
categories = ['music']
paging = True

View File

@ -1,19 +1,22 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
""" """
not Evil (Onions) not Evil (Onions)
@website http://hss3uro2hsxfogfq.onion
@provide-api yes (http://hss3uro2hsxfogfq.onion/api.htm)
@using-api no
@results HTML
@stable no
@parse url, title, content
""" """
from urllib.parse import urlencode from urllib.parse import urlencode
from lxml import html from lxml import html
from searx.engines.xpath import extract_text from searx.engines.xpath import extract_text
# about
about = {
"website": 'http://hss3uro2hsxfogfq.onion',
"wikidata_id": None,
"official_api_documentation": 'http://hss3uro2hsxfogfq.onion/api.htm',
"use_official_api": False,
"require_api_key": False,
"results": 'HTML',
}
# engine dependent config
categories = ['onions']
paging = True

View File

@ -1,18 +1,22 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
""" """
Nyaa.si (Anime Bittorrent tracker) Nyaa.si (Anime Bittorrent tracker)
@website https://nyaa.si/
@provide-api no
@using-api no
@results HTML
@stable no (HTML can change)
@parse url, title, content, seed, leech, torrentfile
""" """
from lxml import html from lxml import html
from urllib.parse import urlencode from urllib.parse import urlencode
from searx.utils import extract_text, get_torrent_size, int_or_zero from searx.utils import extract_text, get_torrent_size, int_or_zero
# about
about = {
"website": 'https://nyaa.si/',
"wikidata_id": None,
"official_api_documentation": None,
"use_official_api": False,
"require_api_key": False,
"results": 'HTML',
}
# engine dependent config
categories = ['files', 'images', 'videos', 'music']
paging = True

View File

@ -1,18 +1,22 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
""" """
Open Semantic Search Open Semantic Search
@website https://www.opensemanticsearch.org/
@provide-api yes (https://www.opensemanticsearch.org/dev)
@using-api yes
@results JSON
@stable yes
@parse url, title, content, publishedDate
""" """
from dateutil import parser from dateutil import parser
from json import loads from json import loads
from urllib.parse import quote from urllib.parse import quote
# about
about = {
"website": 'https://www.opensemanticsearch.org/',
"wikidata_id": None,
"official_api_documentation": 'https://www.opensemanticsearch.org/dev',
"use_official_api": True,
"require_api_key": False,
"results": 'JSON',
}
base_url = 'http://localhost:8983/solr/opensemanticsearch/'
search_string = 'query?q={query}'

View File

@ -1,19 +1,22 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
""" """
OpenStreetMap (Map) OpenStreetMap (Map)
@website https://openstreetmap.org/
@provide-api yes (http://wiki.openstreetmap.org/wiki/Nominatim)
@using-api yes
@results JSON
@stable yes
@parse url, title
""" """
import re import re
from json import loads from json import loads
from flask_babel import gettext from flask_babel import gettext
# about
about = {
"website": 'https://www.openstreetmap.org/',
"wikidata_id": 'Q936',
"official_api_documentation": 'http://wiki.openstreetmap.org/wiki/Nominatim',
"use_official_api": True,
"require_api_key": False,
"results": 'JSON',
}
# engine dependent config
categories = ['map']
paging = False

View File

@ -1,18 +1,21 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
""" """
PDBe (Protein Data Bank in Europe) PDBe (Protein Data Bank in Europe)
@website https://www.ebi.ac.uk/pdbe
@provide-api yes (https://www.ebi.ac.uk/pdbe/api/doc/search.html),
unlimited
@using-api yes
@results python dictionary (from json)
@stable yes
@parse url, title, content, img_src
""" """
from json import loads from json import loads
from flask_babel import gettext from flask_babel import gettext
# about
about = {
"website": 'https://www.ebi.ac.uk/pdbe',
"wikidata_id": 'Q55823905',
"official_api_documentation": 'https://www.ebi.ac.uk/pdbe/api/doc/search.html',
"use_official_api": True,
"require_api_key": False,
"results": 'JSON',
}
categories = ['science']
hide_obsolete = False

View File

@ -1,15 +1,6 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
""" """
peertube (Videos) peertube (Videos)
@website https://www.peertube.live
@provide-api yes (https://docs.joinpeertube.org/api-rest-reference.html)
@using-api yes
@results JSON
@stable yes
@parse url, title, thumbnail, publishedDate, embedded
@todo implement time range support
""" """
from json import loads from json import loads
@ -17,6 +8,16 @@ from datetime import datetime
from urllib.parse import urlencode
from searx.utils import html_to_text
# about
about = {
"website": 'https://joinpeertube.org',
"wikidata_id": 'Q50938515',
"official_api_documentation": 'https://docs.joinpeertube.org/api-rest-reference.html',
"use_official_api": True,
"require_api_key": False,
"results": 'JSON',
}
# engine dependent config
categories = ["videos"]
paging = True

View File

@ -1,19 +1,22 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
""" """
Photon (Map) Photon (Map)
@website https://photon.komoot.de
@provide-api yes (https://photon.komoot.de/)
@using-api yes
@results JSON
@stable yes
@parse url, title
""" """
from json import loads from json import loads
from urllib.parse import urlencode from urllib.parse import urlencode
from searx.utils import searx_useragent from searx.utils import searx_useragent
# about
about = {
"website": 'https://photon.komoot.de',
"wikidata_id": None,
"official_api_documentation": 'https://photon.komoot.de/',
"use_official_api": True,
"require_api_key": False,
"results": 'JSON',
}
# engine dependent config
categories = ['map']
paging = False

View File

@ -1,12 +1,7 @@
# Piratebay (Videos, Music, Files)
#
# @website https://thepiratebay.org
# @provide-api yes (https://apibay.org/)
#
# @using-api yes
# @results JSON
# @stable no (the API is not documented nor versioned)
# @parse url, title, seed, leech, magnetlink, filesize, publishedDate
# SPDX-License-Identifier: AGPL-3.0-or-later
"""
Piratebay (Videos, Music, Files)
"""
from json import loads
from datetime import datetime
@ -15,6 +10,16 @@ from operator import itemgetter
from urllib.parse import quote
from searx.utils import get_torrent_size
# about
about = {
"website": 'https://thepiratebay.org',
"wikidata_id": 'Q22663',
"official_api_documentation": 'https://apibay.org/',
"use_official_api": True,
"require_api_key": False,
"results": 'JSON',
}
# engine dependent config
categories = ["videos", "music", "files"]

View File

@ -1,14 +1,6 @@
#!/usr/bin/env python
# SPDX-License-Identifier: AGPL-3.0-or-later
"""
PubMed (Scholar publications)
@website https://www.ncbi.nlm.nih.gov/pubmed/
@provide-api yes (https://www.ncbi.nlm.nih.gov/home/develop/api/)
@using-api yes
@results XML
@stable yes
@parse url, title, publishedDate, content
More info on api: https://www.ncbi.nlm.nih.gov/books/NBK25501/
""" """
from flask_babel import gettext from flask_babel import gettext
@ -17,6 +9,18 @@ from datetime import datetime
from urllib.parse import urlencode
from searx.poolrequests import get
# about
about = {
"website": 'https://www.ncbi.nlm.nih.gov/pubmed/',
"wikidata_id": 'Q1540899',
"official_api_documentation": {
'url': 'https://www.ncbi.nlm.nih.gov/home/develop/api/',
'comment': 'More info on api: https://www.ncbi.nlm.nih.gov/books/NBK25501/'
},
"use_official_api": True,
"require_api_key": False,
"results": 'XML',
}
categories = ['science']

View File

@ -1,13 +1,6 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
""" """
Qwant (Web, Images, News, Social) Qwant (Web, Images, News, Social)
@website https://qwant.com/
@provide-api not officially (https://api.qwant.com/api/search/)
@using-api yes
@results JSON
@stable yes
@parse url, title, content
""" """
from datetime import datetime from datetime import datetime
@ -17,6 +10,15 @@ from searx.utils import html_to_text, match_language
from searx.exceptions import SearxEngineAPIException, SearxEngineCaptchaException
from searx.raise_for_httperror import raise_for_httperror
# about
about = {
"website": 'https://www.qwant.com/',
"wikidata_id": 'Q14657870',
"official_api_documentation": None,
"use_official_api": True,
"require_api_key": False,
"results": 'JSON',
}
# engine dependent config
categories = []

View File

@ -1,17 +1,22 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
""" """
Recoll (local search engine) Recoll (local search engine)
@using-api yes
@results JSON
@stable yes
@parse url, content, size, abstract, author, mtype, subtype, time, \
filename, label, type, embedded
""" """
from datetime import date, timedelta from datetime import date, timedelta
from json import loads from json import loads
from urllib.parse import urlencode, quote from urllib.parse import urlencode, quote
# about
about = {
"website": None,
"wikidata_id": 'Q15735774',
"official_api_documentation": 'https://www.lesbonscomptes.com/recoll/',
"use_official_api": True,
"require_api_key": False,
"results": 'JSON',
}
# engine dependent config
time_range_support = True

View File

@ -1,19 +1,22 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
""" """
Reddit Reddit
@website https://www.reddit.com/
@provide-api yes (https://www.reddit.com/dev/api)
@using-api yes
@results JSON
@stable yes
@parse url, title, content, thumbnail, publishedDate
""" """
import json import json
from datetime import datetime from datetime import datetime
from urllib.parse import urlencode, urljoin, urlparse from urllib.parse import urlencode, urljoin, urlparse
# about
about = {
"website": 'https://www.reddit.com/',
"wikidata_id": 'Q1136',
"official_api_documentation": 'https://www.reddit.com/dev/api',
"use_official_api": True,
"require_api_key": False,
"results": 'JSON',
}
# engine dependent config
categories = ['general', 'images', 'news', 'social media']
page_size = 25

View File

@ -1,18 +1,21 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
""" """
ScanR Structures (Science) ScanR Structures (Science)
@website https://scanr.enseignementsup-recherche.gouv.fr
@provide-api yes (https://scanr.enseignementsup-recherche.gouv.fr/api/swagger-ui.html)
@using-api yes
@results JSON
@stable yes
@parse url, title, content, img_src
""" """
from json import loads, dumps from json import loads, dumps
from searx.utils import html_to_text from searx.utils import html_to_text
# about
about = {
"website": 'https://scanr.enseignementsup-recherche.gouv.fr',
"wikidata_id": 'Q44105684',
"official_api_documentation": 'https://scanr.enseignementsup-recherche.gouv.fr/opendata',
"use_official_api": True,
"require_api_key": False,
"results": 'JSON',
}
# engine dependent config
categories = ['science']
paging = True

View File

@ -1,18 +1,20 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
""" """
Searchcode (It) Searchcode (IT)
@website https://searchcode.com/
@provide-api yes (https://searchcode.com/api/)
@using-api yes
@results JSON
@stable yes
@parse url, title, content
""" """
from json import loads from json import loads
from urllib.parse import urlencode from urllib.parse import urlencode
# about
about = {
"website": 'https://searchcode.com/',
"wikidata_id": None,
"official_api_documentation": 'https://searchcode.com/api/',
"use_official_api": True,
"require_api_key": False,
"results": 'JSON',
}
# engine dependent config
categories = ['it']

View File

@ -1,18 +1,20 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
""" """
Searx (all) Searx (all)
@website https://github.com/searx/searx
@provide-api yes (https://searx.github.io/searx/dev/search_api.html)
@using-api yes
@results JSON
@stable yes (using api)
@parse url, title, content
""" """
from json import loads from json import loads
from searx.engines import categories as searx_categories from searx.engines import categories as searx_categories
# about
about = {
"website": 'https://github.com/searx/searx',
"wikidata_id": 'Q17639196',
"official_api_documentation": 'https://searx.github.io/searx/dev/search_api.html',
"use_official_api": True,
"require_api_key": False,
"results": 'JSON',
}
categories = searx_categories.keys()

View File

@ -1,17 +1,23 @@
# SepiaSearch (Videos)
#
# @website https://sepiasearch.org
# @provide-api https://framagit.org/framasoft/peertube/search-index/-/tree/master/server/controllers/api
# @using-api yes
# @results JSON
# @stable yes
# @parse url, title, content, publishedDate, thumbnail
# SPDX-License-Identifier: AGPL-3.0-or-later
"""
SepiaSearch (Videos)
"""
from json import loads
from dateutil import parser, relativedelta
from urllib.parse import urlencode
from datetime import datetime
# about
about = {
"website": 'https://sepiasearch.org',
"wikidata_id": None,
"official_api_documentation": "https://framagit.org/framasoft/peertube/search-index/-/tree/master/server/controllers/api", # NOQA
"use_official_api": True,
"require_api_key": False,
"results": 'JSON',
}
categories = ['videos']
paging = True
language_support = True

View File

@ -1,13 +1,6 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
""" """
Soundcloud (Music) Soundcloud (Music)
@website https://soundcloud.com
@provide-api yes (https://developers.soundcloud.com/)
@using-api yes
@results JSON
@stable yes
@parse url, title, content, publishedDate, embedded
""" """
import re import re
@ -18,6 +11,15 @@ from urllib.parse import quote_plus, urlencode
from searx import logger from searx import logger
from searx.poolrequests import get as http_get from searx.poolrequests import get as http_get
# about
about = {
"website": 'https://soundcloud.com',
"wikidata_id": 'Q568769',
"official_api_documentation": 'https://developers.soundcloud.com/',
"use_official_api": True,
"require_api_key": False,
"results": 'JSON',
}
# engine dependent config
categories = ['music']

View File

@ -1,13 +1,6 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
""" """
Spotify (Music) Spotify (Music)
@website https://spotify.com
@provide-api yes (https://developer.spotify.com/web-api/search-item/)
@using-api yes
@results JSON
@stable yes
@parse url, title, content, embedded
""" """
from json import loads from json import loads
@ -15,6 +8,16 @@ from urllib.parse import urlencode
import requests import requests
import base64 import base64
# about
about = {
"website": 'https://www.spotify.com',
"wikidata_id": 'Q689141',
"official_api_documentation": 'https://developer.spotify.com/web-api/search-item/',
"use_official_api": True,
"require_api_key": False,
"results": 'JSON',
}
# engine dependent config
categories = ['music']
paging = True

View File

@ -1,13 +1,6 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
""" """
Stackoverflow (It) Stackoverflow (IT)
@website https://stackoverflow.com/
@provide-api not clear (https://api.stackexchange.com/docs/advanced-search)
@using-api no
@results HTML
@stable no (HTML can change)
@parse url, title, content
""" """
from urllib.parse import urlencode, urljoin, urlparse from urllib.parse import urlencode, urljoin, urlparse
@ -15,6 +8,16 @@ from lxml import html
from searx.utils import extract_text from searx.utils import extract_text
from searx.exceptions import SearxEngineCaptchaException from searx.exceptions import SearxEngineCaptchaException
# about
about = {
"website": 'https://stackoverflow.com/',
"wikidata_id": 'Q549037',
"official_api_documentation": 'https://api.stackexchange.com/docs',
"use_official_api": False,
"require_api_key": False,
"results": 'HTML',
}
# engine dependent config
categories = ['it']
paging = True

View File

@ -1,14 +1,7 @@
# Startpage (Web)
#
# @website https://startpage.com
# @provide-api no (nothing found)
#
# @using-api no
# @results HTML
# @stable no (HTML can change)
# @parse url, title, content
#
# @todo paging
# SPDX-License-Identifier: AGPL-3.0-or-later
"""
Startpage (Web)
"""
from lxml import html
from dateutil import parser
@ -19,6 +12,16 @@ from babel import Locale
from babel.localedata import locale_identifiers
from searx.utils import extract_text, eval_xpath, match_language
# about
about = {
"website": 'https://startpage.com',
"wikidata_id": 'Q2333295',
"official_api_documentation": None,
"use_official_api": False,
"require_api_key": False,
"results": 'HTML',
}
# engine dependent config
categories = ['general']
# there is a mechanism to block "bot" search

View File

@ -1,13 +1,6 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
""" """
Tokyo Toshokan (A BitTorrent Library for Japanese Media) Tokyo Toshokan (A BitTorrent Library for Japanese Media)
@website https://www.tokyotosho.info/
@provide-api no
@using-api no
@results HTML
@stable no (HTML can change)
@parse url, title, publishedDate, seed, leech,
filesize, magnetlink, content
""" """
import re import re
@ -16,6 +9,16 @@ from lxml import html
from datetime import datetime from datetime import datetime
from searx.utils import extract_text, get_torrent_size, int_or_zero from searx.utils import extract_text, get_torrent_size, int_or_zero
# about
about = {
"website": 'https://www.tokyotosho.info/',
"wikidata_id": None,
"official_api_documentation": None,
"use_official_api": False,
"require_api_key": False,
"results": 'HTML',
}
# engine dependent config
categories = ['files', 'videos', 'music']
paging = True

View File

@ -1,14 +1,6 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
""" """
Torrentz2.is (BitTorrent meta-search engine) Torrentz2.is (BitTorrent meta-search engine)
@website https://torrentz2.is/
@provide-api no
@using-api no
@results HTML
@stable no (HTML can change, although unlikely,
see https://torrentz.is/torrentz.btsearch)
@parse url, title, publishedDate, seed, leech, filesize, magnetlink
""" """
import re import re
@ -17,6 +9,16 @@ from lxml import html
from datetime import datetime from datetime import datetime
from searx.utils import extract_text, get_torrent_size from searx.utils import extract_text, get_torrent_size
# about
about = {
"website": 'https://torrentz2.is/',
"wikidata_id": 'Q1156687',
"official_api_documentation": 'https://torrentz.is/torrentz.btsearch',
"use_official_api": False,
"require_api_key": False,
"results": 'HTML',
}
# engine dependent config
categories = ['files', 'videos', 'music']
paging = True

View File

@ -1,14 +1,18 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
""" """
MyMemory Translated MyMemory Translated
@website https://mymemory.translated.net/
@provide-api yes (https://mymemory.translated.net/doc/spec.php)
@using-api yes
@results JSON
@stable yes
@parse url, title, content
""" """
# about
about = {
"website": 'https://mymemory.translated.net/',
"wikidata_id": None,
"official_api_documentation": 'https://mymemory.translated.net/doc/spec.php',
"use_official_api": True,
"require_api_key": False,
"results": 'JSON',
}
engine_type = 'online_dictionnary'
categories = ['general']
url = 'https://api.mymemory.translated.net/get?q={query}&langpair={from_lang}|{to_lang}{key}'

View File

@ -1,18 +1,21 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
""" """
Unsplash Unsplash
@website https://unsplash.com
@provide-api yes (https://unsplash.com/developers)
@using-api no
@results JSON (using search portal's infiniscroll API)
@stable no (JSON format could change any time)
@parse url, title, img_src, thumbnail_src
""" """
from urllib.parse import urlencode, urlparse, urlunparse, parse_qsl from urllib.parse import urlencode, urlparse, urlunparse, parse_qsl
from json import loads from json import loads
# about
about = {
"website": 'https://unsplash.com',
"wikidata_id": 'Q28233552',
"official_api_documentation": 'https://unsplash.com/developers',
"use_official_api": False,
"require_api_key": False,
"results": 'JSON',
}
url = 'https://unsplash.com/'
search_url = url + 'napi/search/photos?'
categories = ['images']

View File

@ -1,21 +1,22 @@
# Vimeo (Videos)
#
# @website https://vimeo.com/
# @provide-api yes (http://developer.vimeo.com/api),
# they have a maximum count of queries/hour
#
# @using-api no (TODO, rewrite to api)
# @results HTML (using search portal)
# @stable no (HTML can change)
# @parse url, title, publishedDate, thumbnail, embedded
#
# @todo rewrite to api
# @todo set content-parameter with correct data
# SPDX-License-Identifier: AGPL-3.0-or-later
"""
Vimeo (Videos)
"""
from urllib.parse import urlencode
from json import loads
from dateutil import parser
# about
about = {
"website": 'https://vimeo.com/',
"wikidata_id": 'Q156376',
"official_api_documentation": 'http://developer.vimeo.com/api',
"use_official_api": False,
"require_api_key": False,
"results": 'HTML',
}
# engine dependent config
categories = ['videos']
paging = True

View File

@ -1,14 +1,6 @@
# -*- coding: utf-8 -*-
# SPDX-License-Identifier: AGPL-3.0-or-later
"""
Wikidata
@website https://wikidata.org
@provide-api yes (https://query.wikidata.org/)
@using-api yes
@results JSON
@stable yes
@parse url, infobox
"""
@ -27,6 +19,16 @@ from searx.engines.wikipedia import _fetch_supported_languages, supported_languages_url
logger = logger.getChild('wikidata')
# about
about = {
"website": 'https://wikidata.org/',
"wikidata_id": 'Q2013',
"official_api_documentation": 'https://query.wikidata.org/',
"use_official_api": True,
"require_api_key": False,
"results": 'JSON',
}
# SPARQL
SPARQL_ENDPOINT_URL = 'https://query.wikidata.org/sparql'
SPARQL_EXPLAIN_URL = 'https://query.wikidata.org/bigdata/namespace/wdq/sparql?explain'

View File

@ -1,13 +1,6 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
""" """
Wikipedia (Web) Wikipedia (Web)
@website https://en.wikipedia.org/api/rest_v1/
@provide-api yes
@using-api yes
@results JSON
@stable yes
@parse url, infobox
""" """
from urllib.parse import quote from urllib.parse import quote
@ -16,6 +9,16 @@ from lxml.html import fromstring
from searx.utils import match_language, searx_useragent from searx.utils import match_language, searx_useragent
from searx.raise_for_httperror import raise_for_httperror from searx.raise_for_httperror import raise_for_httperror
# about
about = {
"website": 'https://www.wikipedia.org/',
"wikidata_id": 'Q52',
"official_api_documentation": 'https://en.wikipedia.org/api/',
"use_official_api": True,
"require_api_key": False,
"results": 'JSON',
}
# search-url
search_url = 'https://{language}.wikipedia.org/api/rest_v1/page/summary/{title}'
supported_languages_url = 'https://meta.wikimedia.org/wiki/List_of_Wikipedias'

View File

@ -1,16 +1,21 @@
# Wolfram Alpha (Science)
#
# @website https://www.wolframalpha.com
# @provide-api yes (https://api.wolframalpha.com/v2/)
#
# @using-api yes
# @results XML
# @stable yes
# @parse url, infobox
# SPDX-License-Identifier: AGPL-3.0-or-later
"""
Wolfram|Alpha (Science)
"""
from lxml import etree
from urllib.parse import urlencode
# about
about = {
"website": 'https://www.wolframalpha.com',
"wikidata_id": 'Q207006',
"official_api_documentation": 'https://products.wolframalpha.com/api/',
"use_official_api": True,
"require_api_key": False,
"results": 'XML',
}
# search-url
search_url = 'https://api.wolframalpha.com/v2/query?appid={api_key}&{query}'
site_url = 'https://www.wolframalpha.com/input/?{query}'

View File

@ -1,12 +1,7 @@
# Wolfram|Alpha (Science)
#
# @website https://www.wolframalpha.com/
# @provide-api yes (https://api.wolframalpha.com/v2/)
#
# @using-api no
# @results JSON
# @stable no
# @parse url, infobox
# SPDX-License-Identifier: AGPL-3.0-or-later
"""
Wolfram|Alpha (Science)
"""
from json import loads
from time import time
@ -14,6 +9,16 @@ from urllib.parse import urlencode
from searx.poolrequests import get as http_get
# about
about = {
"website": 'https://www.wolframalpha.com/',
"wikidata_id": 'Q207006',
"official_api_documentation": 'https://products.wolframalpha.com/api/',
"use_official_api": False,
"require_api_key": False,
"results": 'JSON',
}
# search-url
url = 'https://www.wolframalpha.com/'

View File

@ -1,19 +1,22 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
""" """
1x (Images) 1x (Images)
@website http://1x.com/
@provide-api no
@using-api no
@results HTML
@stable no (HTML can change)
@parse url, title, thumbnail
""" """
from lxml import html, etree from lxml import html, etree
from urllib.parse import urlencode, urljoin from urllib.parse import urlencode, urljoin
from searx.utils import extract_text, eval_xpath_list, eval_xpath_getindex from searx.utils import extract_text, eval_xpath_list, eval_xpath_getindex
# about
about = {
"website": 'https://1x.com/',
"wikidata_id": None,
"official_api_documentation": None,
"use_official_api": False,
"require_api_key": False,
"results": 'HTML',
}
# engine dependent config
categories = ['images']
paging = False

View File

@ -1,3 +1,5 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
from lxml import html
from urllib.parse import urlencode
from searx.utils import extract_text, extract_url, eval_xpath, eval_xpath_list

View File

@ -1,16 +1,7 @@
# Yacy (Web, Images, Videos, Music, Files)
#
# @website http://yacy.net
# @provide-api yes
# (http://www.yacy-websuche.de/wiki/index.php/Dev:APIyacysearch)
#
# @using-api yes
# @results JSON
# @stable yes
# @parse (general) url, title, content, publishedDate
# @parse (images) url, title, img_src
#
# @todo parse video, audio and file results
# SPDX-License-Identifier: AGPL-3.0-or-later
"""
Yacy (Web, Images, Videos, Music, Files)
"""
from json import loads
from dateutil import parser
@ -20,6 +11,16 @@ from requests.auth import HTTPDigestAuth
from searx.utils import html_to_text
# about
about = {
"website": 'https://yacy.net/',
"wikidata_id": 'Q1759675',
"official_api_documentation": 'https://wiki.yacy.net/index.php/Dev:API',
"use_official_api": True,
"require_api_key": False,
"results": 'JSON',
}
# engine dependent config
categories = ['general', 'images'] # TODO , 'music', 'videos', 'files'
paging = True

View File

@ -1,20 +1,22 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
""" """
Yahoo (Web) Yahoo (Web)
@website https://search.yahoo.com/web
@provide-api yes (https://developer.yahoo.com/boss/search/),
$0.80/1000 queries
@using-api no (because pricing)
@results HTML (using search portal)
@stable no (HTML can change)
@parse url, title, content, suggestion
""" """
from urllib.parse import unquote, urlencode from urllib.parse import unquote, urlencode
from lxml import html from lxml import html
from searx.utils import extract_text, extract_url, match_language, eval_xpath from searx.utils import extract_text, extract_url, match_language, eval_xpath
# about
about = {
"website": 'https://search.yahoo.com/',
"wikidata_id": None,
"official_api_documentation": 'https://developer.yahoo.com/api/',
"use_official_api": False,
"require_api_key": False,
"results": 'HTML',
}
# engine dependent config
categories = ['general']
paging = True

View File

@ -1,13 +1,7 @@
# Yahoo (News)
#
# @website https://news.yahoo.com
# @provide-api yes (https://developer.yahoo.com/boss/search/)
# $0.80/1000 queries
#
# @using-api no (because pricing)
# @results HTML (using search portal)
# @stable no (HTML can change)
# @parse url, title, content, publishedDate
# SPDX-License-Identifier: AGPL-3.0-or-later
"""
Yahoo (News)
"""
import re
from datetime import datetime, timedelta
@ -18,6 +12,16 @@ from searx.engines.yahoo import _fetch_supported_languages, supported_languages_url
from dateutil import parser
from searx.utils import extract_text, extract_url, match_language
# about
about = {
"website": 'https://news.yahoo.com',
"wikidata_id": 'Q3044717',
"official_api_documentation": 'https://developer.yahoo.com/api/',
"use_official_api": False,
"require_api_key": False,
"results": 'HTML',
}
# engine dependent config
categories = ['news']
paging = True

View File

@ -1,12 +1,6 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
""" """
Yahoo (Web) Yahoo (Web)
@website https://yandex.ru/
@provide-api ?
@using-api no
@results HTML (using search portal)
@stable no (HTML can change)
@parse url, title, content
""" """
from urllib.parse import urlencode, urlparse from urllib.parse import urlencode, urlparse
@ -16,6 +10,16 @@ from searx.exceptions import SearxEngineCaptchaException
logger = logger.getChild('yandex engine') logger = logger.getChild('yandex engine')
# about
about = {
"website": 'https://yandex.ru/',
"wikidata_id": 'Q5281',
"official_api_documentation": "?",
"use_official_api": False,
"require_api_key": False,
"results": 'HTML',
}
# engine dependent config
categories = ['general']
paging = True

View File

@ -1,12 +1,7 @@
# Yggtorrent (Videos, Music, Files)
#
# @website https://www2.yggtorrent.si
# @provide-api no (nothing found)
#
# @using-api no
# @results HTML (using search portal)
# @stable no (HTML can change)
# @parse url, title, seed, leech, publishedDate, filesize
# SPDX-License-Identifier: AGPL-3.0-or-later
"""
Yggtorrent (Videos, Music, Files)
"""
from lxml import html
from operator import itemgetter
@ -15,6 +10,16 @@ from urllib.parse import quote
from searx.utils import extract_text, get_torrent_size
from searx.poolrequests import get as http_get
# about
about = {
"website": 'https://www2.yggtorrent.si',
"wikidata_id": None,
"official_api_documentation": None,
"use_official_api": False,
"require_api_key": False,
"results": 'HTML',
}
# engine dependent config
categories = ['videos', 'music', 'files']
paging = True

View File

@ -1,18 +1,23 @@
# Youtube (Videos)
#
# @website https://www.youtube.com/
# @provide-api yes (https://developers.google.com/apis-explorer/#p/youtube/v3/youtube.search.list)
#
# @using-api yes
# @results JSON
# @stable yes
# @parse url, title, content, publishedDate, thumbnail, embedded
# SPDX-License-Identifier: AGPL-3.0-or-later
"""
Youtube (Videos)
"""
from json import loads
from dateutil import parser
from urllib.parse import urlencode
from searx.exceptions import SearxEngineAPIException
# about
about = {
"website": 'https://www.youtube.com/',
"wikidata_id": 'Q866',
"official_api_documentation": 'https://developers.google.com/youtube/v3/docs/search/list?apix=true',
"use_official_api": True,
"require_api_key": False,
"results": 'JSON',
}
# engine dependent config
categories = ['videos', 'music']
paging = False

View File

@ -1,17 +1,22 @@
# Youtube (Videos)
#
# @website https://www.youtube.com/
# @provide-api yes (https://developers.google.com/apis-explorer/#p/youtube/v3/youtube.search.list)
#
# @using-api no
# @results HTML
# @stable no
# @parse url, title, content, publishedDate, thumbnail, embedded
# SPDX-License-Identifier: AGPL-3.0-or-later
"""
Youtube (Videos)
"""
from functools import reduce
from json import loads
from urllib.parse import quote_plus
# about
about = {
"website": 'https://www.youtube.com/',
"wikidata_id": 'Q866',
"official_api_documentation": 'https://developers.google.com/youtube/v3/docs/search/list?apix=true',
"use_official_api": False,
"require_api_key": False,
"results": 'HTML',
}
# engine dependent config
categories = ['videos', 'music']
paging = True

View File

@ -157,6 +157,13 @@ engines:
timeout : 7.0
disabled : True
shortcut : ai
about:
website: https://archive.is/
wikidata_id: Q13515725
official_api_documentation: http://mementoweb.org/depot/native/archiveis/
use_official_api: false
require_api_key: false
results: HTML
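Engines configured purely in settings.yml (xpath, json_engine and mediawiki instances) get their metadata here instead, since they have no dedicated module to hold it. The merge logic is not part of this hunk; a plausible sketch of layering a per-instance about: block over a module-level default, where the merge_about name and the precedence order are assumptions rather than code from this commit:

    def merge_about(engine_module, engine_settings):
        # start from the module-level defaults, if any
        about = dict(getattr(engine_module, 'about', {}))
        # let per-instance settings.yml values win over module defaults
        about.update(engine_settings.get('about', {}))
        return about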
- name : arxiv
engine : arxiv
@ -201,6 +208,13 @@ engines:
timeout : 4.0
disabled : True
shortcut : bb
about:
website: https://bitbucket.org/
wikidata_id: Q2493781
official_api_documentation: https://developer.atlassian.com/bitbucket
use_official_api: false
require_api_key: false
results: HTML
- name : btdigg
engine : btdigg
@ -216,6 +230,13 @@ engines:
categories : videos
disabled : True
shortcut : c3tv
about:
website: https://media.ccc.de/
wikidata_id: Q80729951
official_api_documentation: https://github.com/voc/voctoweb
use_official_api: false
require_api_key: false
results: HTML
- name : crossref
engine : json_engine
@ -226,6 +247,13 @@ engines:
content_query : fullCitation
categories : science
shortcut : cr
about:
website: https://www.crossref.org/
wikidata_id: Q5188229
official_api_documentation: https://github.com/CrossRef/rest-api-doc
use_official_api: false
require_api_key: false
results: JSON
- name : currency
engine : currency_convert
@ -271,6 +299,13 @@ engines:
categories : general
shortcut : ew
disabled : True
about:
website: https://www.erowid.org/
wikidata_id: Q1430691
official_api_documentation:
use_official_api: false
require_api_key: false
results: HTML
# - name : elasticsearch
# shortcut : es
@ -321,6 +356,13 @@ engines:
first_page_num : 1
shortcut : et
disabled : True
about:
website: https://www.etymonline.com/
wikidata_id: Q1188617
official_api_documentation:
use_official_api: false
require_api_key: false
results: HTML
# - name : ebay
# engine : ebay
@ -360,6 +402,9 @@ engines:
search_type : title
timeout : 5.0
disabled : True
about:
website: https://directory.fsf.org/
wikidata_id: Q2470288
- name : frinkiac
engine : frinkiac
@ -394,6 +439,13 @@ engines:
shortcut : gl
timeout : 10.0
disabled : True
about:
website: https://about.gitlab.com/
wikidata_id: Q16639197
official_api_documentation: https://docs.gitlab.com/ee/api/
use_official_api: false
require_api_key: false
results: JSON
- name : github
engine : github
@ -411,6 +463,13 @@ engines:
categories : it
shortcut : cb
disabled : True
about:
website: https://codeberg.org/
wikidata_id:
official_api_documentation: https://try.gitea.io/api/swagger
use_official_api: false
require_api_key: false
results: JSON
- name : google
engine : google
@ -441,6 +500,13 @@ engines:
first_page_num : 0
categories : science
shortcut : gos
about:
website: https://scholar.google.com/
wikidata_id: Q494817
official_api_documentation:
use_official_api: false
require_api_key: false
results: HTML
- name : google play apps
engine : xpath
@ -453,6 +519,13 @@ engines:
categories : files
shortcut : gpa
disabled : True
about:
website: https://play.google.com/
wikidata_id: Q79576
official_api_documentation:
use_official_api: false
require_api_key: false
results: HTML
- name : google play movies
engine : xpath
@ -465,6 +538,13 @@ engines:
categories : videos
shortcut : gpm
disabled : True
about:
website: https://play.google.com/
wikidata_id: Q79576
official_api_documentation:
use_official_api: false
require_api_key: false
results: HTML
- name : google play music
engine : xpath
@ -477,6 +557,13 @@ engines:
categories : music
shortcut : gps
disabled : True
about:
website: https://play.google.com/
wikidata_id: Q79576
official_api_documentation:
use_official_api: false
require_api_key: false
results: HTML
- name : geektimes
engine : xpath
@ -489,6 +576,13 @@ engines:
timeout : 4.0
disabled : True
shortcut : gt
about:
website: https://geektimes.ru/
wikidata_id: Q50572423
official_api_documentation:
use_official_api: false
require_api_key: false
results: HTML
- name : habrahabr
engine : xpath
@ -501,6 +595,13 @@ engines:
timeout : 4.0
disabled : True
shortcut : habr
about:
website: https://habr.com/
wikidata_id: Q4494434
official_api_documentation: https://habr.com/en/docs/help/api/
use_official_api: false
require_api_key: false
results: HTML
- name : hoogle
engine : json_engine
@ -513,6 +614,13 @@ engines:
page_size : 20
categories : it
shortcut : ho
about:
website: https://www.haskell.org/
wikidata_id: Q34010
official_api_documentation: https://hackage.haskell.org/api
use_official_api: false
require_api_key: false
results: JSON
- name : ina
engine : ina
@ -543,6 +651,13 @@ engines:
timeout : 7.0
disabled : True
shortcut : lg
about:
website: http://libgen.rs/
wikidata_id: Q22017206
official_api_documentation:
use_official_api: false
require_api_key: false
results: HTML
- name : lobste.rs
engine : xpath
@ -555,6 +670,13 @@ engines:
shortcut : lo
timeout : 3.0
disabled: True
about:
website: https://lobste.rs/
wikidata_id: Q60762874
official_api_documentation:
use_official_api: false
require_api_key: false
results: HTML
- name : metager
engine : xpath
@ -566,6 +688,13 @@ engines:
categories : general
shortcut : mg
disabled : True
about:
website: https://metager.org/
wikidata_id: Q1924645
official_api_documentation:
use_official_api: false
require_api_key: false
results: HTML
- name : microsoft academic
engine : microsoft_academic
@ -589,6 +718,13 @@ engines:
disabled: True
timeout: 5.0
shortcut : npm
about:
website: https://npms.io/
wikidata_id: Q7067518
official_api_documentation: https://api-docs.npms.io/
use_official_api: false
require_api_key: false
results: JSON
# Requires Tor
- name : not evil
@ -617,6 +753,13 @@ engines:
categories : science
shortcut : oad
timeout: 5.0
about:
website: https://www.openaire.eu/
wikidata_id: Q25106053
official_api_documentation: https://api.openaire.eu/
use_official_api: false
require_api_key: false
results: JSON
- name : openairepublications
engine : json_engine
@ -629,6 +772,13 @@ engines:
categories : science
shortcut : oap
timeout: 5.0
about:
website: https://www.openaire.eu/
wikidata_id: Q25106053
official_api_documentation: https://api.openaire.eu/
use_official_api: false
require_api_key: false
results: JSON
# - name : opensemanticsearch
# engine : opensemantic
@ -650,6 +800,13 @@ engines:
timeout : 4.0
disabled : True
shortcut : or
about:
website: https://openrepos.net/
wikidata_id:
official_api_documentation:
use_official_api: false
require_api_key: false
results: HTML
- name : pdbe
engine : pdbe
@ -768,6 +925,13 @@ engines:
content_xpath : .//div[@class="search-result-abstract"]
shortcut : se
categories : science
about:
website: https://www.semanticscholar.org/
wikidata_id: Q22908627
official_api_documentation: https://api.semanticscholar.org/
use_official_api: false
require_api_key: false
results: HTML
# Spotify needs API credentials
# - name : spotify
@ -876,6 +1040,9 @@ engines:
number_of_results : 5
search_type : text
disabled : True
about:
website: https://www.wikibooks.org/
wikidata_id: Q367
- name : wikinews
engine : mediawiki
@ -885,6 +1052,9 @@ engines:
number_of_results : 5
search_type : text
disabled : True
about:
website: https://www.wikinews.org/
wikidata_id: Q964
- name : wikiquote
engine : mediawiki
@ -896,6 +1066,9 @@ engines:
disabled : True
additional_tests:
rosebud: *test_rosebud
about:
website: https://www.wikiquote.org/
wikidata_id: Q369
- name : wikisource
engine : mediawiki
@ -905,6 +1078,9 @@ engines:
number_of_results : 5
search_type : text
disabled : True
about:
website: https://www.wikisource.org/
wikidata_id: Q263
- name : wiktionary
engine : mediawiki
@ -914,6 +1090,9 @@ engines:
number_of_results : 5
search_type : text
disabled : True
about:
website: https://www.wiktionary.org/
wikidata_id: Q151
- name : wikiversity
engine : mediawiki
@ -923,6 +1102,9 @@ engines:
number_of_results : 5
search_type : text
disabled : True
about:
website: https://www.wikiversity.org/
wikidata_id: Q370
- name : wikivoyage
engine : mediawiki
@ -932,6 +1114,9 @@ engines:
number_of_results : 5
search_type : text
disabled : True
about:
website: https://www.wikivoyage.org/
wikidata_id: Q373
- name : wolframalpha
shortcut : wa
@ -979,6 +1164,13 @@ engines:
first_page_num : 0
page_size : 10
disabled : True
about:
website: https://www.seznam.cz/
wikidata_id: Q3490485
official_api_documentation: https://api.sklik.cz/
use_official_api: false
require_api_key: false
results: HTML
- name : mojeek
shortcut: mjk
@ -993,6 +1185,13 @@ engines:
first_page_num : 0
page_size : 10
disabled : True
about:
website: https://www.mojeek.com/
wikidata_id: Q60747299
official_api_documentation: https://www.mojeek.com/services/api.html/
use_official_api: false
require_api_key: false
results: HTML
- name : naver
shortcut: nvr
@ -1007,6 +1206,13 @@ engines:
first_page_num : 1
page_size : 10
disabled : True
about:
website: https://www.naver.com/
wikidata_id: Q485639
official_api_documentation: https://developers.naver.com/docs/nmt/examples/
use_official_api: false
require_api_key: false
results: HTML
- name : rubygems
shortcut: rbg
@ -1021,6 +1227,13 @@ engines:
first_page_num : 1
categories: it
disabled : True
about:
website: https://rubygems.org/
wikidata_id: Q1853420
official_api_documentation: https://guides.rubygems.org/rubygems-org-api/
use_official_api: false
require_api_key: false
results: HTML
- name : peertube
engine: peertube

View File

@ -0,0 +1,206 @@
#!/usr/bin/env python
import sys
import json
from urllib.parse import quote, urlparse
from os.path import realpath, dirname
import cld3
from lxml.html import fromstring
# set path
sys.path.append(realpath(dirname(realpath(__file__)) + '/../'))
from searx.engines.wikidata import send_wikidata_query
from searx.utils import extract_text
import searx
import searx.search
import searx.poolrequests
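# The two SPARQL queries below drive the harvest: the first maps each engine's
# wikidata_id to its Wikipedia article name per language, the second pulls the
# short schema:description strings attached to the Wikidata items themselves.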
SPARQL_WIKIPEDIA_ARTICLE = """
SELECT DISTINCT ?item ?name
WHERE {
VALUES ?item { %IDS% }
?article schema:about ?item ;
schema:inLanguage ?lang ;
schema:name ?name ;
schema:isPartOf [ wikibase:wikiGroup "wikipedia" ] .
FILTER(?lang in (%LANGUAGES_SPARQL%)) .
FILTER (!CONTAINS(?name, ':')) .
}
"""
SPARQL_DESCRIPTION = """
SELECT DISTINCT ?item ?itemDescription
WHERE {
VALUES ?item { %IDS% }
?item schema:description ?itemDescription .
FILTER (lang(?itemDescription) in (%LANGUAGES_SPARQL%))
}
ORDER BY ?item
"""
LANGUAGES = searx.settings['locales'].keys()
LANGUAGES_SPARQL = ', '.join(set(map(lambda l: repr(l.split('_')[0]), LANGUAGES)))
IDS = None
descriptions = {}
wd_to_engine_name = {}
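# descriptions: engine name -> language -> [normalized description, source]
# wd_to_engine_name: wikidata id -> set of engine names sharing that id
# IDS: space-separated 'wd:Qxxx' terms, built once in initialize()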
def normalize_description(description):
for c in [chr(c) for c in range(0, 31)]:
description = description.replace(c, ' ')
description = ' '.join(description.strip().split())
return description
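# Keep the first description seen per language unless replace=True; 'source'
# records where it came from ('wikidata', 'wikipedia' or the website URL).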
def update_description(engine_name, lang, description, source, replace=True):
if replace or lang not in descriptions[engine_name]:
descriptions[engine_name][lang] = [normalize_description(description), source]
def get_wikipedia_summary(language, pageid):
search_url = 'https://{language}.wikipedia.org/api/rest_v1/page/summary/{title}'
url = search_url.format(title=quote(pageid), language=language)
try:
response = searx.poolrequests.get(url)
response.raise_for_status()
api_result = json.loads(response.text)
return api_result.get('extract')
except Exception:  # treat network or JSON errors as "no summary available"
return None
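# cld3 reports a language, a probability and a reliability flag; only trust a
# detection that is both reliable and highly confident.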
def detect_language(text):
r = cld3.get_language(str(text)) # pylint: disable=E1101
if r is not None and r.probability >= 0.98 and r.is_reliable:
return r.language
return None
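# Fetch a site's homepage and fall back through <meta name="description">,
# <meta property="og:description"> and finally <title>; the page language
# comes from the html lang attribute, cross-checked against cld3 detection.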
def get_website_description(url, lang1, lang2=None):
headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:84.0) Gecko/20100101 Firefox/84.0',
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
'DNT': '1',
'Upgrade-Insecure-Requests': '1',
'Sec-GPC': '1',
'Cache-Control': 'max-age=0',
}
if lang1 is not None:
lang_list = [lang1]
if lang2 is not None:
lang_list.append(lang2)
headers['Accept-Language'] = f'{",".join(lang_list)};q=0.8'
try:
response = searx.poolrequests.get(url, headers=headers, timeout=10)
response.raise_for_status()
except Exception:
return (None, None)
try:
html = fromstring(response.text)
except ValueError:
html = fromstring(response.content)
description = extract_text(html.xpath('/html/head/meta[@name="description"]/@content'))
if not description:
description = extract_text(html.xpath('/html/head/meta[@property="og:description"]/@content'))
if not description:
description = extract_text(html.xpath('/html/head/title'))
lang = extract_text(html.xpath('/html/@lang'))
if lang is None and lang1:
lang = lang1
lang = detect_language(description) or lang or 'en'
lang = lang.split('_')[0]
lang = lang.split('-')[0]
return (lang, description)
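# initialize() must run first: it boots the searx engine registry, then builds
# the wikidata_id -> engine names map and the SPARQL VALUES list from it.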
def initialize():
global descriptions, wd_to_engine_name, IDS
searx.search.initialize()
for engine_name, engine in searx.engines.engines.items():
descriptions[engine_name] = {}
wikidata_id = getattr(engine, "about", {}).get('wikidata_id')
if wikidata_id is not None:
wd_to_engine_name.setdefault(wikidata_id, set()).add(engine_name)
IDS = ' '.join(list(map(lambda wd_id: 'wd:' + wd_id, wd_to_engine_name.keys())))
def fetch_wikidata_descriptions():
global IDS
result = send_wikidata_query(SPARQL_DESCRIPTION
.replace('%IDS%', IDS)
.replace('%LANGUAGES_SPARQL%', LANGUAGES_SPARQL))
if result is not None:
for binding in result['results']['bindings']:
wikidata_id = binding['item']['value'].replace('http://www.wikidata.org/entity/', '')
lang = binding['itemDescription']['xml:lang']
description = binding['itemDescription']['value']
if ' ' in description: # skip unique word description (like "website")
for engine_name in wd_to_engine_name[wikidata_id]:
update_description(engine_name, lang, description, 'wikidata')
def fetch_wikipedia_descriptions():
global IDS
result = send_wikidata_query(SPARQL_WIKIPEDIA_ARTICLE
.replace('%IDS%', IDS)
.replace('%LANGUAGES_SPARQL%', LANGUAGES_SPARQL))
if result is not None:
for binding in result['results']['bindings']:
wikidata_id = binding['item']['value'].replace('http://www.wikidata.org/entity/', '')
lang = binding['name']['xml:lang']
pageid = binding['name']['value']
description = get_wikipedia_summary(lang, pageid)
if description is not None and ' ' in description:
for engine_name in wd_to_engine_name[wikidata_id]:
update_description(engine_name, lang, description, 'wikipedia')
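# Reduce a search/base URL template to its site origin so it can stand in for
# a missing 'website' (drops path/query parts and the 'api.' host prefix).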
def normalize_url(url):
url = url.replace('{language}', 'en')
url = urlparse(url)._replace(path='/', params='', query='', fragment='').geturl()
url = url.replace('https://api.', 'https://')
return url
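# Last pass: scrape the website itself, first with no preferred language, then
# retrying a fixed shortlist of languages still missing a description.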
def fetch_website_description(engine_name, website):
default_lang, default_description = get_website_description(website, None, None)
if default_lang is None or default_description is None:
return
if default_lang not in descriptions[engine_name]:
descriptions[engine_name][default_lang] = [normalize_description(default_description), website]
for request_lang in ('en-US', 'es-US', 'fr-FR', 'zh', 'ja', 'ru', 'ar', 'ko'):
if request_lang.split('-')[0] not in descriptions[engine_name]:
lang, desc = get_website_description(website, request_lang, request_lang.split('-')[0])
if desc is not None and desc != default_description:
update_description(engine_name, lang, desc, website, replace=False)
else:
break
def fetch_website_descriptions():
for engine_name, engine in searx.engines.engines.items():
website = getattr(engine, "about", {}).get('website')
if website is None and hasattr(engine, "search_url"):
website = normalize_url(getattr(engine, "search_url"))
if website is None and hasattr(engine, "base_url"):
website = normalize_url(getattr(engine, "base_url"))
if website is not None:
fetch_website_description(engine_name, website)
def main():
initialize()
fetch_wikidata_descriptions()
fetch_wikipedia_descriptions()
fetch_website_descriptions()
sys.stdout.write(json.dumps(descriptions, indent=1, separators=(',', ':'), ensure_ascii=False))
if __name__ == "__main__":
main()
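The script writes the collected descriptions as one JSON object (engine name -> language -> [description, source]) to stdout; the commit does not fix a target path, so a redirect such as "python fetch_engine_descriptions.py > engine_descriptions.json" is only illustrative, with both file names hypothetical.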