searxng/searx/engines/duckduckgo.py

# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
"""DuckDuckGo Lite
"""

from json import loads

from lxml.html import fromstring

from searx.utils import (
    dict_subset,
    eval_xpath,
    eval_xpath_getindex,
    extract_text,
    match_language,
)
from searx.network import get

# about: static metadata describing this engine.  Results are scraped from
# the HTML of DuckDuckGo Lite; the official API is not used and no API key
# is required.
about = {
    "website": 'https://lite.duckduckgo.com/lite',
    "wikidata_id": 'Q12805',
    "official_api_documentation": 'https://duckduckgo.com/api',
    "use_official_api": False,
    "require_api_key": False,
    "results": 'HTML',
}

# engine dependent config
categories = ['general']
# request() builds different form data depending on params['pageno']
paging = True
# JS file containing an embedded ``regions:{...}`` object; its content is
# what _fetch_supported_languages() parses.
# NOTE(review): presumably fetched by the engine framework -- confirm.
supported_languages_url = 'https://duckduckgo.com/util/u588.js'
# request() translates params['time_range'] through time_range_dict
time_range_support = True

# Aliases handed to match_language() in get_region_code(): standard language
# codes (keys) mapped to the custom codes used by DuckDuckGo (values).
language_aliases = {
    'ar-SA': 'ar-XA',
    'es-419': 'es-XL',
    'ja': 'jp-JP',
    'ko': 'kr-KR',
    'sl-SI': 'sl-SL',
    'zh-TW': 'tzh-TW',
    'zh-HK': 'tzh-HK'
}

# Maps the time range names found in params['time_range'] to the value sent
# as the 'df' form field and 'df' cookie by request().
time_range_dict = {
    'day': 'd',
    'week': 'w',
    'month': 'm',
    'year': 'y'
}

# search-url: target of the POST sent by request()
url = 'https://lite.duckduckgo.com/lite'
# URL that response() sends an extra GET to each time it processes a response
url_ping = 'https://duckduckgo.com/t/sl_l'

# translate the language of a query into a DuckDuckGo region code
def get_region_code(lang, lang_list=None):
    """Return the region code for ``lang`` or ``None`` if ``lang`` is ``'all'``.

    ``lang`` is resolved with ``match_language`` against ``lang_list`` (an
    empty list when omitted), using ``language_aliases`` and ``'wt-WT'`` as
    the fallback argument.  The resulting ``<language>-<COUNTRY>`` code is
    returned with both parts lower-cased and swapped, i.e. in the form
    ``<country>-<language>``.
    """
    if lang == 'all':
        return None

    candidates = lang_list or []
    matched = match_language(lang, candidates, language_aliases, 'wt-WT')
    pieces = matched.split('-')

    # DuckDuckGo wants the country in front of the language
    return '-'.join((pieces[1].lower(), pieces[0].lower()))


def request(query, params):
    """Fill ``params`` with the POST request for a DuckDuckGo Lite search.

    The form fields are reverse engineered: they mimic what the input form
    of https://lite.duckduckgo.com/lite/ submits when the "next page" link
    is followed again and again.

    :param query: search terms, sent as form field ``q``
    :param params: request parameters; ``pageno``, ``language`` and
        ``time_range`` are read, ``url``, ``method``, ``data``, ``headers``
        and ``cookies`` are written
    :return: the same ``params`` object
    """
    params['url'] = url
    params['method'] = 'POST'

    form = params['data']
    form['q'] = query
    params['headers']['Content-Type'] = 'application/x-www-form-urlencoded'

    page = params['pageno']

    # The first page is requested without an offset.  Page 2 starts at 30
    # and every following page is 50 further (30, 80, 130, ...).
    if page >= 2:
        start = 30 + (page - 2) * 50
        form['s'] = start
        form['dc'] = start + 1

    # from the second page on, the form carries 'o' and 'api'
    if page > 1:
        form['o'] = 'json'
        form['api'] = 'd.js'

    # from the third page on, the form carries three more (empty) fields
    if page > 2:
        for field in ('nextParams', 'v', 'vqd'):
            form[field] = ''

    # region goes into the form and into a cookie of the same name
    region = get_region_code(params['language'], supported_languages)
    if region:
        form['kl'] = region
        params['cookies']['kl'] = region

    # 'df' is always part of the form; the cookie only exists with a filter
    form['df'] = ''
    time_range = params['time_range']
    if time_range in time_range_dict:
        date_filter = time_range_dict[time_range]
        form['df'] = date_filter
        params['cookies']['df'] = date_filter

    logger.debug("param data: %s", params['data'])
    logger.debug("param cookies: %s", params['cookies'])
    return params

# turn the HTML answer of a search request into a list of results
def response(resp):
    """Scrape title, snippet and URL of each result from a Lite result page.

    A GET to ``url_ping`` is sent first, reusing a subset of the headers of
    the search request.
    NOTE(review): presumably this makes DuckDuckGo honour the language
    preference -- confirm.

    :param resp: HTTP response of the search request
    :return: list of dicts with the keys ``title``, ``content`` and ``url``;
        empty for a 303 response or when the page has fewer than three
        tables below ``div.filters``
    """
    ping_headers = dict_subset(resp.request.headers, ['User-Agent', 'Accept-Encoding', 'Accept', 'Cookie'])
    get(url_ping, headers=ping_headers)

    if resp.status_code == 303:
        return []

    dom = fromstring(resp.text)
    tables = eval_xpath(dom, '//html/body/form/div[@class="filters"]/table')
    if len(tables) < 3:
        # no more results
        return []

    # the third table holds the results; its last <tr> is the form with
    # the 'previous/next page' links and is dropped
    rows = eval_xpath(tables[2], './/tr')[:-1]

    results = []

    # rows are consumed in groups of four: the first row of a group holds
    # the title link, the second one the snippet
    for start in range(0, len(rows) - 3, 4):
        title_row = rows[start]
        snippet_row = rows[start + 1]

        # skip sponsored ads: <tr class="result-sponsored">
        if snippet_row.get('class') == 'result-sponsored':
            continue

        link = eval_xpath_getindex(title_row, './/td//a[@class="result-link"]', 0, None)
        if link is None:
            continue

        snippet = eval_xpath_getindex(snippet_row, './/td[@class="result-snippet"]', 0, None)
        if snippet is None:
            continue

        item = {
            'title': link.text_content(),
            'content': extract_text(snippet),
            'url': link.get('href'),
        }
        results.append(item)

    return results

# get supported languages from their site
def _fetch_supported_languages(resp):

    # response is a js file with regions as an embedded object
    response_page = resp.text
    response_page = response_page[response_page.find('regions:{') + 8:]
    response_page = response_page[:response_page.find('}') + 1]

    regions_json = loads(response_page)
    supported_languages = map((lambda x: x[3:] + '-' + x[:2].upper()), regions_json.keys())

    return list(supported_languages)
[enh] engines: add about variable move meta information from comment to the about variable so the preferences, the documentation can show these information 2021-01-13 11:31:25 +01:00			`# SPDX-License-Identifier: AGPL-3.0-or-later`
[mod] engine duckduckgo - use DuckDuckGo-Lite Implement a scrapper for DuckDuckGo-Lite [1]. The existing DuckDuckGo [2] engine does not support paging. DuckDuckgo-Lite is much faster, less verbose and does have a paging option (reversed engineered from the input form of [1]). [1] https://lite.duckduckgo.com/lite [2] https://duckduckgo.com/ Signed-off-by: Markus Heiser <markus.heiser@darmarit.de> 2021-09-30 16:40:00 +02:00			`# lint: pylint`
			`"""DuckDuckGo Lite`
update versions.cfg to use the current up-to-date packages 2015-05-02 15:45:17 +02:00			`"""`
rewrite duckduckgo engine and add comments 2014-09-02 17:14:57 +02:00
[mod] fetch supported languages for several engines utils/fetch_languages.py gets languages supported by each engine and generates engines_languages.json with each engine's supported language. 2016-11-06 03:51:38 +01:00			`from json import loads`
[mod] engine duckduckgo - use DuckDuckGo-Lite Implement a scrapper for DuckDuckGo-Lite [1]. The existing DuckDuckGo [2] engine does not support paging. DuckDuckgo-Lite is much faster, less verbose and does have a paging option (reversed engineered from the input form of [1]). [1] https://lite.duckduckgo.com/lite [2] https://duckduckgo.com/ Signed-off-by: Markus Heiser <markus.heiser@darmarit.de> 2021-09-30 16:40:00 +02:00
			`from lxml.html import fromstring`

			`from searx.utils import (`
			`dict_subset,`
			`eval_xpath,`
			`eval_xpath_getindex,`
			`extract_text,`
			`match_language,`
			`)`
[httpx] replace searx.poolrequests by searx.network settings.yml: * outgoing.networks: * can contains network definition * propertiers: enable_http, verify, http2, max_connections, max_keepalive_connections, keepalive_expiry, local_addresses, support_ipv4, support_ipv6, proxies, max_redirects, retries * retries: 0 by default, number of times searx retries to send the HTTP request (using different IP & proxy each time) * local_addresses can be "192.168.0.1/24" (it supports IPv6) * support_ipv4 & support_ipv6: both True by default see https://github.com/searx/searx/pull/1034 * each engine can define a "network" section: * either a full network description * either reference an existing network * all HTTP requests of engine use the same HTTP configuration (it was not the case before, see proxy configuration in master) 2021-04-05 10:43:33 +02:00			`from searx.network import get`
[enh] initial commit 2013-10-14 23:09:13 +02:00
[enh] engines: add about variable move meta information from comment to the about variable so the preferences, the documentation can show these information 2021-01-13 11:31:25 +01:00			`# about`
			`about = {`
[mod] engine duckduckgo - use DuckDuckGo-Lite Implement a scrapper for DuckDuckGo-Lite [1]. The existing DuckDuckGo [2] engine does not support paging. DuckDuckgo-Lite is much faster, less verbose and does have a paging option (reversed engineered from the input form of [1]). [1] https://lite.duckduckgo.com/lite [2] https://duckduckgo.com/ Signed-off-by: Markus Heiser <markus.heiser@darmarit.de> 2021-09-30 16:40:00 +02:00			`"website": 'https://lite.duckduckgo.com/lite',`
[enh] engines: add about variable move meta information from comment to the about variable so the preferences, the documentation can show these information 2021-01-13 11:31:25 +01:00			`"wikidata_id": 'Q12805',`
			`"official_api_documentation": 'https://duckduckgo.com/api',`
			`"use_official_api": False,`
			`"require_api_key": False,`
			`"results": 'HTML',`
			`}`

rewrite duckduckgo engine and add comments 2014-09-02 17:14:57 +02:00			`# engine dependent config`
			`categories = ['general']`
[mod] engine duckduckgo - use DuckDuckGo-Lite Implement a scrapper for DuckDuckGo-Lite [1]. The existing DuckDuckGo [2] engine does not support paging. DuckDuckgo-Lite is much faster, less verbose and does have a paging option (reversed engineered from the input form of [1]). [1] https://lite.duckduckgo.com/lite [2] https://duckduckgo.com/ Signed-off-by: Markus Heiser <markus.heiser@darmarit.de> 2021-09-30 16:40:00 +02:00			`paging = True`
[mod] engine duckduckgo - update supported_languages_url Signed-off-by: Markus Heiser <markus.heiser@darmarit.de> 2021-09-30 16:49:12 +02:00			`supported_languages_url = 'https://duckduckgo.com/util/u588.js'`
add time range search for duckduckgo 2016-07-18 16:15:37 +02:00			`time_range_support = True`
[enh] initial commit 2013-10-14 23:09:13 +02:00
refactor engine's search language handling Add match_language function in utils to match any user given language code with a list of engine's supported languages. Also add language_aliases dict on each engine to translate standard language codes into the custom codes used by the engine. 2018-03-01 05:30:48 +01:00			`language_aliases = {`
			`'ar-SA': 'ar-XA',`
			`'es-419': 'es-XL',`
			`'ja': 'jp-JP',`
			`'ko': 'kr-KR',`
			`'sl-SI': 'sl-SL',`
			`'zh-TW': 'tzh-TW',`
			`'zh-HK': 'tzh-HK'`
			`}`

[mod] engine duckduckgo - use DuckDuckGo-Lite Implement a scrapper for DuckDuckGo-Lite [1]. The existing DuckDuckGo [2] engine does not support paging. DuckDuckgo-Lite is much faster, less verbose and does have a paging option (reversed engineered from the input form of [1]). [1] https://lite.duckduckgo.com/lite [2] https://duckduckgo.com/ Signed-off-by: Markus Heiser <markus.heiser@darmarit.de> 2021-09-30 16:40:00 +02:00			`time_range_dict = {`
			`'day': 'd',`
			`'week': 'w',`
			`'month': 'm',`
			`'year': 'y'`
			`}`
[fix] pep8 2014-03-29 16:38:45 +01:00
[mod] engine duckduckgo - use DuckDuckGo-Lite Implement a scrapper for DuckDuckGo-Lite [1]. The existing DuckDuckGo [2] engine does not support paging. DuckDuckgo-Lite is much faster, less verbose and does have a paging option (reversed engineered from the input form of [1]). [1] https://lite.duckduckgo.com/lite [2] https://duckduckgo.com/ Signed-off-by: Markus Heiser <markus.heiser@darmarit.de> 2021-09-30 16:40:00 +02:00			`# search-url`
			`url = 'https://lite.duckduckgo.com/lite'`
			`url_ping = 'https://duckduckgo.com/t/sl_l'`
rewrite duckduckgo engine and add comments 2014-09-02 17:14:57 +02:00
add duckduckgo images engine 2017-05-21 05:33:08 +02:00			`# match query's language to a region code that duckduckgo will accept`
[mod] pylint: numerous minor code fixes 2020-11-16 09:43:23 +01:00			`def get_region_code(lang, lang_list=None):`
Revert "remove 'all' option from search languages" This reverts commit 4d1770398a6af8902e75c0bd885781584d39e796. 2019-01-06 15:27:46 +01:00			`if lang == 'all':`
			`return None`

[mod] pylint: numerous minor code fixes 2020-11-16 09:43:23 +01:00			`lang_code = match_language(lang, lang_list or [], language_aliases, 'wt-WT')`
refactor engine's search language handling Add match_language function in utils to match any user given language code with a list of engine's supported languages. Also add language_aliases dict on each engine to translate standard language codes into the custom codes used by the engine. 2018-03-01 05:30:48 +01:00			`lang_parts = lang_code.split('-')`

			`# country code goes first`
			`return lang_parts[1].lower() + '-' + lang_parts[0].lower()`
add duckduckgo images engine 2017-05-21 05:33:08 +02:00

			`def request(query, params):`

[fix] fix duckduckgo engine - remove paging support: a "vqd" parameter is required between each request. This parameter is uniq for each request - update the URL (no redirect), use the POST method - language support: works if there is no more than request per minute, otherwise it is ignored ! 2020-10-09 15:01:40 +02:00			`params['url'] = url`
			`params['method'] = 'POST'`
[mod] engine duckduckgo - use DuckDuckGo-Lite Implement a scrapper for DuckDuckGo-Lite [1]. The existing DuckDuckGo [2] engine does not support paging. DuckDuckgo-Lite is much faster, less verbose and does have a paging option (reversed engineered from the input form of [1]). [1] https://lite.duckduckgo.com/lite [2] https://duckduckgo.com/ Signed-off-by: Markus Heiser <markus.heiser@darmarit.de> 2021-09-30 16:40:00 +02:00
[fix] fix duckduckgo engine - remove paging support: a "vqd" parameter is required between each request. This parameter is uniq for each request - update the URL (no redirect), use the POST method - language support: works if there is no more than request per minute, otherwise it is ignored ! 2020-10-09 15:01:40 +02:00			`params['data']['q'] = query`
[mod] engine duckduckgo - use DuckDuckGo-Lite Implement a scrapper for DuckDuckGo-Lite [1]. The existing DuckDuckGo [2] engine does not support paging. DuckDuckgo-Lite is much faster, less verbose and does have a paging option (reversed engineered from the input form of [1]). [1] https://lite.duckduckgo.com/lite [2] https://duckduckgo.com/ Signed-off-by: Markus Heiser <markus.heiser@darmarit.de> 2021-09-30 16:40:00 +02:00
			`# The API is not documented, so we do some reverse engineering and emulate`
			`# what https://lite.duckduckgo.com/lite/ does when you press "next Page"`
			`# link again and again ..`

			`params['headers']['Content-Type'] = 'application/x-www-form-urlencoded'`

			`# initial page does not have an offset`
			`if params['pageno'] == 2:`
			`# second page does have an offset of 30`
			`offset = (params['pageno'] - 1) * 30`
			`params['data']['s'] = offset`
			`params['data']['dc'] = offset + 1`

			`elif params['pageno'] > 2:`
			`# third and following pages do have an offset of 30 + n*50`
			`offset = 30 + (params['pageno'] - 2) * 50`
			`params['data']['s'] = offset`
			`params['data']['dc'] = offset + 1`

			`# initial page does not have additional data in the input form`
			`if params['pageno'] > 1:`
			`# request the second page (and more pages) needs 'o' and 'api' arguments`
			`params['data']['o'] = 'json'`
			`params['data']['api'] = 'd.js'`

			`# initial page does not have additional data in the input form`
			`if params['pageno'] > 2:`
			`# request the third page (and more pages) some more arguments`
			`params['data']['nextParams'] = ''`
			`params['data']['v'] = ''`
			`params['data']['vqd'] = ''`
[fix] multilingual duckduckgo only works if both country and language are set 2016-06-03 07:14:23 +02:00
refactor engine's search language handling Add match_language function in utils to match any user given language code with a list of engine's supported languages. Also add language_aliases dict on each engine to translate standard language codes into the custom codes used by the engine. 2018-03-01 05:30:48 +01:00			`region_code = get_region_code(params['language'], supported_languages)`
[fix] fix duckduckgo engine - remove paging support: a "vqd" parameter is required between each request. This parameter is uniq for each request - update the URL (no redirect), use the POST method - language support: works if there is no more than request per minute, otherwise it is ignored ! 2020-10-09 15:01:40 +02:00			`if region_code:`
			`params['data']['kl'] = region_code`
			`params['cookies']['kl'] = region_code`
add time range search for duckduckgo 2016-07-18 16:15:37 +02:00
[mod] engine duckduckgo - use DuckDuckGo-Lite Implement a scrapper for DuckDuckGo-Lite [1]. The existing DuckDuckGo [2] engine does not support paging. DuckDuckgo-Lite is much faster, less verbose and does have a paging option (reversed engineered from the input form of [1]). [1] https://lite.duckduckgo.com/lite [2] https://duckduckgo.com/ Signed-off-by: Markus Heiser <markus.heiser@darmarit.de> 2021-09-30 16:40:00 +02:00			`params['data']['df'] = ''`
[fix] duckduckgo engine: "!ddg !g" do not redirect to google * searx understand "!ddg !g time" as : send "!g time" to DDG * !g a DDG bang for Google: DDG return a HTTP redirect to Google This commit adds a the allows_redirect param not to follow HTTP redirect. The DDG engine returns a empty result as before without HTTP redirect. 2021-02-09 12:07:19 +01:00			`if params['time_range'] in time_range_dict:`
			`params['data']['df'] = time_range_dict[params['time_range']]`
[mod] engine duckduckgo - use DuckDuckGo-Lite Implement a scrapper for DuckDuckGo-Lite [1]. The existing DuckDuckGo [2] engine does not support paging. DuckDuckgo-Lite is much faster, less verbose and does have a paging option (reversed engineered from the input form of [1]). [1] https://lite.duckduckgo.com/lite [2] https://duckduckgo.com/ Signed-off-by: Markus Heiser <markus.heiser@darmarit.de> 2021-09-30 16:40:00 +02:00			`params['cookies']['df'] = time_range_dict[params['time_range']]`
[fix] duckduckgo engine: "!ddg !g" do not redirect to google * searx understand "!ddg !g time" as : send "!g time" to DDG * !g a DDG bang for Google: DDG return a HTTP redirect to Google This commit adds a the allows_redirect param not to follow HTTP redirect. The DDG engine returns a empty result as before without HTTP redirect. 2021-02-09 12:07:19 +01:00
[mod] engine duckduckgo - use DuckDuckGo-Lite Implement a scrapper for DuckDuckGo-Lite [1]. The existing DuckDuckGo [2] engine does not support paging. DuckDuckgo-Lite is much faster, less verbose and does have a paging option (reversed engineered from the input form of [1]). [1] https://lite.duckduckgo.com/lite [2] https://duckduckgo.com/ Signed-off-by: Markus Heiser <markus.heiser@darmarit.de> 2021-09-30 16:40:00 +02:00			`logger.debug("param data: %s", params['data'])`
			`logger.debug("param cookies: %s", params['cookies'])`
[enh] initial commit 2013-10-14 23:09:13 +02:00			`return params`

rewrite duckduckgo engine and add comments 2014-09-02 17:14:57 +02:00			`# get response from search-request`
[enh] initial commit 2013-10-14 23:09:13 +02:00			`def response(resp):`
[fix] duckduckgo engine: "!ddg !g" do not redirect to google * searx understand "!ddg !g time" as : send "!g time" to DDG * !g a DDG bang for Google: DDG return a HTTP redirect to Google This commit adds a the allows_redirect param not to follow HTTP redirect. The DDG engine returns a empty result as before without HTTP redirect. 2021-02-09 12:07:19 +01:00
[mod] duckduckgo engine: better support of the language preference After the main request, send a second to https://duckduckgo.com/t/sl_h See https://github.com/searx/searx/issues/2259 2021-02-09 14:36:43 +01:00			`headers_ping = dict_subset(resp.request.headers, ['User-Agent', 'Accept-Encoding', 'Accept', 'Cookie'])`
			`get(url_ping, headers=headers_ping)`
[mod] ddg engine mods 2014-03-21 16:33:17 +01:00
[mod] engine duckduckgo - use DuckDuckGo-Lite Implement a scrapper for DuckDuckGo-Lite [1]. The existing DuckDuckGo [2] engine does not support paging. DuckDuckgo-Lite is much faster, less verbose and does have a paging option (reversed engineered from the input form of [1]). [1] https://lite.duckduckgo.com/lite [2] https://duckduckgo.com/ Signed-off-by: Markus Heiser <markus.heiser@darmarit.de> 2021-09-30 16:40:00 +02:00			`if resp.status_code == 303:`
			`return []`

[mod] duckduckgo engine: better support of the language preference After the main request, send a second to https://duckduckgo.com/t/sl_h See https://github.com/searx/searx/issues/2259 2021-02-09 14:36:43 +01:00			`results = []`
[mod] ddg engine mods 2014-03-21 16:33:17 +01:00			`doc = fromstring(resp.text)`
rewrite duckduckgo engine and add comments 2014-09-02 17:14:57 +02:00
[mod] engine duckduckgo - use DuckDuckGo-Lite Implement a scrapper for DuckDuckGo-Lite [1]. The existing DuckDuckGo [2] engine does not support paging. DuckDuckgo-Lite is much faster, less verbose and does have a paging option (reversed engineered from the input form of [1]). [1] https://lite.duckduckgo.com/lite [2] https://duckduckgo.com/ Signed-off-by: Markus Heiser <markus.heiser@darmarit.de> 2021-09-30 16:40:00 +02:00			`result_table = eval_xpath(doc, '//html/body/form/div[@class="filters"]/table')`
			`if not len(result_table) >= 3:`
			`# no more results`
			`return []`
			`result_table = result_table[2]`

			`tr_rows = eval_xpath(result_table, './/tr')`

			`# In the last <tr> is the form of the 'previous/next page' links`
			`tr_rows = tr_rows[:-1]`

			`len_tr_rows = len(tr_rows)`
			`offset = 0`

			`while len_tr_rows >= offset + 4:`

			`# assemble table rows we need to scrap`
			`tr_title = tr_rows[offset]`
			`tr_content = tr_rows[offset + 1]`
			`offset += 4`

			`# ignore sponsored Adds <tr class="result-sponsored">`
			`if tr_content.get('class') == 'result-sponsored':`
[enh] engine types 2013-10-15 19:11:43 +02:00			`continue`
rewrite duckduckgo engine and add comments 2014-09-02 17:14:57 +02:00
[mod] engine duckduckgo - use DuckDuckGo-Lite Implement a scrapper for DuckDuckGo-Lite [1]. The existing DuckDuckGo [2] engine does not support paging. DuckDuckgo-Lite is much faster, less verbose and does have a paging option (reversed engineered from the input form of [1]). [1] https://lite.duckduckgo.com/lite [2] https://duckduckgo.com/ Signed-off-by: Markus Heiser <markus.heiser@darmarit.de> 2021-09-30 16:40:00 +02:00			`a_tag = eval_xpath_getindex(tr_title, './/td//a[@class="result-link"]', 0, None)`
			`if a_tag is None:`
			`continue`
rewrite duckduckgo engine and add comments 2014-09-02 17:14:57 +02:00
[mod] engine duckduckgo - use DuckDuckGo-Lite Implement a scrapper for DuckDuckGo-Lite [1]. The existing DuckDuckGo [2] engine does not support paging. DuckDuckgo-Lite is much faster, less verbose and does have a paging option (reversed engineered from the input form of [1]). [1] https://lite.duckduckgo.com/lite [2] https://duckduckgo.com/ Signed-off-by: Markus Heiser <markus.heiser@darmarit.de> 2021-09-30 16:40:00 +02:00			`td_content = eval_xpath_getindex(tr_content, './/td[@class="result-snippet"]', 0, None)`
			`if td_content is None:`
			`continue`
[mod] ddg engine mods 2014-03-21 16:33:17 +01:00
[mod] engine duckduckgo - use DuckDuckGo-Lite Implement a scrapper for DuckDuckGo-Lite [1]. The existing DuckDuckGo [2] engine does not support paging. DuckDuckgo-Lite is much faster, less verbose and does have a paging option (reversed engineered from the input form of [1]). [1] https://lite.duckduckgo.com/lite [2] https://duckduckgo.com/ Signed-off-by: Markus Heiser <markus.heiser@darmarit.de> 2021-09-30 16:40:00 +02:00			`results.append({`
			`'title': a_tag.text_content(),`
			`'content': extract_text(td_content),`
			`'url': a_tag.get('href'),`
			`})`
add correction support for duckduckgo 2020-06-13 23:42:16 +02:00
[enh] engine types 2013-10-15 19:11:43 +02:00			`return results`
[mod] fetch supported languages for several engines utils/fetch_languages.py gets languages supported by each engine and generates engines_languages.json with each engine's supported language. 2016-11-06 03:51:38 +01:00
			`# get supported languages from their site`
tests for _fetch_supported_languages in engines and refactor method to make it testable without making requests 2016-12-15 07:34:43 +01:00			`def _fetch_supported_languages(resp):`
[mod] fetch supported languages for several engines utils/fetch_languages.py gets languages supported by each engine and generates engines_languages.json with each engine's supported language. 2016-11-06 03:51:38 +01:00
			`# response is a js file with regions as an embedded object`
tests for _fetch_supported_languages in engines and refactor method to make it testable without making requests 2016-12-15 07:34:43 +01:00			`response_page = resp.text`
[mod] fetch supported languages for several engines utils/fetch_languages.py gets languages supported by each engine and generates engines_languages.json with each engine's supported language. 2016-11-06 03:51:38 +01:00			`response_page = response_page[response_page.find('regions:{') + 8:]`
			`response_page = response_page[:response_page.find('}') + 1]`

			`regions_json = loads(response_page)`
			`supported_languages = map((lambda x: x[3:] + '-' + x[:2].upper()), regions_json.keys())`

update engines_languages.json and languages.py Also, fix fetch_languages.py so it can run on python3. 2017-10-10 23:52:41 +02:00			`return list(supported_languages)`