From 57996b12fcbe193e89537c63abd1a75c3c65a25a Mon Sep 17 00:00:00 2001 From: dalf Date: Fri, 20 Feb 2015 12:34:13 +0100 Subject: [PATCH] [fix] update yahoo engine according to the web site changes --- searx/engines/yahoo.py | 11 +-- searx/tests/engines/test_yahoo.py | 121 +++++++++++++++--------------- 2 files changed, 65 insertions(+), 67 deletions(-) diff --git a/searx/engines/yahoo.py b/searx/engines/yahoo.py index 161f7513b..11663a415 100644 --- a/searx/engines/yahoo.py +++ b/searx/engines/yahoo.py @@ -24,11 +24,11 @@ base_url = 'https://search.yahoo.com/' search_url = 'search?{query}&b={offset}&fl=1&vl=lang_{lang}' # specific xpath variables -results_xpath = '//div[@class="res"]' +results_xpath = "//div[contains(concat(' ', normalize-space(@class), ' '), ' Sr ')]" url_xpath = './/h3/a/@href' title_xpath = './/h3/a' -content_xpath = './/div[@class="abstr"]' -suggestion_xpath = '//div[@id="satat"]//a' +content_xpath = './/div[@class="compText aAbs"]' +suggestion_xpath = "//div[contains(concat(' ', normalize-space(@class), ' '), ' AlsoTry ')]//a" # remove yahoo-specific tracking-url @@ -91,11 +91,12 @@ def response(resp): 'content': content}) # if no suggestion found, return results - if not dom.xpath(suggestion_xpath): + suggestions = dom.xpath(suggestion_xpath) + if not suggestions: return results # parse suggestion - for suggestion in dom.xpath(suggestion_xpath): + for suggestion in suggestions: # append suggestion results.append({'suggestion': extract_text(suggestion)}) diff --git a/searx/tests/engines/test_yahoo.py b/searx/tests/engines/test_yahoo.py index e5c78701d..cdd6fda29 100644 --- a/searx/tests/engines/test_yahoo.py +++ b/searx/tests/engines/test_yahoo.py @@ -55,86 +55,83 @@ class TestYahooEngine(SearxTestCase): self.assertEqual(yahoo.response(response), []) html = """ -
-
-

- - This is the title - +
    +
  1. +
    + - www.test.com -
    - This is the content +
    +

    This is the content +

    -
    -

    Also Try

    - - - - - - -
    - - - This is the suggestion - - -
    +
  2. +
  3. +
    + +
    +

    This is the second content

    +
    +
  4. +
+
+
+

Also Try

+ + + + + +
This is the suggestion +
+
""" response = mock.Mock(text=html) results = yahoo.response(response) + print results self.assertEqual(type(results), list) - self.assertEqual(len(results), 2) + self.assertEqual(len(results), 3) self.assertEqual(results[0]['title'], 'This is the title') self.assertEqual(results[0]['url'], 'https://this.is.the.url/') self.assertEqual(results[0]['content'], 'This is the content') - self.assertEqual(results[1]['suggestion'], 'This is the suggestion') + self.assertEqual(results[1]['title'], 'This is the second title') + self.assertEqual(results[1]['url'], 'https://this.is.the.second.url/') + self.assertEqual(results[1]['content'], 'This is the second content') + self.assertEqual(results[2]['suggestion'], 'This is the suggestion') html = """ -
-
-

- - This is the title - +
    +
  1. +
    + - www.test.com -
    - This is the content -
    -
    -
    - - www.test.com -
    - This is the content -
    -
    -
    -
    -

    -

    -
    - www.test.com -
    - This is the content +
    +

    This is the content +

    +
  2. +
""" response = mock.Mock(text=html) results = yahoo.response(response)