1
0
mirror of https://github.com/searxng/searxng.git synced 2024-11-17 18:00:12 +01:00

[fix] update xpaths for new google results page

This commit is contained in:
Marc Abonce Seguin 2019-06-26 00:45:20 -05:00
parent 1bb46e5e37
commit ccaf6ca02c
2 changed files with 44 additions and 94 deletions

View File

@ -107,13 +107,12 @@ images_path = '/images'
supported_languages_url = 'https://www.google.com/preferences?#languages' supported_languages_url = 'https://www.google.com/preferences?#languages'
# specific xpath variables # specific xpath variables
results_xpath = '//div[@class="g"]' results_xpath = '//div[contains(@class, "ZINbbc")]'
url_xpath = './/h3/a/@href' url_xpath = './/div[@class="kCrYT"][1]/a/@href'
title_xpath = './/h3' title_xpath = './/div[@class="kCrYT"][1]/a/div[1]'
content_xpath = './/span[@class="st"]' content_xpath = './/div[@class="kCrYT"][2]//div[contains(@class, "BNeawe")]//div[contains(@class, "BNeawe")]'
content_misc_xpath = './/div[@class="f slp"]' suggestion_xpath = '//div[contains(@class, "ZINbbc")][last()]//div[@class="rVLSBd"]/a//div[contains(@class, "BNeawe")]'
suggestion_xpath = '//p[@class="_Bmc"]' spelling_suggestion_xpath = '//div[@id="scc"]//a'
spelling_suggestion_xpath = '//a[@class="spell"]'
# map : detail location # map : detail location
map_address_xpath = './/div[@class="s"]//table//td[2]/span/text()' map_address_xpath = './/div[@class="s"]//table//td[2]/span/text()'
@ -199,10 +198,6 @@ def request(query, params):
params['headers']['Accept-Language'] = language + ',' + language + '-' + country params['headers']['Accept-Language'] = language + ',' + language + '-' + country
params['headers']['Accept'] = 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8' params['headers']['Accept'] = 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8'
# Force Safari 3.1 on Mac OS X (Leopard) user agent to avoid loading the new UI that Searx can't parse
params['headers']['User-Agent'] = ("Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_5_4)"
"AppleWebKit/525.18 (KHTML, like Gecko) Version/3.1.2 Safari/525.20.1")
params['google_hostname'] = google_hostname params['google_hostname'] = google_hostname
return params return params
@ -274,9 +269,7 @@ def response(resp):
content = extract_text_from_dom(result, content_xpath) content = extract_text_from_dom(result, content_xpath)
if content is None: if content is None:
continue continue
content_misc = extract_text_from_dom(result, content_misc_xpath)
if content_misc is not None:
content = content_misc + "<br />" + content
# append result # append result
results.append({'url': url, results.append({'url': url,
'title': title, 'title': title,

View File

@ -58,93 +58,50 @@ class TestGoogleEngine(SearxTestCase):
self.assertEqual(google.response(response), []) self.assertEqual(google.response(response), [])
html = """ html = """
<div class="g"> <div class="ZINbbc xpd O9g5cc uUPGi">
<h3 class="r"> <div>
<a href="http://this.should.be.the.link/"> <div class="kCrYT">
<b>This</b> is <b>the</b> title <a href="/url?q=http://this.should.be.the.link/">
</a> <div class="BNeawe">
</h3> <b>This</b> is <b>the</b> title
<div class="s">
<div class="kv" style="margin-bottom:2px">
<cite>
<b>test</b>.psychologies.com/
</cite>
<div class="_nBb">
<div style="display:inline" onclick="google.sham(this);" aria-expanded="false"
aria-haspopup="true" tabindex="0" data-ved="0CBUQ7B0wAA">
<span class="_O0">
</span>
</div> </div>
<div style="display:none" class="am-dropdown-menu" role="menu" tabindex="-1"> <div class="BNeawe">
<ul> http://website
<li class="_Ykb"> </div>
<a class="_Zkb" href="http://www.google.fr/url?url=http://webcache.googleusercontent </a>
.com/search%3Fcache:R1Z_4pGXjuIJ:http://test.psychologies.com/"> </div>
En cache <div class="kCrYT">
</a> <div>
</li> <div class="BNeawe">
<li class="_Ykb"> <div>
<a class="_Zkb" href="/search?safe=off&amp;q=related:test.psy.com/"> <div class="BNeawe">
Pages similaires This should be the content.
</a> </div>
</li> </div>
</ul>
</div> </div>
</div> </div>
</div> </div>
<span class="st"> </div>
This should be the content. </p>
</span> <div class="ZINbbc xpd O9g5cc uUPGi">
<br> <div>
<div class="osl"> <div class="kCrYT">
<a href="http://www.google.fr/url?url=http://test.psychologies.com/tests/"> <span>
Test Personnalité <div class="BNeawe">
</a> - Related searches
<a href="http://www.google.fr/url?url=http://test.psychologies.com/test/"> </div>
Tests - Moi </span>
</a> - </div>
<a href="http://www.google.fr/url?url=http://test.psychologies.com/test/tests-couple"> <div class="rVLSBd">
Test Couple <a>
</a> <div>
- <div class="BNeawe">
<a href="http://www.google.fr/url?url=http://test.psychologies.com/tests/tests-amour"> suggestion title
Test Amour </div>
</div>
</a> </a>
</div> </div>
</div> </div>
</div>
<div class="g">
<h3 class="r">
<a href="http://www.google.com/images?q=toto">
<b>This</b>
</a>
</h3>
</div>
<div class="g">
<h3 class="r">
<a href="http://www.google.com/search?q=toto">
<b>This</b> is
</a>
</h3>
</div>
<div class="g">
<h3 class="r">
<a href="">
<b>This</b> is <b>the</b>
</a>
</h3>
</div>
<div class="g">
<h3 class="r">
<a href="/url?q=url">
<b>This</b> is <b>the</b>
</a>
</h3>
</div>
<p class="_Bmc" style="margin:3px 8px">
<a href="/search?num=20&amp;safe=off&amp;q=t&amp;revid=1754833769&amp;sa=X&amp;ei=-&amp;ved=">
suggestion <b>title</b>
</a>
</p> </p>
""" """
response = self.mock_response(html) response = self.mock_response(html)