mirror of
https://github.com/searxng/searxng.git
synced 2026-04-15 13:31:17 +00:00
[feat] engines: add support for aol.com (#5882)
Co-authored-by: Markus Heiser <markus.heiser@darmarit.de>
This commit is contained in:
8
docs/dev/engines/online/aol.rst
Normal file
8
docs/dev/engines/online/aol.rst
Normal file
@@ -0,0 +1,8 @@
|
||||
.. _aol engine:
|
||||
|
||||
===
|
||||
AOL
|
||||
===
|
||||
|
||||
.. automodule:: searx.engines.aol
|
||||
:members:
|
||||
208
searx/engines/aol.py
Normal file
208
searx/engines/aol.py
Normal file
@@ -0,0 +1,208 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""AOL supports WEB, image, and video search. Internally, it uses the Bing
|
||||
index.
|
||||
|
||||
AOL doesn't seem to support setting the language via request parameters, instead
|
||||
the results are based on the URL. For example, there is
|
||||
|
||||
- `search.aol.com <https://search.aol.com>`_ for English results
|
||||
- `suche.aol.de <https://suche.aol.de>`_ for German results
|
||||
|
||||
However, AOL offers its services only in a few regions:
|
||||
|
||||
- en-US: search.aol.com
|
||||
- de-DE: suche.aol.de
|
||||
- fr-FR: recherche.aol.fr
|
||||
- en-GB: search.aol.co.uk
|
||||
- en-CA: search.aol.ca
|
||||
|
||||
In order to still offer sufficient support for language and region, the `search
|
||||
keywords`_ known from Bing, ``language`` and ``loc`` (region), are added to the
|
||||
search term (AOL is basically just a proxy for Bing).
|
||||
|
||||
.. _search keywords:
|
||||
https://support.microsoft.com/en-us/topic/advanced-search-keywords-ea595928-5d63-4a0b-9c6b-0b769865e78a
|
||||
|
||||
"""
|
||||
|
||||
from urllib.parse import urlencode, unquote_plus
|
||||
import typing as t
|
||||
|
||||
from lxml import html
|
||||
from dateutil import parser
|
||||
|
||||
from searx.result_types import EngineResults
|
||||
from searx.utils import eval_xpath_list, eval_xpath, extract_text
|
||||
|
||||
if t.TYPE_CHECKING:
|
||||
from searx.extended_types import SXNG_Response
|
||||
from searx.search.processors import OnlineParams
|
||||
|
||||
about = {
|
||||
"website": "https://www.aol.com",
|
||||
"wikidata_id": "Q2407",
|
||||
"official_api_documentation": None,
|
||||
"use_official_api": False,
|
||||
"require_api_key": False,
|
||||
"results": "HTML",
|
||||
}
|
||||
|
||||
categories = ["general"]
|
||||
search_type = "search" # supported: search, image, video
|
||||
|
||||
paging = True
|
||||
safesearch = True
|
||||
time_range_support = True
|
||||
results_per_page = 10
|
||||
|
||||
|
||||
base_url = "https://search.aol.com"
|
||||
time_range_map = {"day": "1d", "week": "1w", "month": "1m", "year": "1y"}
|
||||
safesearch_map = {0: "p", 1: "r", 2: "i"}
|
||||
|
||||
|
||||
def init(_):
|
||||
if search_type not in ("search", "image", "video"):
|
||||
raise ValueError(f"unsupported search type {search_type}")
|
||||
|
||||
|
||||
def request(query: str, params: "OnlineParams") -> None:
|
||||
|
||||
language, region = (params["searxng_locale"].split("-") + [None])[:2]
|
||||
if language and language != "all":
|
||||
query = f"{query} language:{language}"
|
||||
if region:
|
||||
query = f"{query} loc:{region}"
|
||||
|
||||
args: dict[str, str | int | None] = {
|
||||
"q": query,
|
||||
"b": params["pageno"] * results_per_page + 1, # page is 1-indexed
|
||||
"pz": results_per_page,
|
||||
}
|
||||
|
||||
if params["time_range"]:
|
||||
args["fr2"] = "time"
|
||||
args["age"] = params["time_range"]
|
||||
else:
|
||||
args["fr2"] = "sb-top-search"
|
||||
|
||||
params["cookies"]["sB"] = f"vm={safesearch_map[params['safesearch']]}"
|
||||
params["url"] = f"{base_url}/aol/{search_type}?{urlencode(args)}"
|
||||
logger.debug(params)
|
||||
|
||||
|
||||
def _deobfuscate_url(obfuscated_url: str) -> str | None:
|
||||
# URL looks like "https://search.aol.com/click/_ylt=AwjFSDjd;_ylu=JfsdjDFd/RV=2/RE=1774058166/RO=10/RU=https%3a%2f%2fen.wikipedia.org%2fwiki%2fTree/RK=0/RS=BP2CqeMLjscg4n8cTmuddlEQA2I-" # pylint: disable=line-too-long
|
||||
if not obfuscated_url:
|
||||
return None
|
||||
|
||||
for part in obfuscated_url.split("/"):
|
||||
if part.startswith("RU="):
|
||||
return unquote_plus(part[3:])
|
||||
# pattern for de-obfuscating URL not found, fall back to Yahoo's tracking link
|
||||
return obfuscated_url
|
||||
|
||||
|
||||
def _general_results(doc: html.HtmlElement) -> EngineResults:
|
||||
res = EngineResults()
|
||||
|
||||
for result in eval_xpath_list(doc, "//div[@id='web']//ol/li[not(contains(@class, 'first'))]"):
|
||||
obfuscated_url = extract_text(eval_xpath(result, ".//h3/a/@href"))
|
||||
if not obfuscated_url:
|
||||
continue
|
||||
|
||||
url = _deobfuscate_url(obfuscated_url)
|
||||
if not url:
|
||||
continue
|
||||
|
||||
res.add(
|
||||
res.types.MainResult(
|
||||
url=url,
|
||||
title=extract_text(eval_xpath(result, ".//h3/a")) or "",
|
||||
content=extract_text(eval_xpath(result, ".//div[contains(@class, 'compText')]")) or "",
|
||||
thumbnail=extract_text(eval_xpath(result, ".//a[contains(@class, 'thm')]/img/@data-src")) or "",
|
||||
)
|
||||
)
|
||||
return res
|
||||
|
||||
|
||||
def _video_results(doc: html.HtmlElement) -> EngineResults:
|
||||
res = EngineResults()
|
||||
|
||||
for result in eval_xpath_list(doc, "//div[contains(@class, 'results')]//ol/li"):
|
||||
obfuscated_url = extract_text(eval_xpath(result, ".//a/@href"))
|
||||
if not obfuscated_url:
|
||||
continue
|
||||
|
||||
url = _deobfuscate_url(obfuscated_url)
|
||||
if not url:
|
||||
continue
|
||||
|
||||
published_date_raw = extract_text(eval_xpath(result, ".//div[contains(@class, 'v-age')]"))
|
||||
try:
|
||||
published_date = parser.parse(published_date_raw or "")
|
||||
except parser.ParserError:
|
||||
published_date = None
|
||||
|
||||
res.add(
|
||||
res.types.LegacyResult(
|
||||
{
|
||||
"template": "videos.html",
|
||||
"url": url,
|
||||
"title": extract_text(eval_xpath(result, ".//h3")),
|
||||
"content": extract_text(eval_xpath(result, ".//div[contains(@class, 'compText')]")),
|
||||
"thumbnail": extract_text(eval_xpath(result, ".//img[contains(@class, 'thm')]/@src")),
|
||||
"length": extract_text(eval_xpath(result, ".//span[contains(@class, 'v-time')]")),
|
||||
"publishedDate": published_date,
|
||||
}
|
||||
)
|
||||
)
|
||||
|
||||
return res
|
||||
|
||||
|
||||
def _image_results(doc: html.HtmlElement) -> EngineResults:
|
||||
res = EngineResults()
|
||||
|
||||
for result in eval_xpath_list(doc, "//section[@id='results']//ul/li"):
|
||||
obfuscated_url = extract_text(eval_xpath(result, "./a/@href"))
|
||||
if not obfuscated_url:
|
||||
continue
|
||||
|
||||
url = _deobfuscate_url(obfuscated_url)
|
||||
if not url:
|
||||
continue
|
||||
|
||||
res.add(
|
||||
res.types.LegacyResult(
|
||||
{
|
||||
"template": "images.html",
|
||||
# results don't have an extra URL, only the image source
|
||||
"url": url,
|
||||
"title": extract_text(eval_xpath(result, ".//a/@aria-label")),
|
||||
"thumbnail_src": extract_text(eval_xpath(result, ".//img/@src")),
|
||||
"img_src": url,
|
||||
}
|
||||
)
|
||||
)
|
||||
|
||||
return res
|
||||
|
||||
|
||||
def response(resp: "SXNG_Response") -> EngineResults:
|
||||
doc = html.fromstring(resp.text)
|
||||
|
||||
match search_type:
|
||||
case "search":
|
||||
results = _general_results(doc)
|
||||
case "image":
|
||||
results = _image_results(doc)
|
||||
case "video":
|
||||
results = _video_results(doc)
|
||||
case _:
|
||||
raise ValueError("unsupported search type")
|
||||
|
||||
for suggestion in eval_xpath_list(doc, ".//ol[contains(@class, 'searchRightBottom')]//table//a"):
|
||||
results.add(results.types.LegacyResult({"suggestion": extract_text(suggestion)}))
|
||||
|
||||
return results
|
||||
@@ -421,6 +421,24 @@ engines:
|
||||
shortcut: conda
|
||||
disabled: true
|
||||
|
||||
- name: aol
|
||||
engine: aol
|
||||
search_type: search
|
||||
categories: [general]
|
||||
shortcut: aol
|
||||
|
||||
- name: aol images
|
||||
engine: aol
|
||||
search_type: image
|
||||
categories: [images]
|
||||
shortcut: aoli
|
||||
|
||||
- name: aol videos
|
||||
engine: aol
|
||||
search_type: video
|
||||
categories: [videos]
|
||||
shortcut: aolv
|
||||
|
||||
- name: arch linux wiki
|
||||
engine: archlinux
|
||||
shortcut: al
|
||||
|
||||
Reference in New Issue
Block a user