2 Commits

Author SHA1 Message Date
Markus Heiser
0dc032435a [chore] make data.traits - add regions from karmasearch
Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
2026-03-25 19:45:46 +01:00
Bnyro
bd8106e40e [feat] engines: add karmasearch engine (brave mirror) 2026-03-25 19:45:46 +01:00
4 changed files with 411 additions and 0 deletions

View File

@@ -0,0 +1,8 @@
.. _karmasearch engine:
===========
Karmasearch
===========
.. automodule:: searx.engines.karmasearch
:members:

View File

@@ -5740,6 +5740,186 @@
"zu-ZA": "ZA"
}
},
"karmasearch": {
"all_locale": null,
"custom": {},
"data_type": "traits_v1",
"languages": {},
"regions": {
"da-DK": "da-DK",
"de-AT": "de-AT",
"de-CH": "de-CH",
"de-DE": "de-DE",
"en-AU": "en-AU",
"en-CA": "en-CA",
"en-GB": "en-GB",
"en-ID": "en-ID",
"en-IN": "en-IN",
"en-MY": "en-MY",
"en-NZ": "en-NZ",
"en-PH": "en-PH",
"en-US": "en-US",
"en-ZA": "en-ZA",
"es-AR": "es-AR",
"es-CL": "es-CL",
"es-ES": "es-ES",
"es-MX": "es-MX",
"es-US": "es-US",
"fi-FI": "fi-FI",
"fr-BE": "fr-BE",
"fr-CA": "fr-CA",
"fr-CH": "fr-CH",
"fr-FR": "fr-FR",
"it-IT": "it-IT",
"ja-JP": "ja-JP",
"ko-KR": "ko-KR",
"nl-BE": "nl-BE",
"nl-NL": "nl-NL",
"pl-PL": "pl-PL",
"pt-BR": "pt-BR",
"ru-RU": "ru-RU",
"sv-SE": "sv-SE",
"tr-TR": "tr-TR",
"zh-CN": "zh-CN",
"zh-HK": "zh-HK",
"zh-TW": "zh-TW"
}
},
"karmasearch images": {
"all_locale": null,
"custom": {},
"data_type": "traits_v1",
"languages": {},
"regions": {
"da-DK": "da-DK",
"de-AT": "de-AT",
"de-CH": "de-CH",
"de-DE": "de-DE",
"en-AU": "en-AU",
"en-CA": "en-CA",
"en-GB": "en-GB",
"en-ID": "en-ID",
"en-IN": "en-IN",
"en-MY": "en-MY",
"en-NZ": "en-NZ",
"en-PH": "en-PH",
"en-US": "en-US",
"en-ZA": "en-ZA",
"es-AR": "es-AR",
"es-CL": "es-CL",
"es-ES": "es-ES",
"es-MX": "es-MX",
"es-US": "es-US",
"fi-FI": "fi-FI",
"fr-BE": "fr-BE",
"fr-CA": "fr-CA",
"fr-CH": "fr-CH",
"fr-FR": "fr-FR",
"it-IT": "it-IT",
"ja-JP": "ja-JP",
"ko-KR": "ko-KR",
"nl-BE": "nl-BE",
"nl-NL": "nl-NL",
"pl-PL": "pl-PL",
"pt-BR": "pt-BR",
"ru-RU": "ru-RU",
"sv-SE": "sv-SE",
"tr-TR": "tr-TR",
"zh-CN": "zh-CN",
"zh-HK": "zh-HK",
"zh-TW": "zh-TW"
}
},
"karmasearch news": {
"all_locale": null,
"custom": {},
"data_type": "traits_v1",
"languages": {},
"regions": {
"da-DK": "da-DK",
"de-AT": "de-AT",
"de-CH": "de-CH",
"de-DE": "de-DE",
"en-AU": "en-AU",
"en-CA": "en-CA",
"en-GB": "en-GB",
"en-ID": "en-ID",
"en-IN": "en-IN",
"en-MY": "en-MY",
"en-NZ": "en-NZ",
"en-PH": "en-PH",
"en-US": "en-US",
"en-ZA": "en-ZA",
"es-AR": "es-AR",
"es-CL": "es-CL",
"es-ES": "es-ES",
"es-MX": "es-MX",
"es-US": "es-US",
"fi-FI": "fi-FI",
"fr-BE": "fr-BE",
"fr-CA": "fr-CA",
"fr-CH": "fr-CH",
"fr-FR": "fr-FR",
"it-IT": "it-IT",
"ja-JP": "ja-JP",
"ko-KR": "ko-KR",
"nl-BE": "nl-BE",
"nl-NL": "nl-NL",
"pl-PL": "pl-PL",
"pt-BR": "pt-BR",
"ru-RU": "ru-RU",
"sv-SE": "sv-SE",
"tr-TR": "tr-TR",
"zh-CN": "zh-CN",
"zh-HK": "zh-HK",
"zh-TW": "zh-TW"
}
},
"karmasearch videos": {
"all_locale": null,
"custom": {},
"data_type": "traits_v1",
"languages": {},
"regions": {
"da-DK": "da-DK",
"de-AT": "de-AT",
"de-CH": "de-CH",
"de-DE": "de-DE",
"en-AU": "en-AU",
"en-CA": "en-CA",
"en-GB": "en-GB",
"en-ID": "en-ID",
"en-IN": "en-IN",
"en-MY": "en-MY",
"en-NZ": "en-NZ",
"en-PH": "en-PH",
"en-US": "en-US",
"en-ZA": "en-ZA",
"es-AR": "es-AR",
"es-CL": "es-CL",
"es-ES": "es-ES",
"es-MX": "es-MX",
"es-US": "es-US",
"fi-FI": "fi-FI",
"fr-BE": "fr-BE",
"fr-CA": "fr-CA",
"fr-CH": "fr-CH",
"fr-FR": "fr-FR",
"it-IT": "it-IT",
"ja-JP": "ja-JP",
"ko-KR": "ko-KR",
"nl-BE": "nl-BE",
"nl-NL": "nl-NL",
"pl-PL": "pl-PL",
"pt-BR": "pt-BR",
"ru-RU": "ru-RU",
"sv-SE": "sv-SE",
"tr-TR": "tr-TR",
"zh-CN": "zh-CN",
"zh-HK": "zh-HK",
"zh-TW": "zh-TW"
}
},
"mojeek": {
"all_locale": null,
"custom": {

View File

@@ -0,0 +1,198 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
"""Karmasearch uses Brave's index, so the results should be the same as Brave's.
However, the advantages of this engine are:
- it has less strict rate-limits
- it has a JSON API, so it's less likely to break
"""
from datetime import datetime
from urllib.parse import urlencode
import typing as t
from dateutil import parser
from searx.enginelib.traits import EngineTraits
from searx.utils import html_to_text
from searx.result_types import EngineResults, MainResult
from searx.result_types._base import LegacyResult
if t.TYPE_CHECKING:
from searx.extended_types import SXNG_Response
from searx.search.processors import OnlineParams
about = {
"website": "https://karmasearch.org",
"official_api_documentation": None,
"use_official_api": False,
"require_api_key": False,
"results": "JSON",
}
base_url = "https://api.karmasearch.org"
categories = ["web", "general"]
search_type = "web" # supported: web, images, videos, news
# all types except "images" support pagination
paging = True
safesearch = True
time_range_support = True
safe_search_map = {0: "off", 1: "moderate", 2: "strict"}
time_range_map = {"day": "Day", "week": "Week", "month": "Month", "year": "Year"}
def init(_):
if search_type not in ("web", "images", "videos", "news"):
raise ValueError(f"invalid search type: {search_type}")
def request(query: str, params: "OnlineParams") -> None:
engine_region: str = traits.get_region(params["searxng_locale"]) or "en-US"
args: dict[str, str | int] = {
"searchTerm": query,
"adultFilter": safe_search_map[params["safesearch"]],
"pageNumber": params["pageno"],
"country": engine_region.split("-")[-1],
"userLanguage": "en", # UI language: en, es or fr / no effect on search results
"market": engine_region,
}
if params["time_range"]:
args["freshness"] = time_range_map[params["time_range"]]
params["url"] = f"{base_url}/search/{search_type}?{urlencode(args)}"
def _parse_date(date_string: str) -> datetime | None:
try:
return parser.parse(date_string)
except parser.ParserError:
return None
def _parse_general(result: dict[str, str]):
return MainResult(
url=result["url"],
title=result["title"],
content=html_to_text(result["description"]),
thumbnail=result.get("thumbnail", ""),
)
def _parse_news(result: dict[str, str]) -> LegacyResult:
return LegacyResult(
{
"url": result["url"],
"title": result["title"],
"content": html_to_text(result["description"]),
"thumbnail": result.get("thumbnail"),
"publishedDate": _parse_date(result.get("age", "")),
}
)
def _parse_videos(result: dict[str, t.Any]) -> LegacyResult:
return LegacyResult(
{
"template": "videos.html",
"url": result["url"],
"title": result["title"],
"content": html_to_text(result["description"]),
"thumbnail": result.get("thumbnail"),
"publishedDate": _parse_date(result.get("age", "")),
"length": result.get("video", {}).get("duration"),
}
)
def _parse_images(result: dict[str, t.Any]) -> LegacyResult:
return LegacyResult(
{
"template": "images.html",
"url": result["url"],
"title": result["title"],
"content": "",
"img_src": result.get("properties", {}).get("url"),
"thumbnail_src": result.get("thumbnail", {}).get("src"),
}
)
def response(resp: "SXNG_Response") -> EngineResults:
res = EngineResults()
for result in resp.json()["results"]:
# hide sponsored results
if result.get("sponsored", False):
continue
if "videos" in result:
for videos_result in result["videos"]:
res.add(_parse_videos(videos_result))
continue
if "news" in result:
for news_result in result["news"]:
res.add(_parse_news(news_result))
continue
if search_type == "news":
res.add(_parse_news(result))
elif search_type == "videos":
res.add(_parse_videos(result))
elif search_type == "images":
res.add(_parse_images(result))
else:
res.add(_parse_general(result))
return res
def fetch_traits(engine_traits: EngineTraits):
"""Fetch :ref:`languages <brave languages>` and :ref:`regions <brave
regions>` from Brave."""
# pylint: disable=import-outside-toplevel, too-many-branches
from lxml import html
import babel
from searx.locales import region_tag
from searx.network import get # see https://github.com/searxng/searxng/issues/762
# from searx.engines.xpath import extract_text
from searx.utils import gen_useragent
headers = {
"Accept-Encoding": "gzip, deflate",
"Cache-Control": "no-cache",
"DNT": "1",
"Connection": "keep-alive",
"Accept-Language": "en,en-US;q=0.7,en;q=0.3",
"User-Agent": gen_useragent(),
}
resp = get("https://karmasearch.org/settings", headers=headers, timeout=5)
if not resp.ok:
raise RuntimeError("Response from Brave languages is not OK.")
dom = html.fromstring(resp.text)
for option in dom.xpath("//select[@name='country']/option"):
country_tag: str = option.get("value", "")
try:
sxng_tag = region_tag(babel.Locale.parse(country_tag, sep="-"))
except babel.UnknownLocaleError:
# silently ignore unknown languages
continue
# print("%-20s: %s <-- %s" % (extract_text(option), country_tag, sxng_tag))
conflict = engine_traits.regions.get(sxng_tag)
if conflict:
if conflict != country_tag:
print("CONFLICT: babel %s --> %s, %s" % (sxng_tag, conflict, country_tag))
continue
engine_traits.regions[sxng_tag] = country_tag

View File

@@ -1206,6 +1206,31 @@ engines:
timeout: 3.0
disabled: true
- name: karmasearch
engine: karmasearch
categories: [general, web]
search_type: web
shortcut: ka
- name: karmasearch images
engine: karmasearch
categories: [images, web]
search_type: images
shortcut: kai
paging: false
- name: karmasearch videos
engine: karmasearch
categories: [general, web]
search_type: videos
shortcut: kav
- name: karmasearch news
engine: karmasearch
categories: [news, web]
search_type: news
shortcut: kan
- name: kickass
engine: kickass
base_url: