Mirror of https://github.com/searxng/searxng.git (synced 2026-04-17 22:41:16 +00:00)

Compare commits: 247 commits, fb089ae297...master
Commits (SHA1):

e8299a4c37, 4c16b247d4, 871ed47f89, fc1e1e9c3e, aad1ab5e56, 8579974f5e, ae0b0e56ac, 4f9c80160f, ee66b070a9, 9e08a6771f, 919aeae42a, 20fc6fe80d, fb68010f72, e90c036ed6, e094ed3976, 7737a0da1a, 67af4894d4, 08ef7a63d7, 346a467077, e12b722ddc, 24948350f6, 474b0a55b0, 69f04d59dc, 7adace694c, e92f6b70a0, c980fa1efc, 53141be387, 953933d472, bab3879cba, 7ac4ff39fe, ea6f491c93, 5ecf7515dc, c8208fa8bb, 40e5ba9fdc, cf5389afd4, ae51c349fd, 6b9856d643, e58516daf5, 265858ee2b, d7a83b8d8a, e734424006, fe1d6d9c48, 0dc032435a, bd8106e40e, 541c6c3cb1, 02ba387865, 8c1570af1c, c52d23f262, ae4b73039f, 220c42c8e9, 99ec6f296e, 054174a19d, 5e8255f28a, 3dc4d5daa8, 924fc52f54, 8d44ff51e2, f8056b5e44, 71cea1d87f, c4f51aa4ac, 2c1ce3bd37, 6c7e9c1976, 8ad72872b5, 2bf5f00e7d, 5ab3ef774b, 3810dc9d1c, c589b56d69, 6521190bb4, 2bb8ac17c6, 4c4ed4b198, 3c1f68c59e, 23fb76f08f, 867a43ac1f, 3d3a78f3aa, 3a032d63a9, e30d490429, 8b95b2058b, d4954a0646, a563127a26, 0716de6bc8, 68ff08f224, c7ba2158f9, c1b211aeea, 08d2b92b2a, 7cc4eedc1a, 25e457c8d5, 56e565a582, 380f1c4a49, b5c1c28048, fc53162eec, 2b03a61832, a9f3baefe6, d1f9409afc, f6e360a134, bc31c29d8a, 65ae6ad902, dd98f761ad, 162c04a1c4, e084139ca9, 8fc549574f, 0eb4970621, 5a72560319, 5e7aae3f16, 62b153f5c1, a2108ce2e5, 31cc7ef7f0, 2728331362, ebb300424c, 0c284b5b09, 8e9ed5f9be, d6c80e4dd1, 2756341b21, c3e3d2d85d, 4964d664f0, 845a78daf8, 191818b865, 5054e69844, 029b74e4f5, 2e6eeb1d79, 490f28f0a6, cc39cf7df3, 76f0712319, 89a63114c4, 9754e7b4e2, 93ac6ceb35, 4e6e8425ca, 062b1320a2, 95f95e96c4, 8f824d34a8, 4027ff5699, 17544140fb, 8e824017dc, da9c0815ac, 39ac4d4387, 51ded5e690, 97e572728f, a2db6f6500, 61d5456852, 0277ce9bca, 970f2b8430, b5bb27f231, 578e59a54e, 9ab8c36161, bd69e9c7ab, aeb3c0e274, 9f5dedabba, f7a608703d, 92d37152c2, ad42b553bf, 1c292dd9a4, 2ad404a3bf, ca67782626, 0a4ea004cf, 15133c2f25, a7f15f4289, ac4b1275a3, bf2e7f8fc5, cdbf41bb9b, 966988e360, a9877aba3e, da6ab39049, 5271c3b9e1, eea1892863, c97d4d9b6c, 38de71b3e0, a77d5d1258, 68944b4484, c60f59609e, 8473b030c5, 9dd3baf45c, 410996df9e, 313fda426c, 277be51bc0, 2d9f213ca8, 97814c62a8, ee4943ebd7, 086c1533a4, 55c01c0757, 279e098168, c4f1b216d1, b93a68bb48, 3f80eb80d0, 697f53ecde, cf74e1d9e9, 44405bd03c, 26e275222b, ae48f50245, b83e88ea78, a684b91fe7, 27d965234a, c769c194d6, 65186c3301, f6a428b60d, 3d88876a32, 09bedef409, a5c946a321, 29042d8e5a, c57db45672, 9491b514c9, 320c317719, abae17e6fc, 3baf5c38fc, ce46f30739, 65a95539f1, 874dc3f5ea, 7941719371, fa9729226b, 9df177af85, f45123356b, 8851f4d6b1, f954423101, 95e63ac32d, fc6e59d3ec, da45859f32, 8bf600cc62, aa607a379a, 6ebd3f4d35, 9072c77aea, c32b8100c3, f93257941e, 896863802e, 920b40253c, 07440e3332, 1827dfc071, c46aecd4e3, 21bf8a6973, f5475ba782, 265f15498c, 666409ec7e, b719d559b6, 9d3ec9a2a2, 74ec225ad1, b5a1a092f1, ddc6d68114, 32eb84d6d3, da6c635ea2, e34c356e64, 7017393647, aa49f5b933, 3f91ac47e6, 8c631b92ce, 0ebac144f5, 5e0e1c6b31, 3c7545c6ce, aba839195b, 1f6ea41272, 5450d22796, 1174fde1f3
.github/ISSUE_TEMPLATE/bug-report.md (vendored): 65 lines changed
@@ -1,39 +1,50 @@
---
name: Bug report
about: Report a bug in SearXNG
title: ''
labels: bug
assignees: ''

name: "Bug report"
about: Report a bug in SearXNG"
labels: ["bug"]
type: "bug"
---
<!-- PLEASE FILL THESE FIELDS, IT REALLY HELPS THE MAINTAINERS OF SearXNG -->

**Version of SearXNG, commit number if you are using on master branch and stipulate if you forked SearXNG**
<!-- If you are running on master branch using git execute this command
in order to fetch the latest commit ID:
```
git log -1
```
If you are using searxng-docker then look at the bottom of the SearXNG page
and check for the version after "Powered by SearXNG"
_Replace this placeholder with a meaningful and precise description of the bug._

Please also stipulate if you are using a forked version of SearXNG and
include a link to the fork source code.
<!-- FILL IN THESE FIELDS .. and delete the comments after reading.

Use Markdown for formatting -> https://www.markdowntools.io/cheat-sheet
-->
**How did you install SearXNG?**
<!-- Did you install SearXNG using the official wiki or using searxng-docker
or manually by executing the searx/webapp.py file? -->
**What happened?**
<!-- A clear and concise description of what the bug is. -->

**How To Reproduce**
<!-- How can we reproduce this issue? (as minimally and as precisely as possible) -->
### How To Reproduce?

<!-- How can we reproduce this issue? (as minimally and as precisely as
possible) -->

### Expected behavior

**Expected behavior**
<!-- A clear and concise description of what you expected to happen. -->

**Screenshots & Logs**
### Screenshots & Logs

<!-- If applicable, add screenshots, logs to help explain your problem. -->

**Additional context**
### Version of SearXNG

<!-- Commit number if you are using on master branch and stipulate if you forked
SearXNG -->

<!-- Look at the bottom of the SearXNG page and check for the version after
"Powered by SearXNG" If you are using a forked version of SearXNG include a
link to the fork source code. -->

### How did you install SearXNG?

<!-- Did you install SearXNG using the official documentation or using
searxng-docker? -->

### Additional context

<!-- Add any other context about the problem here. -->

### Code of Conduct

[AI Policy]: https://github.com/searxng/searxng/blob/master/AI_POLICY.rst

- [ ] I read the [AI Policy] and hereby confirm that this issue conforms with the policy.
.github/ISSUE_TEMPLATE/config.yml (vendored): 2 lines changed
@@ -1,4 +1,4 @@
blank_issues_enabled: true
blank_issues_enabled: false
contact_links:
  - name: Questions & Answers (Q&A)
    url: https://github.com/searxng/searxng/discussions/categories/q-a
.github/ISSUE_TEMPLATE/engine-request.md (vendored): 61 lines changed
@@ -1,31 +1,46 @@
---
name: Engine request
about: Request a new engine in SearXNG
title: ''
labels: enhancement, engine request
assignees: ''

name: Engine request"
about: Request a new engine in SearXNG"
labels: ["engine request"]
type: "feature"
---
<!-- PLEASE FILL THESE FIELDS, IT REALLY HELPS THE MAINTAINERS OF SearXNG -->

**Working URL to the engine**
<!-- Please check if the engine is responding correctly before submitting it. -->
<!-- FILL IN THESE FIELDS .. and delete the comments after reading.

**Why do you want to add this engine?**
<!-- What's special about this engine? Is it open source or libre? -->
Use Markdown for formatting -> https://www.markdowntools.io/cheat-sheet
-->

**Features of this engine**
<!-- Features of this engine: Doesn't track its users, fast, easy to integrate, ... -->
### Working URL to the engine

**How can SearXNG fetch the information from this engine?**
<!-- List API URL, example code (using the correct markdown) and more
that could be useful for the developers in order to implement this engine.
If you don't know what to write, let this part blank. -->
<!-- Please check if the engine is responding correctly before submitting -->

**Applicable category of this engine**
<!-- Where should this new engine fit in SearXNG? Current categories in SearXNG:
general, files, images, it, map, music, news, science, social media and videos.
You can add multiple categories at the same time. -->
### Why do you want to add this engine?

**Additional context**
<!-- Add any other context about this engine here. -->
<!-- What's special about this engine? -->

### Features of this engine

<!-- Features of this engine: Serves special content, is fast, is easy to
integrate, ... ? -->

### How can SearXNG fetch results from this engine?

<!-- List API URL, example code and more that could be useful for the developers
in order to implement this engine. If you don't know what to write, let
this part blank. -->

### Applicable category of this engine

<!-- Where should this new engine fit in SearXNG? Current categories in
SearXNG: general, files, images, it, map, music, news, science, social
media and videos. -->

### Additional context

<!-- Add any other context about the problem here. -->

### Code of Conduct

[AI Policy]: https://github.com/searxng/searxng/blob/master/AI_POLICY.rst

- [ ] I read the [AI Policy] and hereby confirm that this issue conforms with the policy.
.github/ISSUE_TEMPLATE/feature-request.md (vendored): 37 lines changed
@@ -1,21 +1,32 @@
---
name: Feature request
about: Request a new feature in SearXNG
title: ''
labels: enhancement
assignees: ''

name: "Feature request"
about: "Request a new feature in SearXNG"
labels: ["new feature"]
type: "feature"
---
<!-- PLEASE FILL THESE FIELDS, IT REALLY HELPS THE MAINTAINERS OF SearXNG -->

**Is your feature request related to a problem? Please describe.**
<!-- A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] -->
_Replace this placeholder with a concise description of the feature._

<!-- FILL IN THESE FIELDS .. and delete the comments after reading.

Use Markdown for formatting -> https://www.markdowntools.io/cheat-sheet
-->

### Is your feature request related to a problem?

<!-- A clear and concise description of what the problem is. Ex. I'm always
frustrated when [...] -->

### Describe the solution you'd like

**Describe the solution you'd like**
<!-- A clear and concise description of what you want to happen. -->

**Describe alternatives you've considered**
### Describe alternatives you've considered

<!-- A clear and concise description of any alternative solutions or features you've considered. -->

**Additional context**
<!-- Add any other context or screenshots about the feature request here. -->
### Code of Conduct

[AI Policy]: https://github.com/searxng/searxng/blob/master/AI_POLICY.rst

- [ ] I read the [AI Policy] and hereby confirm that this issue conforms with the policy.
.github/workflows/checker.yml (vendored): 47 lines changed
@@ -1,47 +0,0 @@
---
name: Checker

# yamllint disable-line rule:truthy
on:
  workflow_dispatch:
  schedule:
    - cron: "0 4 * * 5"

concurrency:
  group: ${{ github.workflow }}
  cancel-in-progress: false

permissions:
  contents: read

env:
  PYTHON_VERSION: "3.14"

jobs:
  search:
    if: github.repository_owner == 'searxng' || github.event_name == 'workflow_dispatch'
    name: Search
    runs-on: ubuntu-24.04-arm
    steps:
      - name: Setup Python
        uses: actions/setup-python@83679a892e2d95755f2dac6acb0bfd1e9ac5d548 # v6.1.0
        with:
          python-version: "${{ env.PYTHON_VERSION }}"

      - name: Checkout
        uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0
        with:
          persist-credentials: "false"

      - name: Setup cache Python
        uses: actions/cache@0057852bfaa89a56745cba8c7296529d2fc39830 # v4.3.0
        with:
          key: "python-${{ env.PYTHON_VERSION }}-${{ runner.arch }}-${{ hashFiles('./requirements*.txt') }}"
          restore-keys: "python-${{ env.PYTHON_VERSION }}-${{ runner.arch }}-"
          path: "./local"

      - name: Setup venv
        run: make V=1 install

      - name: Search checker
        run: make search.checker
.github/workflows/container.yml (vendored): 38 lines changed
@@ -56,13 +56,8 @@ jobs:
      # yamllint disable rule:line-length
      - name: Setup podman
        env:
          PODMAN_VERSION: "v5.6.2"
          PODMAN_VERSION: "v5.7.1"
        run: |
          # dpkg man-db trigger is very slow on GHA runners
          # https://github.com/actions/runner-images/issues/10977
          # https://github.com/actions/runner/issues/4030
          sudo rm -f /var/lib/man-db/auto-update

          sudo apt-get purge -y podman runc crun conmon

          curl -fsSLO "https://github.com/mgoltzsche/podman-static/releases/download/${{ env.PODMAN_VERSION }}/podman-linux-${{ matrix.march }}.tar.gz"
@@ -78,21 +73,22 @@
      # yamllint enable rule:line-length

      - name: Setup Python
        uses: actions/setup-python@83679a892e2d95755f2dac6acb0bfd1e9ac5d548 # v6.1.0
        uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
        with:
          python-version: "${{ env.PYTHON_VERSION }}"

      - name: Checkout
        uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0
        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
        with:
          persist-credentials: "false"
          fetch-depth: "0"

      - name: Setup cache Python
        uses: actions/cache@0057852bfaa89a56745cba8c7296529d2fc39830 # v4.3.0
        uses: actions/cache@27d5ce7f107fe9357f9df03efb73ab90386fccae # v5.0.5
        with:
          key: "python-${{ env.PYTHON_VERSION }}-${{ runner.arch }}-${{ hashFiles('./requirements*.txt') }}"
          restore-keys: "python-${{ env.PYTHON_VERSION }}-${{ runner.arch }}-"
          restore-keys: |
            python-${{ env.PYTHON_VERSION }}-${{ runner.arch }}-
          path: "./local/"

      - name: Get date
@@ -100,20 +96,20 @@
        run: echo "date=$(date +'%Y%m%d')" >>$GITHUB_OUTPUT

      - name: Setup cache container
        uses: actions/cache@0057852bfaa89a56745cba8c7296529d2fc39830 # v4.3.0
        uses: actions/cache@27d5ce7f107fe9357f9df03efb73ab90386fccae # v5.0.5
        with:
          key: "container-${{ matrix.arch }}-${{ steps.date.outputs.date }}-${{ hashFiles('./requirements*.txt') }}"
          restore-keys: |
            "container-${{ matrix.arch }}-${{ steps.date.outputs.date }}-"
            "container-${{ matrix.arch }}-"
            container-${{ matrix.arch }}-${{ steps.date.outputs.date }}-
            container-${{ matrix.arch }}-
          path: "/var/tmp/buildah-cache-*/*"

      - if: ${{ matrix.emulation }}
        name: Setup QEMU
        uses: docker/setup-qemu-action@c7c53464625b32c7a7e944ae62b3e17d2b600130 # v3.7.0
        uses: docker/setup-qemu-action@ce360397dd3f832beb865e1373c09c0e9f86d70a # v4.0.0

      - name: Login to GHCR
        uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # v3.6.0
        uses: docker/login-action@4907a6ddec9925e35a0a9e82d7399ccc52663121 # v4.1.0
        with:
          registry: "ghcr.io"
          username: "${{ github.repository_owner }}"
@@ -145,16 +141,16 @@

    steps:
      - name: Checkout
        uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0
        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
        with:
          persist-credentials: "false"

      - if: ${{ matrix.emulation }}
        name: Setup QEMU
        uses: docker/setup-qemu-action@c7c53464625b32c7a7e944ae62b3e17d2b600130 # v3.7.0
        uses: docker/setup-qemu-action@ce360397dd3f832beb865e1373c09c0e9f86d70a # v4.0.0

      - name: Login to GHCR
        uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # v3.6.0
        uses: docker/login-action@4907a6ddec9925e35a0a9e82d7399ccc52663121 # v4.1.0
        with:
          registry: "ghcr.io"
          username: "${{ github.repository_owner }}"
@@ -179,19 +175,19 @@

    steps:
      - name: Checkout
        uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0
        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
        with:
          persist-credentials: "false"

      - name: Login to GHCR
        uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # v3.6.0
        uses: docker/login-action@4907a6ddec9925e35a0a9e82d7399ccc52663121 # v4.1.0
        with:
          registry: "ghcr.io"
          username: "${{ github.repository_owner }}"
          password: "${{ secrets.GITHUB_TOKEN }}"

      - name: Login to Docker Hub
        uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # v3.6.0
        uses: docker/login-action@4907a6ddec9925e35a0a9e82d7399ccc52663121 # v4.1.0
        with:
          registry: "docker.io"
          username: "${{ secrets.DOCKER_USER }}"
.github/workflows/data-update.yml (vendored): 12 lines changed
@@ -33,6 +33,7 @@ jobs:
          - update_engine_traits.py
          - update_wikidata_units.py
          - update_engine_descriptions.py
          - update_gsa_useragents.py

    permissions:
      contents: write
@@ -40,20 +41,21 @@

    steps:
      - name: Setup Python
        uses: actions/setup-python@83679a892e2d95755f2dac6acb0bfd1e9ac5d548 # v6.1.0
        uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
        with:
          python-version: "${{ env.PYTHON_VERSION }}"

      - name: Checkout
        uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0
        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
        with:
          persist-credentials: "false"

      - name: Setup cache Python
        uses: actions/cache@0057852bfaa89a56745cba8c7296529d2fc39830 # v4.3.0
        uses: actions/cache@27d5ce7f107fe9357f9df03efb73ab90386fccae # v5.0.5
        with:
          key: "python-${{ env.PYTHON_VERSION }}-${{ runner.arch }}-${{ hashFiles('./requirements*.txt') }}"
          restore-keys: "python-${{ env.PYTHON_VERSION }}-${{ runner.arch }}-"
          restore-keys: |
            python-${{ env.PYTHON_VERSION }}-${{ runner.arch }}-
          path: "./local/"

      - name: Setup venv
@@ -64,7 +66,7 @@

      - name: Create PR
        id: cpr
        uses: peter-evans/create-pull-request@84ae59a2cdc2258d6fa0732dd66352dddae2a412 # v7.0.9
        uses: peter-evans/create-pull-request@5f6978faf089d4d20b00c7766989d076bb2fc7f1 # v8.1.1
        with:
          author: "searxng-bot <searxng-bot@users.noreply.github.com>"
          committer: "searxng-bot <searxng-bot@users.noreply.github.com>"
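The matrix in this workflow runs one job per update script under `searxng_extra/update/`; the change adds `update_gsa_useragents.py` to that rotation. A hedged sketch of reproducing one matrix job locally; the `pyenv.cmd` wrapper follows the Makefile convention shown further below, but the exact invocation of the update scripts is an assumption:

```sh
# Sketch: run one data-update job from a local checkout (assumed invocation).
make V=1 install   # create the venv, as the workflow's "Setup venv" step does
./manage pyenv.cmd python searxng_extra/update/update_gsa_useragents.py
git status         # the workflow hands any modified files to create-pull-request
```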
.github/workflows/documentation.yml (vendored): 11 lines changed
@@ -32,21 +32,22 @@ jobs:

    steps:
      - name: Setup Python
        uses: actions/setup-python@83679a892e2d95755f2dac6acb0bfd1e9ac5d548 # v6.1.0
        uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
        with:
          python-version: "${{ env.PYTHON_VERSION }}"

      - name: Checkout
        uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0
        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
        with:
          persist-credentials: "false"
          fetch-depth: "0"

      - name: Setup cache Python
        uses: actions/cache@0057852bfaa89a56745cba8c7296529d2fc39830 # v4.3.0
        uses: actions/cache@27d5ce7f107fe9357f9df03efb73ab90386fccae # v5.0.5
        with:
          key: "python-${{ env.PYTHON_VERSION }}-${{ runner.arch }}-${{ hashFiles('./requirements*.txt') }}"
          restore-keys: "python-${{ env.PYTHON_VERSION }}-${{ runner.arch }}-"
          restore-keys: |
            python-${{ env.PYTHON_VERSION }}-${{ runner.arch }}-
          path: "./local/"

      - name: Setup venv
@@ -57,7 +58,7 @@

      - if: github.ref_name == 'master'
        name: Release
        uses: JamesIves/github-pages-deploy-action@4a3abc783e1a24aeb44c16e869ad83caf6b4cc23 # v4.7.4
        uses: JamesIves/github-pages-deploy-action@d92aa235d04922e8f08b40ce78cc5442fcfbfa2f # v4.8.0
        with:
          folder: "dist/docs"
          branch: "gh-pages"
.github/workflows/integration.yml (vendored): 23 lines changed
@@ -27,7 +27,6 @@ jobs:
    strategy:
      matrix:
        python-version:
          - "3.10"
          - "3.11"
          - "3.12"
          - "3.13"
@@ -35,20 +34,21 @@

    steps:
      - name: Setup Python
        uses: actions/setup-python@83679a892e2d95755f2dac6acb0bfd1e9ac5d548 # v6.1.0
        uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
        with:
          python-version: "${{ matrix.python-version }}"

      - name: Checkout
        uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0
        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
        with:
          persist-credentials: "false"

      - name: Setup cache Python
        uses: actions/cache@0057852bfaa89a56745cba8c7296529d2fc39830 # v4.3.0
        uses: actions/cache@27d5ce7f107fe9357f9df03efb73ab90386fccae # v5.0.5
        with:
          key: "python-${{ matrix.python-version }}-${{ runner.arch }}-${{ hashFiles('./requirements*.txt') }}"
          restore-keys: "python-${{ matrix.python-version }}-${{ runner.arch }}-"
          restore-keys: |
            python-${{ matrix.python-version }}-${{ runner.arch }}-
          path: "./local/"

      - name: Setup venv
@@ -62,31 +62,32 @@
    runs-on: ubuntu-24.04-arm
    steps:
      - name: Setup Python
        uses: actions/setup-python@83679a892e2d95755f2dac6acb0bfd1e9ac5d548 # v6.1.0
        uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
        with:
          python-version: "${{ env.PYTHON_VERSION }}"

      - name: Checkout
        uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0
        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
        with:
          persist-credentials: "false"

      - name: Setup Node.js
        uses: actions/setup-node@2028fbc5c25fe9cf00d9f06a71cc4710d4507903 # v6.0.0
        uses: actions/setup-node@53b83947a5a98c8d113130e565377fae1a50d02f # v6.3.0
        with:
          node-version-file: "./.nvmrc"

      - name: Setup cache Node.js
        uses: actions/cache@0057852bfaa89a56745cba8c7296529d2fc39830 # v4.3.0
        uses: actions/cache@27d5ce7f107fe9357f9df03efb73ab90386fccae # v5.0.5
        with:
          key: "nodejs-${{ runner.arch }}-${{ hashFiles('./.nvmrc', './package.json') }}"
          path: "./client/simple/node_modules/"

      - name: Setup cache Python
        uses: actions/cache@0057852bfaa89a56745cba8c7296529d2fc39830 # v4.3.0
        uses: actions/cache@27d5ce7f107fe9357f9df03efb73ab90386fccae # v5.0.5
        with:
          key: "python-${{ env.PYTHON_VERSION }}-${{ runner.arch }}-${{ hashFiles('./requirements*.txt') }}"
          restore-keys: "python-${{ env.PYTHON_VERSION }}-${{ runner.arch }}-"
          restore-keys: |
            python-${{ env.PYTHON_VERSION }}-${{ runner.arch }}-
          path: "./local/"

      - name: Setup venv
.github/workflows/l10n.yml (vendored): 22 lines changed
@@ -35,21 +35,22 @@ jobs:

    steps:
      - name: Setup Python
        uses: actions/setup-python@83679a892e2d95755f2dac6acb0bfd1e9ac5d548 # v6.1.0
        uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
        with:
          python-version: "${{ env.PYTHON_VERSION }}"

      - name: Checkout
        uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0
        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
        with:
          token: "${{ secrets.WEBLATE_GITHUB_TOKEN }}"
          fetch-depth: "0"

      - name: Setup cache Python
        uses: actions/cache@0057852bfaa89a56745cba8c7296529d2fc39830 # v4.3.0
        uses: actions/cache@27d5ce7f107fe9357f9df03efb73ab90386fccae # v5.0.5
        with:
          key: "python-${{ env.PYTHON_VERSION }}-${{ runner.arch }}-${{ hashFiles('./requirements*.txt') }}"
          restore-keys: "python-${{ env.PYTHON_VERSION }}-${{ runner.arch }}-"
          restore-keys: |
            python-${{ env.PYTHON_VERSION }}-${{ runner.arch }}-
          path: "./local/"

      - name: Setup venv
@@ -82,21 +83,22 @@

    steps:
      - name: Setup Python
        uses: actions/setup-python@83679a892e2d95755f2dac6acb0bfd1e9ac5d548 # v6.1.0
        uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
        with:
          python-version: "${{ env.PYTHON_VERSION }}"

      - name: Checkout
        uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0
        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
        with:
          token: "${{ secrets.WEBLATE_GITHUB_TOKEN }}"
          fetch-depth: "0"

      - name: Setup cache Python
        uses: actions/cache@0057852bfaa89a56745cba8c7296529d2fc39830 # v4.3.0
        uses: actions/cache@27d5ce7f107fe9357f9df03efb73ab90386fccae # v5.0.5
        with:
          key: "python-${{ env.PYTHON_VERSION }}-${{ runner.arch }}-${{ hashFiles('./requirements*.txt') }}"
          restore-keys: "python-${{ env.PYTHON_VERSION }}-${{ runner.arch }}-"
          restore-keys: |
            python-${{ env.PYTHON_VERSION }}-${{ runner.arch }}-
          path: "./local/"

      - name: Setup venv
@@ -117,7 +119,7 @@

      - name: Create PR
        id: cpr
        uses: peter-evans/create-pull-request@84ae59a2cdc2258d6fa0732dd66352dddae2a412 # v7.0.9
        uses: peter-evans/create-pull-request@5f6978faf089d4d20b00c7766989d076bb2fc7f1 # v8.1.1
        with:
          author: "searxng-bot <searxng-bot@users.noreply.github.com>"
          committer: "searxng-bot <searxng-bot@users.noreply.github.com>"
@@ -130,7 +132,7 @@
          body: |
            [l10n] update translations from Weblate
          labels: |
            translation
            area:i18n

      - name: Display information
        run: |
.github/workflows/security.yml (vendored): 6 lines changed
@@ -24,12 +24,12 @@ jobs:

    steps:
      - name: Checkout
        uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0
        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
        with:
          persist-credentials: "false"

      - name: Sync GHCS from Docker Scout
        uses: docker/scout-action@f8c776824083494ab0d56b8105ba2ca85c86e4de # v1.18.2
        uses: docker/scout-action@bacf462e8d090c09660de30a6ccc718035f961e3 # v1.20.4
        with:
          organization: "searxng"
          dockerhub-user: "${{ secrets.DOCKER_USER }}"
@@ -41,6 +41,6 @@
          write-comment: "false"

      - name: Upload SARIFs
        uses: github/codeql-action/upload-sarif@fdbfb4d2750291e159f0156def62b853c2798ca2 # v4.31.5
        uses: github/codeql-action/upload-sarif@95e58e9a2cdfd71adc6e0353d5c52f41a045d225 # v4.35.2
        with:
          sarif_file: "./scout.sarif"
AI_POLICY.rst (new file): 20 lines
@@ -0,0 +1,20 @@
.. SPDX-License-Identifier: AGPL-3.0-or-later

AI Policy
=========

Restrictions on Generative AI Usage
-----------------------------------
- **All AI usage in any form must be disclosed.** You must state the tool you used (e.g. Claude Code, Cursor, Amp) along with the extent that the work was AI-assisted.
- **The human-in-the-loop must fully understand all code.** If you use generative AI tools as an aid in developing code or documentation changes, ensure that you fully understand the proposed changes and can explain why they are the correct approach.
- **AI should never be the main author of the PR.** AI may be used as a tool to help with developing, but the human contribution to the code changes should always be reasonably larger than the part written by AI. For example, you should be the one that decides about the structure of the PR, not the LLM.
- **Issues and PR descriptions must be fully human-written.** Do not post output from Large Language Models or similar generative AI as comments on any of our discussion forums (e.g. GitHub Issues, Matrix, ...), as such comments tend to be formulaic and low content. If you're not a native English speaker, using AI for translating self-written issue texts to English is okay, but please keep the wording as close as possible to the original wording.
- **Bad AI drivers will be denounced.** People who produce bad contributions that are clearly AI (slop) will be blocked for all future contributions.

There are Humans Here
---------------------
Every discussion, issue, and pull request is read and reviewed by humans. It is a boundary point at which people interact with each other and the work done. It is rude and disrespectful to approach this boundary with low-effort, unqualified work, since it puts the burden of validation on the maintainer.

It takes a lot of maintainer time and energy to review AI-generated contributions! Sending the output of an LLM to open source project maintainers extracts work from them in the form of design and code review, so we call this kind of contribution an "extractive contribution".

The *golden rule* is that a contribution should be worth more to the project than the time it takes to review it, which is usually not the case if large parts of your PR were written by LLMs.

@@ -2,6 +2,7 @@

.. _Quickstart guide: https://docs.searxng.org/dev/quickstart.html
.. _Commits guide: https://docs.searxng.org/dev/commits.html
.. _AI Policy: https://github.com/searxng/searxng/blob/master/AI_POLICY.rst
.. _Weblate: https://translate.codeberg.org/projects/searxng/searxng/
.. _GitHub Codespaces: https://docs.github.com/en/codespaces/overview
.. _120 hours per month: https://github.com/settings/billing
@@ -90,3 +91,8 @@ rules in this project are:
- Prefer fewer arguments.
- Do not add obvious comments to code.
- Do not comment out code, delete lines instead.

AI Policy
~~~~~~~~~

For our policy on the use of AI tools, please read `AI Policy`_.
Makefile: 9 lines changed
@@ -17,7 +17,6 @@ help:
	@echo 'install - developer install of SearxNG into virtualenv'
	@echo 'uninstall - uninstall developer installation'
	@echo 'clean - clean up working tree'
	@echo 'search.checker - check search engines'
	@echo 'test - run shell & CI tests'
	@echo 'test.shell - test shell scripts'
	@echo 'ci.test - run CI tests'
@@ -39,12 +38,6 @@ clean: py.clean docs.clean node.clean nvm.clean go.clean test.clean
	$(Q)find . -name '*~' -exec rm -f {} +
	$(Q)find . -name '*.bak' -exec rm -f {} +

PHONY += search.checker search.checker.%
search.checker: install
	$(Q)./manage pyenv.cmd searxng-checker -v

search.checker.%: install
	$(Q)./manage pyenv.cmd searxng-checker -v "$(subst _, ,$(patsubst search.checker.%,%,$@))"

PHONY += test ci.test test.shell
test: test.yamllint test.black test.pyright_modified test.pylint test.unit test.robot test.rst test.shell test.shfmt
@@ -70,7 +63,7 @@ format: format.python format.shell
# wrap ./manage script

MANAGE += weblate.translations.commit weblate.push.translations
MANAGE += data.all data.traits data.useragents data.locales data.currencies
MANAGE += data.all data.traits data.useragents data.gsa_useragents data.locales data.currencies
MANAGE += docs.html docs.live docs.gh-pages docs.prebuild docs.clean
MANAGE += podman.build
MANAGE += docker.build docker.buildx
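The `MANAGE +=` lists above make `./manage` subcommands callable as make targets (the "wrap ./manage script" comment marks that block); this diff adds `data.gsa_useragents` to the list. A sketch of the wrapper convention, assuming each generated target simply forwards to `./manage`:

```sh
# These two invocations should be equivalent under the wrapper convention:
make data.gsa_useragents       # make target generated from the MANAGE list
./manage data.gsa_useragents   # direct call to the wrapped script
```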
@@ -1,25 +1,34 @@
## What does this PR do?
<!-- FILL IN THESE FIELDS .. and delete the comments after reading.

<!-- MANDATORY -->
Use Markdown for formatting -> https://www.markdowntools.io/cheat-sheet
-->

<!-- explain the changes in your PR, algorithms, design, architecture -->
### What does this PR do?

## Why is this change important?
<!-- Explain the motivation and changes in your pull request. -->

<!-- MANDATORY -->
### How to test this PR locally?

<!-- explain the motivation behind your PR -->
<!-- Commands to run the tests or instructions to test the changes. Are there
any edge cases (environment, language, or other contexts) to take into
account? -->

## How to test this PR locally?

<!-- commands to run the tests or instructions to test the changes -->

## Author's checklist

<!-- additional notes for reviewers -->

## Related issues
### Related issues

<!--
Closes #234
Closes: #234
-->

### Code of Conduct

<!-- ⚠️ Bad AI drivers will be denounced: People who produce bad contributions
that are clearly AI (slop) will be blocked for all future contributions.
-->

[AI Policy]: https://github.com/searxng/searxng/blob/master/AI_POLICY.rst

- [ ] **I hereby confirm that this PR conforms with the [AI Policy].**

  If I have used AI tools for working on the changes in this PR, I will
  attach a list of all AI tools I used and how I used them. I hereby confirm
  that I haven't used any other tools than the ones I mention below.

@@ -46,11 +46,9 @@ Further information on *how-to* can be found `here <https://docs.searxng.org/adm
Connect
=======

If you have questions or want to connect with others in the community,
we have two official channels:
If you have questions or want to connect with others in the community:

- `#searxng:matrix.org <https://matrix.to/#/#searxng:matrix.org>`_
- `#searxng @ libera.chat <https://web.libera.chat/?channel=#searxng>`_ (bridged to Matrix)

Contributing
============

@@ -4,7 +4,9 @@
  "extends": ["stylelint-config-standard-less"],
  "rules": {
    "at-rule-no-vendor-prefix": null,
    "at-rule-prelude-no-invalid": null,
    "declaration-empty-line-before": null,
    "declaration-property-value-no-unknown": null,
    "no-invalid-position-at-import-rule": null,
    "prettier/prettier": true,
    "property-no-vendor-prefix": null,

@@ -1,5 +1,5 @@
{
  "$schema": "https://biomejs.dev/schemas/2.3.8/schema.json",
  "$schema": "./node_modules/@biomejs/biome/configuration_schema.json",
  "files": {
    "ignoreUnknown": true,
    "includes": ["**", "!node_modules"]
@@ -31,6 +31,8 @@
    "complexity": {
      "noForEach": "error",
      "noImplicitCoercions": "error",
      "noUselessCatchBinding": "error",
      "noUselessUndefined": "error",
      "useSimplifiedLogicExpression": "error"
    },
    "correctness": {
@@ -41,20 +43,25 @@
    },
    "nursery": {
      "noContinue": "warn",
      "noDeprecatedImports": "warn",
      "noEqualsToNull": "warn",
      "noFloatingPromises": "warn",
      "noForIn": "warn",
      "noImportCycles": "warn",
      "noIncrementDecrement": "warn",
      "noMisusedPromises": "warn",
      "noMultiAssign": "warn",
      "noMultiStr": "warn",
      "noNestedPromises": "warn",
      "noParametersOnlyUsedInRecursion": "warn",
      "noUselessCatchBinding": "warn",
      "noUselessUndefined": "warn",
      "noRedundantDefaultExport": "warn",
      "noReturnAssign": "warn",
      "noUselessReturn": "off",
      "useAwaitThenable": "off",
      "useConsistentEnumValueType": "warn",
      "useDestructuring": "warn",
      "useExhaustiveSwitchCases": "warn",
      "useExplicitType": "warn",
      "useFind": "warn"
      "useExplicitType": "off",
      "useFind": "warn",
      "useRegexpExec": "warn"
    },
    "performance": {
      "noAwaitInLoops": "error",
@@ -138,8 +145,10 @@
      "noAlert": "error",
      "noBitwiseOperators": "error",
      "noConstantBinaryExpressions": "error",
      "noDeprecatedImports": "error",
      "noEmptyBlockStatements": "error",
      "noEvolvingTypes": "error",
      "noImportCycles": "error",
      "noUnassignedVariables": "error",
      "noVar": "error",
      "useNumberToFixedDigitsArgument": "error",

@@ -2,7 +2,7 @@

/*
this file is generated automatically by searxng_extra/update/update_pygments.py
using pygments version 2.19.2:
using pygments version 2.20.0:

./manage templates.simple.pygments
*/
client/simple/package-lock.json (generated): 1802 lines changed; file diff suppressed because it is too large
@@ -23,28 +23,27 @@
    "not dead"
  ],
  "dependencies": {
    "ionicons": "~8.0.13",
    "ionicons": "^8.0.13",
    "normalize.css": "8.0.1",
    "ol": "~10.7.0",
    "ol": "^10.9.0",
    "swiped-events": "1.2.0"
  },
  "devDependencies": {
    "@biomejs/biome": "2.3.8",
    "@types/node": "~24.10.1",
    "browserslist": "~4.28.0",
    "browserslist-to-esbuild": "~2.1.1",
    "edge.js": "~6.3.0",
    "less": "~4.4.2",
    "lightningcss": "~1.30.2",
    "mathjs": "~15.0.0",
    "@biomejs/biome": "2.4.12",
    "@types/node": "^25.6.0",
    "browserslist": "^4.28.2",
    "browserslist-to-esbuild": "^2.1.1",
    "edge.js": "^6.5.0",
    "less": "^4.6.4",
    "mathjs": "^15.2.0",
    "sharp": "~0.34.5",
    "sort-package-json": "~3.5.0",
    "stylelint": "~16.26.0",
    "stylelint-config-standard-less": "~3.0.1",
    "stylelint-prettier": "~5.0.3",
    "svgo": "~4.0.0",
    "typescript": "~5.9.3",
    "vite": "npm:rolldown-vite@7.2.7",
    "vite-bundle-analyzer": "~1.2.3"
    "sort-package-json": "^3.6.1",
    "stylelint": "^17.8.0",
    "stylelint-config-standard-less": "^4.1.0",
    "stylelint-prettier": "^5.0.3",
    "svgo": "^4.0.1",
    "typescript": "~6.0.3",
    "vite": "^8.0.8",
    "vite-bundle-analyzer": "^1.3.7"
  }
}
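Most of the pinned `~` (tilde) ranges above move to `^` (caret) ranges. For reference, standard npm semver semantics (general npm behavior, not specific to this repository): tilde allows only patch updates, while caret allows minor and patch updates within the same major version.

```sh
# npm semver range semantics, using "ol" from this diff as the example:
#   "ol": "~10.7.0"   matches >=10.7.0 <10.8.0   (patch updates only)
#   "ol": "^10.9.0"   matches >=10.9.0 <11.0.0   (minor + patch updates)
npm install   # re-resolves the widened ranges into package-lock.json
```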
@@ -23,7 +23,7 @@ export abstract class Plugin {
  protected constructor(id: string) {
    this.id = id;

    void this.invoke();
    queueMicrotask(() => this.invoke());
  }

  private async invoke(): Promise<void> {

@@ -44,8 +44,6 @@ const fetchResults = async (qInput: HTMLInputElement, query: string): Promise<vo

      const form = document.querySelector<HTMLFormElement>("#search");
      form?.submit();

      autocomplete.classList.remove("open");
    });

    fragment.append(li);
@@ -80,6 +78,11 @@ listen("input", qInput, () => {
const autocomplete: HTMLElement | null = document.querySelector<HTMLElement>(".autocomplete");
const autocompleteList: HTMLUListElement | null = document.querySelector<HTMLUListElement>(".autocomplete ul");
if (autocompleteList) {
  listen("keydown", qInput, (event: KeyboardEvent) => {
    if (event.key === "Escape") {
      autocomplete?.classList.remove("open");
    }
  });
  listen("keyup", qInput, (event: KeyboardEvent) => {
    const listItems = [...autocompleteList.children] as HTMLElement[];

@@ -105,7 +108,6 @@ if (autocompleteList) {
          newCurrentIndex = (currentIndex + 1) % listItems.length;
          break;
        }
        case "Tab":
        case "Enter":
          if (autocomplete) {
            autocomplete.classList.remove("open");
@@ -129,4 +131,12 @@ if (autocompleteList) {
      }
    }
  });

  listen("blur", qInput, () => {
    autocomplete?.classList.remove("open");
  });

  listen("focus", qInput, () => {
    autocomplete?.classList.add("open");
  });
}
@@ -190,9 +190,7 @@ const highlightResult =

  let next: HTMLElement | undefined;

  if (typeof effectiveWhich !== "string") {
    next = effectiveWhich;
  } else {
  if (typeof effectiveWhich === "string") {
    switch (effectiveWhich) {
      case "visible": {
        const top = document.documentElement.scrollTop || document.body.scrollTop;
@@ -220,8 +218,10 @@ const highlightResult =
      // biome-ignore lint/complexity/noUselessSwitchCase: fallthrough is intended
      case "top":
      default:
        next = results[0];
        [next] = results;
    }
  } else {
    next = effectiveWhich;
  }

  if (next) {
@@ -343,7 +343,7 @@ const initHelpContent = (divElement: HTMLElement, keyBindings: typeof baseKeyBin
  const categories: Record<string, KeyBinding[]> = {};

  for (const binding of Object.values(keyBindings)) {
    const cat = binding.cat;
    const { cat } = binding;
    categories[cat] ??= [];
    categories[cat].push(binding);
  }
@@ -400,7 +400,7 @@ const toggleHelp = (keyBindings: typeof baseKeyBinding): void => {
    className: "dialog-modal"
  });
  initHelpContent(helpPanel, keyBindings);
  const body = document.getElementsByTagName("body")[0];
  const [body] = document.getElementsByTagName("body");
  if (body) {
    body.appendChild(helpPanel);
  }
@@ -70,8 +70,7 @@ listen("click", "#copy-hash", async function (this: HTMLElement) {
    }
  }

  const copiedText = this.dataset.copiedText;
  if (copiedText) {
    this.innerText = copiedText;
  if (this.dataset.copiedText) {
    this.innerText = this.dataset.copiedText;
  }
});

@@ -135,9 +135,8 @@ listen("click", "#copy_url", async function (this: HTMLElement) {
    }
  }

  const copiedText = this.dataset.copiedText;
  if (copiedText) {
    this.innerText = copiedText;
  if (this.dataset.copiedText) {
    this.innerText = this.dataset.copiedText;
  }
});
@@ -39,9 +39,11 @@ listen("input", searchInput, () => {
  searchReset.classList.toggle("empty", searchInput.value.length === 0);
});

listen("click", searchReset, () => {
  searchReset.classList.add("empty");
listen("click", searchReset, (event: MouseEvent) => {
  event.preventDefault();
  searchInput.value = "";
  searchInput.focus();
  searchReset.classList.add("empty");
});

for (const button of categoryButtons) {
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: AGPL-3.0-or-later

import "ol/ol.css?inline";
import "ol/ol.css";
import { Feature, Map as OlMap, View } from "ol";
import { GeoJSON } from "ol/format";
import { Point } from "ol/geom";
@@ -23,10 +23,10 @@ export default class MapView extends Plugin {
  }

  protected async run(): Promise<void> {
    const { leafletTarget: target, mapLon, mapLat, mapGeojson } = this.map.dataset;
    const { leafletTarget: target, mapLon = "0", mapLat = "0", mapGeojson } = this.map.dataset;

    const lon = Number.parseFloat(mapLon || "0");
    const lat = Number.parseFloat(mapLat || "0");
    const lon = Number.parseFloat(mapLon);
    const lat = Number.parseFloat(mapLat);
    const view = new View({ maxZoom: 16, enableRotation: false });
    const map = new OlMap({
      target: target,
@@ -77,7 +77,10 @@ export default class MapView extends Plugin {

      map.addLayer(geoLayer);

      view.fit(geoSource.getExtent(), { padding: [20, 20, 20, 20] });
      const geoSourceExtent = geoSource.getExtent();
      if (geoSourceExtent) {
        view.fit(geoSourceExtent, { padding: [20, 20, 20, 20] });
      }
    } catch (error) {
      console.error("Failed to create GeoJSON layer:", error);
    }
@@ -123,10 +123,10 @@ export const ready = (callback: () => void, options?: ReadyOptions): void => {
    }
  }

  if (document.readyState !== "loading") {
    callback();
  } else {
  if (document.readyState === "loading") {
    listen("DOMContentLoaded", document, callback, { once: true });
  } else {
    callback();
  }
};
@@ -1,38 +0,0 @@
// SPDX-License-Identifier: AGPL-3.0-or-later

.stats_endpoint {
  .github-issue-button {
    display: block;
    font-size: 16px;
  }

  .issue-hide {
    display: none;
  }

  input[type="checked"] {
    position: absolute;
  }

  label {
    margin: 1rem 1rem 1rem 0;
  }

  .step_content {
    margin: 1rem 1rem 1rem 2rem;
  }

  .step1,
  .step2 {
    visibility: hidden;
  }

  .step1_delay {
    transition: visibility 0s linear 4s;
  }

  #step1:checked ~ .step1,
  #step2:checked ~ .step2 {
    visibility: visible;
  }
}
@@ -17,7 +17,7 @@
  }

  table {
    word-break: break-word;
    word-break: break-all;
    table-layout: fixed;
    width: 100%;
    background-color: var(--color-result-keyvalue-table);

@@ -41,7 +41,7 @@
    font-size: 0.9rem;
    margin: 0.25rem 0 0 0;
    padding: 0;
    word-wrap: break-word;
    overflow-wrap: break-word;
    line-height: 1.24;
    font-style: italic;
  }
@@ -11,7 +11,6 @@
@import "animations.less";
@import "embedded.less";
@import "info.less";
@import "new_issue.less";
@import "stats.less";
@import "result_templates.less";
@import "weather.less";
@@ -188,7 +187,7 @@ article[data-vim-selected].category-social {

  h3 {
    font-size: 1.2rem;
    word-wrap: break-word;
    overflow-wrap: break-word;
    margin: 0.4rem 0 0.4rem 0;
    padding: 0;

@@ -222,7 +221,7 @@ article[data-vim-selected].category-social {
    margin: 0;
    padding: 0;
    max-width: 54em;
    word-wrap: break-word;
    overflow-wrap: break-word;
    line-height: 1.24;

    .highlight {
@@ -366,7 +365,7 @@ article[data-vim-selected].category-social {
    font-size: 0.9rem;
    margin: 0.25rem 0 0 0;
    padding: 0;
    word-wrap: break-word;
    overflow-wrap: break-word;
    line-height: 1.24;
    font-style: italic;
  }
@@ -571,13 +570,16 @@ article[data-vim-selected].category-social {

#suggestions {
  .wrapper {
    display: flex;
    flex-flow: column;
    justify-content: flex-end;
    padding-left: 0;
    margin: 0;
    list-style-position: inside;

    li::marker {
      color: var(--color-result-link-font);
    }

    form {
      display: inline-block;
      flex: 1 1 50%;
    }
  }
}
@@ -585,8 +587,8 @@ article[data-vim-selected].category-social {
#suggestions,
#infoboxes {
  input {
    padding: 0;
    margin: 3px;
    padding: 3px;
    margin: 0;
    font-size: 0.9em;
    display: inline-block;
    background: transparent;
@@ -688,7 +690,7 @@ summary.title {

#sidebar {
  grid-area: sidebar;
  word-wrap: break-word;
  overflow-wrap: break-word;
  color: var(--color-sidebar-font);

  .infobox {
@@ -1029,10 +1031,6 @@ summary.title {
    / 100%;
  gap: 0;

  #sidebar {
    display: none;
  }

  #urls {
    margin: 0;
    display: flex;

@@ -610,7 +610,7 @@ td:hover .engine-tooltip,
  .stacked-bar-chart-base();

  background: var(--color-base-font);
  border: 1px solid rgba(var(--color-base-font-rgb), 0.9);
  border: 1px solid rgb(var(--color-base-font-rgb), 0.9);
  padding: 0.3rem 0;
}

@@ -618,7 +618,7 @@ td:hover .engine-tooltip,
  .stacked-bar-chart-base();

  background: transparent;
  border: 1px solid rgba(var(--color-base-font-rgb), 0.3);
  border: 1px solid rgb(var(--color-base-font-rgb), 0.3);
  padding: 0.3rem 0;
}

@@ -626,7 +626,7 @@ td:hover .engine-tooltip,
  .stacked-bar-chart-base();

  background: transparent;
  border-bottom: 1px dotted rgba(var(--color-base-font-rgb), 0.5);
  border-bottom: 1px dotted rgb(var(--color-base-font-rgb), 0.5);
  padding: 0;
}

@@ -634,7 +634,7 @@ td:hover .engine-tooltip,
  .stacked-bar-chart-base();

  background: transparent;
  border-left: 1px solid rgba(var(--color-base-font-rgb), 0.9);
  border-left: 1px solid rgb(var(--color-base-font-rgb), 0.9);
  padding: 0.4rem 0;
  width: 1px;
}
@@ -18,8 +18,10 @@ export type Src2Dest = {
 * Convert a list of SVG files to PNG.
 *
 * @param items - Array of SVG files (src: SVG, dest:PNG) to convert.
 * @param width - (optional) width of the PNG pictures
 * @param height - (optional) height of the PNG pictures.
 */
export const svg2png = (items: Src2Dest[]): void => {
export const svg2png = (items: Src2Dest[], width?: number, height?: number): void => {
  for (const item of items) {
    fs.mkdirSync(path.dirname(item.dest), { recursive: true });

@@ -29,6 +31,9 @@ export const svg2png = (items: Src2Dest[]): void => {
        compressionLevel: 9,
        palette: true
      })
      .resize(width, height, {
        fit: "contain"
      })
      .toFile(item.dest)
      .then((info) => {
        console.log(`[svg2png] created ${item.dest} -- bytes: ${info.size}, w:${info.width}px, h:${info.height}px`);
@@ -17,13 +17,15 @@ import { type Src2Dest, svg2png, svg2svg } from "./img.ts";
 * Vite plugin to convert a list of SVG files to PNG.
 *
 * @param items - Array of SVG files (src: SVG, dest:PNG) to convert.
 * @param width - (optional) width of the PNG picture
 * @param height - (optional) height of the PNG picture
 */
export const plg_svg2png = (items: Src2Dest[]): Plugin => {
export const plg_svg2png = (items: Src2Dest[], width?: number, height?: number): Plugin => {
  return {
    name: "searxng-simple-svg2png",
    apply: "build",
    writeBundle: () => {
      svg2png(items);
      svg2png(items, width, height);
    }
  };
};
@@ -31,7 +31,6 @@
    "noUnusedParameters": true,
    "verbatimModuleSyntax": true,

    "baseUrl": ".",
    "tsBuildInfoFile": "./node_modules/.tmp/tsconfig.tsbuildinfo",
    "types": ["vite/client"]
  },
@@ -34,7 +34,7 @@ const svg2svg_favicon_opts: Config = {
};

export default {
  base: "/static/themes/simple/",
  base: "./",
  publicDir: "static/",

  build: {
@@ -74,6 +74,9 @@ export default {
            .normalize("NFD")
            .replace(/[^a-zA-Z0-9.-]/g, "_")
            .toLowerCase();
        },
        comments: {
          legal: true
        }
      }
    }
@@ -129,6 +132,28 @@
      }
    ]),

    // SearXNG PWA Icons (static)
    plg_svg2png(
      [
        {
          src: `${PATH.brand}/searxng-wordmark.svg`,
          dest: `${PATH.dist}/img/512.png`
        }
      ],
      512,
      512
    ),
    plg_svg2png(
      [
        {
          src: `${PATH.brand}/searxng-wordmark.svg`,
          dest: `${PATH.dist}/img/192.png`
        }
      ],
      192,
      192
    ),

    // -- svg
    plg_svg2svg(
      [
container/.env.example (new file): 15 lines
@@ -0,0 +1,15 @@
# Read the documentation before using the `docker-compose.yml` file:
# https://docs.searxng.org/admin/installation-docker.html
#
# Additional ENVs:
# https://docs.searxng.org/admin/settings/settings_general.html#settings-general
# https://docs.searxng.org/admin/settings/settings_server.html#settings-server

# Use a specific version tag. E.g. "latest" or "2026.3.25-541c6c3cb".
#SEARXNG_VERSION=latest

# Listen to a specific address.
#SEARXNG_HOST=[::]

# Listen to a specific port.
#SEARXNG_PORT=8080
@@ -15,17 +15,17 @@ ARG VCS_URL="unknown"
ARG VCS_REVISION="unknown"

LABEL org.opencontainers.image.created="$CREATED" \
    org.opencontainers.image.description="SearXNG is a metasearch engine. Users are neither tracked nor profiled." \
    org.opencontainers.image.documentation="https://docs.searxng.org/admin/installation-docker" \
    org.opencontainers.image.licenses="AGPL-3.0-or-later" \
    org.opencontainers.image.revision="$VCS_REVISION" \
    org.opencontainers.image.source="$VCS_URL" \
    org.opencontainers.image.title="SearXNG" \
    org.opencontainers.image.url="https://searxng.org" \
    org.opencontainers.image.version="$VERSION"
    org.opencontainers.image.description="SearXNG is a metasearch engine. Users are neither tracked nor profiled." \
    org.opencontainers.image.documentation="https://docs.searxng.org/admin/installation-docker" \
    org.opencontainers.image.licenses="AGPL-3.0-or-later" \
    org.opencontainers.image.revision="$VCS_REVISION" \
    org.opencontainers.image.source="$VCS_URL" \
    org.opencontainers.image.title="SearXNG" \
    org.opencontainers.image.url="https://searxng.org" \
    org.opencontainers.image.version="$VERSION"

ENV SEARXNG_VERSION="$VERSION" \
    SEARXNG_SETTINGS_PATH="$CONFIG_PATH/settings.yml" \
ENV __SEARXNG_VERSION="$VERSION" \
    __SEARXNG_SETTINGS_PATH="$__SEARXNG_CONFIG_PATH/settings.yml" \
    GRANIAN_PROCESS_NAME="searxng" \
    GRANIAN_INTERFACE="wsgi" \
    GRANIAN_HOST="::" \
@@ -36,8 +36,8 @@ ENV SEARXNG_VERSION="$VERSION" \
    GRANIAN_BLOCKING_THREADS_IDLE_TIMEOUT="5m"

# "*_PATH" ENVs are defined in base images
VOLUME $CONFIG_PATH
VOLUME $DATA_PATH
VOLUME $__SEARXNG_CONFIG_PATH
VOLUME $__SEARXNG_DATA_PATH

EXPOSE 8080
container/docker-compose.yml (new file, 28 lines)
@@ -0,0 +1,28 @@
# Read the documentation before using the `docker-compose.yml` file:
# https://docs.searxng.org/admin/installation-docker.html

name: searxng

services:
  core:
    container_name: searxng-core
    image: docker.io/searxng/searxng:${SEARXNG_VERSION:-latest}
    restart: always
    ports:
      - ${SEARXNG_HOST:+${SEARXNG_HOST}:}${SEARXNG_PORT:-8080}:${SEARXNG_PORT:-8080}
    env_file: ./.env
    volumes:
      - ./core-config/:/etc/searxng/:Z
      - core-data:/var/cache/searxng/

  valkey:
    container_name: searxng-valkey
    image: docker.io/valkey/valkey:9-alpine
    command: valkey-server --save 30 1 --loglevel warning
    restart: always
    volumes:
      - valkey-data:/data/

volumes:
  core-data:
  valkey-data:
@@ -117,16 +117,22 @@ EOF
}

cat <<EOF
SearXNG $SEARXNG_VERSION
SearXNG $__SEARXNG_VERSION
EOF

# Check for volume mounts
volume_handler "$CONFIG_PATH"
volume_handler "$DATA_PATH"
volume_handler "$__SEARXNG_CONFIG_PATH"
volume_handler "$__SEARXNG_DATA_PATH"

# Check for files
config_handler "$SEARXNG_SETTINGS_PATH" "/usr/local/searxng/searx/settings.yml"
config_handler "$__SEARXNG_SETTINGS_PATH" "/usr/local/searxng/searx/settings.yml"

update-ca-certificates
# root only features
if [ "$(id -u)" -eq 0 ]; then
    update-ca-certificates
fi

# ENVs aliases
export GRANIAN_PORT="${SEARXNG_PORT:-$GRANIAN_PORT}"

exec /usr/local/searxng/.venv/bin/granian searx.webapp:app
@@ -9,7 +9,6 @@ Installation container
.. _Podman rootless containers: https://github.com/containers/podman/blob/main/docs/tutorials/rootless_tutorial.md
.. _DockerHub mirror: https://hub.docker.com/r/searxng/searxng
.. _GHCR mirror: https://ghcr.io/searxng/searxng
.. _Docker compose: https://github.com/searxng/searxng-docker

.. sidebar:: info

@@ -27,7 +26,7 @@ Installation container
   `Docker 101`_ before proceeding.

Container images are the basis for deployments in containerized environments,
`Docker compose`_, Kubernetes and more.
Compose, Kubernetes and more.

.. _Container installation:

@@ -55,10 +54,10 @@ In the case of Docker, you need to add the user running the container to the
In the case of Podman, no additional steps are generally required, but there
are some considerations when running `Podman rootless containers`_.

.. _Container pulling images:
.. _Container registries:

Pulling images
--------------
Registries
----------

.. note::

@@ -70,26 +69,115 @@ The official images are mirrored at:
- `DockerHub mirror`_
- `GHCR mirror`_ (GitHub Container Registry)

Pull the latest image:
.. _Container compose instancing:

Compose instancing
==================

This is the recommended way to deploy SearXNG in a containerized environment.
Compose templates allow you to define container configurations in a
declarative manner.

.. _Container compose instancing setup:

Setup
-----

1. Create the environment:

.. code:: sh

   $ docker pull docker.io/searxng/searxng:latest
   # Create the environment and configuration directories
   $ mkdir -p ./searxng/core-config/
   $ cd ./searxng/

.. or if you want to lock in to a specific version:
   # Fetch the latest compose template
   $ curl -fsSL \
       -O https://raw.githubusercontent.com/searxng/searxng/master/container/docker-compose.yml \
       -O https://raw.githubusercontent.com/searxng/searxng/master/container/.env.example

2. Copy the ``.env.example`` file and edit the values as needed:

.. code:: sh

   $ docker pull docker.io/searxng/searxng:2025.8.1-3d96414
   $ cp -i .env.example .env

.. _Container instancing:
   # nano or your preferred text editor...
   $ nano .env

Instancing
==========
3. Start & stop the services:

.. code:: sh

   $ docker compose up -d
   $ docker compose down

4. Set up your settings in ``core-config/settings.yml`` according to your preferences.
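To verify that the stack is up, a quick check from Python (a minimal sketch,
assuming the default port ``8080`` is published on localhost and that the
instance exposes a ``/healthz`` endpoint):

.. code:: python

   import urllib.request

   # a healthy instance is expected to answer with "200 OK"
   with urllib.request.urlopen("http://localhost:8080/healthz") as resp:
       print(resp.status, resp.read().decode().strip())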
.. _Container compose instancing maintenance:

Management
----------

.. important::

   Remember to review the new templates for any changes that may affect your
   deployment, and update the ``.env`` file accordingly.

To update the templates to their latest versions:

.. code:: sh

   $ docker compose down
   $ curl -fsSLO \
       https://raw.githubusercontent.com/searxng/searxng/master/container/docker-compose.yml \
       https://raw.githubusercontent.com/searxng/searxng/master/container/.env.example
   $ docker compose up -d

To update the services to their latest versions:

.. code:: sh

   $ docker compose down
   $ docker compose pull
   $ docker compose up -d

List running services:

.. code:: sh

   $ docker compose ps
   NAME             IMAGE  ...  CREATED        STATUS        PORTS
   searxng-core     ...    ...  3 minutes ago  Up 3 minutes  0.0.0.0:8080->8080/tcp
   searxng-valkey   ...    ...  3 minutes ago  Up 3 minutes  6379/tcp

Print a service container's logs:

.. code:: sh

   $ docker compose logs -f core

Access a service container shell (troubleshooting):

.. code:: sh

   $ docker compose exec -it --user root core /bin/sh -l
   /usr/local/searxng #

Stop and remove the services:

.. code:: sh

   $ docker compose down

.. _Container manual instancing:

Manual instancing
=================

This section is intended for advanced users who need custom deployments. We
recommend using `Docker compose`_, which provides a preconfigured environment
with sensible defaults.
recommend using `Container compose instancing`_, which provides a preconfigured
environment with sensible defaults.

Basic container instancing example:
@@ -121,12 +209,18 @@ List running containers:

   CONTAINER ID   IMAGE  ...  CREATED        PORTS                    NAMES
   1af574997e63   ...    ...  3 minutes ago  0.0.0.0:8888->8080/tcp   searxng

Print the container logs:

.. code:: sh

   $ docker container logs -f searxng

Access the container shell (troubleshooting):

.. code:: sh

   $ docker container exec -it --user root searxng /bin/sh -l
   1af574997e63:/usr/local/searxng#
   /usr/local/searxng #

Stop and remove the container:
@@ -153,18 +247,12 @@ Environment variables

The following environment variables can be configured:

- ``$SEARXNG_*``: Controls the SearXNG configuration options, look out for
  environment ``$SEARXNG_*`` in :ref:`settings server` and :ref:`settings
  general`.
  environment ``$SEARXNG_*`` in :ref:`settings server`, :ref:`settings
  general` and the :origin:`container/.env.example` template.
- ``$GRANIAN_*``: Controls the :ref:`Granian server options <Granian configuration>`.
- ``$FORCE_OWNERSHIP``: Ensures mounted volumes/files are owned by the
  ``searxng:searxng`` user (default: ``true``)

Container internal paths (don't modify unless you know what you're doing):

- ``$CONFIG_PATH``: Path to the SearXNG configuration directory (default: ``/etc/searxng``)
- ``$SEARXNG_SETTINGS_PATH``: Path to the SearXNG settings file (default: ``$CONFIG_PATH/settings.yml``)
- ``$DATA_PATH``: Path to the SearXNG data directory (default: ``/var/cache/searxng``)

.. _Container custom certificates:

Custom certificates
@@ -176,6 +264,8 @@ additional certificates as needed.
They will be available on container (re)start or when running
``update-ca-certificates`` in the container shell.

This requires the container to be running with ``root`` privileges.

.. _Container custom images:

Custom images
@@ -199,3 +289,61 @@ container images are not officially supported):

   localhost/searxng/searxng  builder          ...  About a minute ago  524 MB
   ghcr.io/searxng/base       searxng-builder  ...  2 days ago          378 MB
   ghcr.io/searxng/base       searxng          ...  2 days ago          42.2 MB
Migrate from ``searxng-docker``
===============================

We expect the following source directory structure:

.. code:: sh

   .
   └── searxng-docker
       ├── searxng
       │   ├── favicons.toml
       │   ├── limiter.toml
       │   ├── settings.yml
       │   └── ...
       ├── .env
       ├── Caddyfile
       ├── docker-compose.yml
       └── ...

Create a brand new environment outside the ``searxng-docker`` directory,
following `Container compose instancing setup`_.

Once up and running, stop the services and move the configuration files from
the old mount to the new one:

.. code:: sh

   $ mv ./searxng-docker/searxng/* ./searxng/core-config/

If using Valkey features like bot protection (limiter), you will need to update
the URL hostname in the :origin:`searx/settings.yml` file to ``valkey`` or
``searxng-valkey``.

If you have any environment variables in the old ``.env`` file, make
sure to add them to the new one.

Consider setting up a reverse proxy if exposing the instance to the public.

You should end up with the following directory structure:

.. code:: sh

   .
   ├── searxng
   │   ├── core-config
   │   │   ├── favicons.toml
   │   │   ├── limiter.toml
   │   │   ├── settings.yml
   │   │   └── ...
   │   ├── .env.example
   │   ├── .env
   │   └── docker-compose.yml
   └── searxng-docker
       └── ...

If everything is working in the new environment, you can remove the old
``searxng-docker`` directory and its contents.
@@ -124,14 +124,17 @@ engine is shown. Most of the options have a default value or even are optional.

``api_key`` : optional
  In a few cases, using an API requires a secret key. How to obtain it
  is described in the file.
  is described in the file. Engines that require an API key are set to
  ``inactive: true`` by default. To enable such an engine, provide the API key
  and set ``inactive: false``.

``disabled`` : optional
  Disables the engine by default, without deleting it. The user can still
  manually activate it in the settings.

``inactive`` : optional
  Remove the engine from the settings (*disabled & removed*).
  Remove the engine from the settings (*disabled & removed*). This defaults to ``true`` for engines
  that require an API key; please see the ``api_key`` section if you want to enable such an engine.

``language`` : optional
  If you want to use another language for a specific engine, you can define it
@@ -36,6 +36,7 @@

- ``360search``
- ``baidu``
- ``bing``
- ``brave``
- ``dbpedia``
- ``duckduckgo``
@@ -47,7 +48,6 @@
- ``seznam``
- ``sogou``
- ``startpage``
- ``stract``
- ``swisscows``
- ``wikipedia``
- ``yandex``
@@ -119,8 +119,12 @@ ${fedora_build}
pip install -U pip
pip install -U setuptools
pip install -U wheel

# additional packages required for installation
pip install -U pyyaml
pip install -U msgspec
pip install -U typing-extensions
pip install -U pybind11

# jump to SearXNG's working tree and install SearXNG into virtualenv
(${SERVICE_USER})$ cd \"$SEARXNG_SRC\"
docs/dev/engines/online/aol.rst (new file, 8 lines)
@@ -0,0 +1,8 @@
.. _aol engine:

===
AOL
===

.. automodule:: searx.engines.aol
   :members:
@@ -1,5 +1,3 @@
.. _brave engine:

=============
Brave Engines
=============
@@ -9,5 +7,26 @@ Brave Engines
   :local:
   :backlinks: entry

Brave offers two different engines for SearXNG:

1. The standard engine (``brave``) uses the web interface.
2. The API engine (``braveapi``) uses the official REST API.

.. _brave engine:

Brave Standard Engine
---------------------

.. automodule:: searx.engines.brave
   :members:
   :members:

.. _braveapi engine:

Brave API Engine
----------------

.. automodule:: searx.engines.braveapi
   :members:

The API engine requires an API key from Brave. This can be obtained from the
`API Dashboard <https://api-dashboard.search.brave.com/>`_.
docs/dev/engines/online/karmasearch.rst (new file, 8 lines)
@@ -0,0 +1,8 @@
.. _karmasearch engine:

===========
Karmasearch
===========

.. automodule:: searx.engines.karmasearch
   :members:
@@ -1,13 +0,0 @@
.. _seekr engine:

=============
Seekr Engines
=============

.. contents:: Contents
   :depth: 2
   :local:
   :backlinks: entry

.. automodule:: searx.engines.seekr
   :members:
@@ -291,38 +291,6 @@ Pylint_ is known as one of the best source-code, bug and quality checkers for the
Python programming language. The pylint profile used in the SearXNG project is
found in the project's root folder :origin:`.pylintrc`.

.. _make search.checker:

``make search.checker.{engine name}``
=====================================

To check all engines::

   make search.checker

To check an engine with whitespace in its name, like *google news*, replace the
space with an underscore::

   make search.checker.google_news

To see HTTP requests and more, use SEARXNG_DEBUG::

   make SEARXNG_DEBUG=1 search.checker.google_news

.. _3xx: https://en.wikipedia.org/wiki/List_of_HTTP_status_codes#3xx_redirection

To filter out HTTP redirects (3xx_)::

   make SEARXNG_DEBUG=1 search.checker.google_news | grep -A1 "HTTP/1.1\" 3[0-9][0-9]"
   ...
   Engine google news                   Checking
   https://news.google.com:443 "GET /search?q=life&hl=en&lr=lang_en&ie=utf8&oe=utf8&ceid=US%3Aen&gl=US HTTP/1.1" 302 0
   https://news.google.com:443 "GET /search?q=life&hl=en-US&lr=lang_en&ie=utf8&oe=utf8&ceid=US:en&gl=US HTTP/1.1" 200 None
   --
   https://news.google.com:443 "GET /search?q=computer&hl=en&lr=lang_en&ie=utf8&oe=utf8&ceid=US%3Aen&gl=US HTTP/1.1" 302 0
   https://news.google.com:443 "GET /search?q=computer&hl=en-US&lr=lang_en&ie=utf8&oe=utf8&ceid=US:en&gl=US HTTP/1.1" 200 None
   --

.. _make themes:

``make themes.*``
@@ -4,15 +4,33 @@
Search API
==========

The search supports both ``GET`` and ``POST``.
SearXNG supports querying via a simple HTTP API.
Two endpoints, ``/`` and ``/search``, are supported for both GET and POST methods.
The GET method expects parameters as URL query parameters, while the POST method expects parameters as form data.

Furthermore, two endpoints ``/`` and ``/search`` are available for querying.
If you want to consume the results as JSON, CSV, or RSS, you need to set the
``format`` parameter accordingly. Supported formats are defined in ``settings.yml``, under the ``search`` section.
Requesting an unset format will return a 403 Forbidden error. Be aware that many public instances have these formats disabled.


Endpoints:

``GET /``

``GET /search``

``POST /``
``POST /search``

Example cURL calls:

.. code-block:: bash

   curl 'https://searx.example.org/search?q=searxng&format=json'

   curl -X POST 'https://searx.example.org/search' -d 'q=searxng&format=csv'

   curl -L -X POST -d 'q=searxng&format=json' 'https://searx.example.org/'
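The JSON format is the easiest to consume programmatically. A minimal sketch in
Python (assuming an instance at ``searx.example.org`` with the ``json`` format
enabled):

.. code-block:: python

   import json
   from urllib.request import urlopen

   with urlopen("https://searx.example.org/search?q=searxng&format=json") as resp:
       data = json.load(resp)

   # each result carries at least "url", "title" and "content"
   for result in data["results"][:3]:
       print(result["title"], "->", result["url"])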
Parameters
==========

@@ -69,7 +87,7 @@ Parameters

``autocomplete`` : default from :ref:`settings search`
  [ ``google``, ``dbpedia``, ``duckduckgo``, ``mwmbl``, ``startpage``,
  ``wikipedia``, ``stract``, ``swisscows``, ``qwant`` ]
  ``wikipedia``, ``swisscows``, ``qwant`` ]

  Service which completes words as you type.

@@ -1,7 +1,7 @@
[tools]
# minimal version we support
python = "3.10"
node = "24.3.0"
python = "3.11"
node = "25"
go = "1.24.5"
shellcheck = "0.11.0"
# python 3.10 uses 3.40.1 (on mac and win)
@@ -1,28 +1,26 @@
mock==5.2.0
nose2[coverage_plugin]==0.15.1
nose2[coverage_plugin]==0.16.0
cov-core==1.15.0
black==25.9.0
pylint==4.0.3
pylint==4.0.5
splinter==0.21.0
selenium==4.38.0
Pallets-Sphinx-Themes==2.3.0
Sphinx==8.2.3 ; python_version >= '3.11'
Sphinx==8.1.3 ; python_version < '3.11'
sphinx-issues==5.0.1
selenium==4.43.0
Pallets-Sphinx-Themes==2.5.0
Sphinx==8.2.3
sphinx-issues==6.0.0
sphinx-jinja==2.0.2
sphinx-tabs==3.4.7
sphinxcontrib-programoutput==0.18
sphinx-autobuild==2025.8.25 ; python_version >= '3.11'
sphinx-autobuild==2024.10.3 ; python_version < '3.11'
sphinx-tabs==3.5.0
sphinxcontrib-programoutput==0.19
sphinx-autobuild==2025.8.25
sphinx-notfound-page==1.1.0
myst-parser==4.0.1
myst-parser==5.0.0
linuxdoc==20240924
aiounittest==1.5.0
yamllint==1.37.1
wlc==1.16.1
yamllint==1.38.0
wlc==1.17.2
coloredlogs==15.0.1
docutils>=0.21.2
parameterized==0.9.0
granian[reload]==2.6.0
basedpyright==1.34.0
types-lxml==2025.11.25
granian[reload]==2.7.3
basedpyright==1.39.2
types-lxml==2026.2.16
@@ -1,2 +1,2 @@
granian==2.6.0
granian[pname]==2.6.0
granian==2.7.3
granian[pname]==2.7.3
@@ -1,21 +1,19 @@
certifi==2025.11.12
babel==2.17.0
certifi==2026.2.25
babel==2.18.0
flask-babel==4.0.0
flask==3.1.2
flask==3.1.3
jinja2==3.1.6
lxml==6.0.2
pygments==2.19.2
lxml==6.0.4
pygments==2.20.0
python-dateutil==2.9.0.post0
pyyaml==6.0.3
httpx[http2]==0.28.1
httpx-socks[asyncio]==0.10.0
sniffio==1.3.1
valkey==6.1.1
markdown-it-py==3.0.0
fasttext-predict==0.9.2.4
tomli==2.3.0; python_version < '3.11'
msgspec==0.20.0
typer-slim==0.20.0
markdown-it-py==4.0.0
msgspec==0.21.1
typer==0.24.1
isodate==0.7.2
whitenoise==6.11.0
whitenoise==6.12.0
typing-extensions==4.15.0
@@ -1,11 +1,13 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
"""This module implements functions needed for the autocompleter."""

# pylint: disable=use-dict-literal
import string
import random

import json
import html
import typing as t
from urllib.parse import urlencode, quote_plus
from urllib.parse import urlencode

import lxml.etree
import lxml.html
@@ -16,7 +18,7 @@ from searx.engines import (
    engines,
    google,
)
from searx.network import get as http_get, post as http_post  # pyright: ignore[reportUnknownVariableType]
from searx.network import get as http_get, post as http_post
from searx.exceptions import SearxEngineResponseException
from searx.utils import extr, gen_useragent
@@ -54,6 +56,26 @@ def baidu(query: str, _sxng_locale: str) -> list[str]:
    return results


def bing(query: str, _sxng_locale: str) -> list[str]:
    # bing search autocompleter
    base_url = "https://www.bing.com/AS/Suggestions?"
    # cvid has to be a 32-character string consisting of digits and uppercase characters
    cvid = ''.join(random.choice(string.ascii_uppercase + string.digits) for _ in range(32))
    response = get(base_url + urlencode({'qry': query, 'csr': 1, 'cvid': cvid}))
    results: list[str] = []

    if response.ok:
        data: dict[str, t.Any] = response.json()
        if 's' in data:
            for item in data['s']:
                completion: str = item['q']
                # bing uses PUA unicode characters to highlight parts of the query
                # we have to remove these manually (U+E000 and U+E001)
                completion = completion.replace("\ue000", "").replace("\ue001", "")
                results.append(completion)
    return results
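A usage sketch (illustrative only, performs a live request to Bing):

    bing("searxng", "en-US")  # -> e.g. ["searxng", "searxng docker", ...]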

def brave(query: str, _sxng_locale: str) -> list[str]:
    # brave search autocompleter
    url = 'https://search.brave.com/api/suggest?'
@@ -268,18 +290,6 @@ def startpage(query: str, sxng_locale: str) -> list[str]:
    return results


def stract(query: str, _sxng_locale: str) -> list[str]:
    # stract autocompleter (beta)
    url = f"https://stract.com/beta/api/autosuggest?q={quote_plus(query)}"
    resp = post(url)
    results: list[str] = []

    if resp.ok:
        results = [html.unescape(suggestion['raw']) for suggestion in resp.json()]

    return results


def swisscows(query: str, _sxng_locale: str) -> list[str]:
    # swisscows autocompleter
    url = 'https://swisscows.ch/api/suggest?{query}&itemsCount=5'
@@ -344,6 +354,7 @@ def yandex(query: str, _sxng_locale: str) -> list[str]:
backends: dict[str, t.Callable[[str, str], list[str]]] = {
    '360search': qihu360search,
    'baidu': baidu,
    'bing': bing,
    'brave': brave,
    'dbpedia': dbpedia,
    'duckduckgo': duckduckgo,
@@ -355,7 +366,6 @@ backends: dict[str, t.Callable[[str, str], list[str]]] = {
    'seznam': seznam,
    'sogou': sogou,
    'startpage': startpage,
    'stract': stract,
    'swisscows': swisscows,
    'wikipedia': wikipedia,
    'yandex': yandex,
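A usage sketch (illustrative only): every backend shares the signature
``(query, sxng_locale) -> list[str]``, so dispatch is a plain dict lookup:

    func = backends["brave"]
    suggestions = func("searxng", "en-US")  # performs a network request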
@@ -13,7 +13,7 @@ import copy
import logging
import pathlib

from ..compat import tomllib
import tomllib

__all__ = ['Config', 'UNSET', 'SchemaIssue', 'set_global_cfg', 'get_global_cfg']

@@ -18,6 +18,17 @@ class BrandCustom(msgspec.Struct, kw_only=True, forbid_unknown_fields=True):
    """Custom entries in the footer of the WEB page: ``[title]: [link]``"""


class ThemeColors(msgspec.Struct, kw_only=True, forbid_unknown_fields=True):
    """Custom settings for theme colors in the brand section."""

    theme_color_light: str = "#3050ff"
    background_color_light: str = "#fff"
    theme_color_dark: str = "#58f"
    background_color_dark: str = "#222428"
    theme_color_black: str = "#3050ff"
    background_color_black: str = "#000"
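A quick sketch of how such a struct behaves (illustrative; ``msgspec.convert``
validates the input and fills in the defaults, and unknown keys raise a
validation error because of ``forbid_unknown_fields=True``):

    import msgspec

    colors = msgspec.convert({"theme_color_dark": "#123456"}, ThemeColors)
    colors.theme_color_dark        # -> "#123456"
    colors.background_color_dark   # -> "#222428" (default kept)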

class SettingsBrand(msgspec.Struct, kw_only=True, forbid_unknown_fields=True):
    """Options for configuring brand properties.
@@ -54,6 +65,9 @@ class SettingsBrand(msgspec.Struct, kw_only=True, forbid_unknown_fields=True):
       :members:
    """

    pwa_colors: ThemeColors = msgspec.field(default_factory=ThemeColors)
    """Custom settings for PWA colors."""

    # new_issue_url is a hackish solution tailored for only one hoster (GH). As
    # long as we don't have a more general solution, we should support it in the
    # given function, but it should not be expanded further.
@@ -1,25 +1,9 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
"""Compatibility with older versions"""

# pylint: disable=unused-import

__all__ = [
    "tomllib",
]

import sys
import warnings


# TOML (lib) compatibility
# ------------------------

if sys.version_info >= (3, 11):
    import tomllib
else:
    import tomli as tomllib


# limiter backward compatibility
# ------------------------------
@@ -6,7 +6,7 @@ make data.all
"""
# pylint: disable=invalid-name

__all__ = ["ahmia_blacklist_loader", "data_dir", "get_cache"]
__all__ = ["ahmia_blacklist_loader", "gsa_useragents_loader", "data_dir", "get_cache"]

import json
import typing as t
@@ -63,6 +63,7 @@ lazy_globals = {
    "ENGINE_TRAITS": None,
    "LOCALES": None,
    "TRACKER_PATTERNS": TrackerPatternsDB(),
    "GSA_USER_AGENTS": None,
}

data_json_files = {
@@ -105,3 +106,24 @@ def ahmia_blacklist_loader() -> list[str]:
    """
    with open(data_dir / 'ahmia_blacklist.txt', encoding='utf-8') as f:
        return f.read().split()


def gsa_useragents_loader() -> list[str]:
    """Load data from `gsa_useragents.txt` and return a list of user agents
    suitable for Google. The user agents are fetched by::

        searxng_extra/update/update_gsa_useragents.py

    This function is used by :py:mod:`searx.engines.google`.

    """
    data = lazy_globals["GSA_USER_AGENTS"]
    if data is not None:
        return data

    log.debug("init searx.data.%s", "GSA_USER_AGENTS")

    with open(data_dir / 'gsa_useragents.txt', encoding='utf-8') as f:
        lazy_globals["GSA_USER_AGENTS"] = f.read().splitlines()

    return lazy_globals["GSA_USER_AGENTS"]
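A usage sketch of the new loader (illustrative only); a caller such as the
google engine might pick a random entry per request along these lines:

    import random
    from searx.data import gsa_useragents_loader

    headers = {"User-Agent": random.choice(gsa_useragents_loader())}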
(Diffs of several large generated data files are suppressed.)

searx/data/gsa_useragents.txt (new file, 2661 lines; diff suppressed)
@@ -5,7 +5,7 @@
  ],
  "ua": "Mozilla/5.0 ({os}; rv:{version}) Gecko/20100101 Firefox/{version}",
  "versions": [
    "145.0",
    "144.0"
    "149.0",
    "148.0"
  ]
}
@@ -1434,11 +1434,6 @@
    "symbol": "kg/(m s)",
    "to_si_factor": 1.0
  },
  "Q106653484": {
    "si_name": "Q106653452",
    "symbol": "kg/(m min)",
    "to_si_factor": 0.0166667
  },
  "Q106680668": {
    "si_name": "Q106680668",
    "symbol": "s⁻²",
@@ -2145,17 +2140,17 @@
    "to_si_factor": 0.5424919
  },
  "Q107410794": {
    "si_name": "Q106653452",
    "si_name": "Q21016931",
    "symbol": "kg/(m d)",
    "to_si_factor": 1.15741e-05
    "to_si_factor": 1.1574074074074073e-05
  },
  "Q107410795": {
    "si_name": "Q106653452",
    "si_name": "Q21016931",
    "symbol": "kg/(m h)",
    "to_si_factor": 0.000277778
    "to_si_factor": 0.0002777777777777778
  },
  "Q107410801": {
    "si_name": "Q106653452",
    "si_name": "Q21016931",
    "symbol": "g/(cm s)",
    "to_si_factor": 0.1
  },
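The updated factors are simply the exact double-precision values of 1/86400
(seconds per day) and 1/3600 (seconds per hour), replacing the previously
rounded constants:

    >>> 1 / 86400
    1.1574074074074073e-05
    >>> 1 / 3600
    0.0002777777777777778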
@@ -2319,11 +2314,6 @@
    "symbol": "kJ/(kg K)",
    "to_si_factor": 1000.0
  },
  "Q108888186": {
    "si_name": "Q11570",
    "symbol": "eV/c²",
    "to_si_factor": 1.782661921627898e-36
  },
  "Q108888198": {
    "si_name": "Q11570",
    "symbol": "keV/c²",
@@ -2544,6 +2534,11 @@
    "symbol": "m",
    "to_si_factor": 1.0
  },
  "Q11574": {
    "si_name": "Q11574",
    "symbol": "s",
    "to_si_factor": 1.0
  },
  "Q11579": {
    "si_name": "Q11579",
    "symbol": "K",
@@ -3304,6 +3299,11 @@
    "symbol": "MT/S",
    "to_si_factor": null
  },
  "Q136718229": {
    "si_name": null,
    "symbol": "MtCO₂e",
    "to_si_factor": null
  },
  "Q1374438": {
    "si_name": "Q11574",
    "symbol": "ks",
@@ -3474,11 +3474,6 @@
    "symbol": "μC",
    "to_si_factor": 1e-06
  },
  "Q162525": {
    "si_name": null,
    "symbol": "°E",
    "to_si_factor": null
  },
  "Q1628990": {
    "si_name": "Q12831618",
    "symbol": "hph",
@@ -3726,7 +3721,7 @@
  },
  "Q191118": {
    "si_name": "Q11570",
    "symbol": "t",
    "symbol": "MT",
    "to_si_factor": 1000.0
  },
  "Q1913097": {
@@ -4394,6 +4389,11 @@
    "symbol": "m²",
    "to_si_factor": 1.0
  },
  "Q25376902": {
    "si_name": null,
    "symbol": "Mbp",
    "to_si_factor": null
  },
  "Q25377184": {
    "si_name": "Q25377184",
    "symbol": "kg/m²",
@@ -4544,11 +4544,6 @@
    "symbol": "mm²/s",
    "to_si_factor": 1e-06
  },
  "Q26162557": {
    "si_name": "Q21016931",
    "symbol": "cP",
    "to_si_factor": 0.001
  },
  "Q26162587": {
    "si_name": "Q21016931",
    "symbol": "μPa s",
@@ -5219,11 +5214,6 @@
    "symbol": "Kib",
    "to_si_factor": null
  },
  "Q3833309": {
    "si_name": null,
    "symbol": "£",
    "to_si_factor": null
  },
  "Q3858002": {
    "si_name": "Q25406",
    "symbol": "mA⋅h",
@@ -5344,11 +5334,6 @@
    "symbol": "bhp EDR",
    "to_si_factor": 12.958174
  },
  "Q3984193": {
    "si_name": "Q25269",
    "symbol": "TeV",
    "to_si_factor": 1.602176634e-07
  },
  "Q39978339": {
    "si_name": "Q25377184",
    "symbol": "kg/cm²",
@@ -298,9 +298,9 @@ class Engine(abc.ABC):  # pylint: disable=too-few-public-methods
    """Using tor proxy (``true``) or not (``false``) for this engine."""

    send_accept_language_header: bool
    """When this option is activated, the language (locale) that is selected by
    the user is used to build and send an ``Accept-Language`` header in the
    request to the origin search engine."""
    """When this option is activated (default), the language (locale) that is
    selected by the user is used to build and send an ``Accept-Language`` header
    in the request to the origin search engine."""

    tokens: list[str]
    """A list of secret tokens to make this engine *private*, more details see
@@ -9,15 +9,14 @@ To load traits from the persistence :py:obj:`EngineTraitsMap.from_data` can be
used.
"""

import os
import json
import dataclasses
import json
import pathlib
import types
import typing as t
import pathlib

from searx import locales
from searx.data import data_dir, ENGINE_TRAITS
from searx.data import ENGINE_TRAITS, data_dir

if t.TYPE_CHECKING:
    from . import Engine
@@ -77,7 +76,7 @@ class EngineTraits:
    language").
    """

    data_type: t.Literal['traits_v1'] = 'traits_v1'
    data_type: t.Literal["traits_v1"] = "traits_v1"
    """Data type, default is 'traits_v1'.
    """

@@ -85,7 +84,7 @@ class EngineTraits:
    """A place to store engine's custom traits, not related to the SearXNG core.
    """

    def get_language(self, searxng_locale: str, default: t.Any = None):
    def get_language(self, searxng_locale: str, default: str | None = None) -> str | None:
        """Return engine's language string that *best fits* to SearXNG's locale.

        :param searxng_locale: SearXNG's internal representation of locale
@@ -97,11 +96,11 @@ class EngineTraits:
        :py:obj:`searx.locales.get_engine_locale`. Except for the special value ``all``
        which is determined from :py:obj:`EngineTraits.all_locale`.
        """
        if searxng_locale == 'all' and self.all_locale is not None:
        if searxng_locale == "all" and self.all_locale is not None:
            return self.all_locale
        return locales.get_engine_locale(searxng_locale, self.languages, default=default)

    def get_region(self, searxng_locale: str, default: t.Any = None) -> t.Any:
    def get_region(self, searxng_locale: str, default: str | None = None) -> str | None:
        """Return engine's region string that best fits to SearXNG's locale.

        :param searxng_locale: SearXNG's internal representation of locale
@@ -113,7 +112,7 @@ class EngineTraits:
        :py:obj:`searx.locales.get_engine_locale`. Except for the special value ``all``
        which is determined from :py:obj:`EngineTraits.all_locale`.
        """
        if searxng_locale == 'all' and self.all_locale is not None:
        if searxng_locale == "all" and self.all_locale is not None:
            return self.all_locale
        return locales.get_engine_locale(searxng_locale, self.regions, default=default)
||||
|
||||
@@ -125,14 +124,14 @@ class EngineTraits:
|
||||
For verification the functions :py:func:`EngineTraits.get_region` and
|
||||
:py:func:`EngineTraits.get_language` are used.
|
||||
"""
|
||||
if self.data_type == 'traits_v1':
|
||||
if self.data_type == "traits_v1":
|
||||
return bool(self.get_region(searxng_locale) or self.get_language(searxng_locale))
|
||||
|
||||
raise TypeError('engine traits of type %s is unknown' % self.data_type)
|
||||
raise TypeError("engine traits of type %s is unknown" % self.data_type)
|
||||
|
||||
def copy(self):
|
||||
"""Create a copy of the dataclass object."""
|
||||
return EngineTraits(**dataclasses.asdict(self)) # type: ignore
|
||||
return EngineTraits(**dataclasses.asdict(self))
|
||||
|
||||
@classmethod
|
||||
def fetch_traits(cls, engine: "Engine | types.ModuleType") -> "EngineTraits | None":
|
||||
@@ -141,7 +140,7 @@ class EngineTraits:
|
||||
function does not exists, ``None`` is returned.
|
||||
"""
|
||||
|
||||
fetch_traits = getattr(engine, 'fetch_traits', None)
|
||||
fetch_traits = getattr(engine, "fetch_traits", None)
|
||||
engine_traits = None
|
||||
|
||||
if fetch_traits:
|
||||
@@ -149,18 +148,18 @@ class EngineTraits:
|
||||
fetch_traits(engine_traits)
|
||||
return engine_traits
|
||||
|
||||
def set_traits(self, engine: "Engine | types.ModuleType"):
|
||||
def set_traits(self, engine: "Engine | types.ModuleType") -> None:
|
||||
"""Set traits from self object in a :py:obj:`.Engine` namespace.
|
||||
|
||||
:param engine: engine instance build by :py:func:`searx.engines.load_engine`
|
||||
"""
|
||||
|
||||
if self.data_type == 'traits_v1':
|
||||
if self.data_type == "traits_v1":
|
||||
self._set_traits_v1(engine)
|
||||
else:
|
||||
raise TypeError('engine traits of type %s is unknown' % self.data_type)
|
||||
raise TypeError("engine traits of type %s is unknown" % self.data_type)
|
||||
|
||||
def _set_traits_v1(self, engine: "Engine | types.ModuleType"):
|
||||
def _set_traits_v1(self, engine: "Engine | types.ModuleType") -> None:
|
||||
# For an engine, when there is `language: ...` in the YAML settings the engine
|
||||
# does support only this one language (region)::
|
||||
#
|
||||
@@ -174,18 +173,18 @@ class EngineTraits:
|
||||
_msg = "settings.yml - engine: '%s' / %s: '%s' not supported"
|
||||
|
||||
languages = traits.languages
|
||||
if hasattr(engine, 'language'):
|
||||
if hasattr(engine, "language"):
|
||||
if engine.language not in languages:
|
||||
raise ValueError(_msg % (engine.name, 'language', engine.language))
|
||||
raise ValueError(_msg % (engine.name, "language", engine.language))
|
||||
traits.languages = {engine.language: languages[engine.language]}
|
||||
|
||||
regions = traits.regions
|
||||
if hasattr(engine, 'region'):
|
||||
if hasattr(engine, "region"):
|
||||
if engine.region not in regions:
|
||||
raise ValueError(_msg % (engine.name, 'region', engine.region))
|
||||
raise ValueError(_msg % (engine.name, "region", engine.region))
|
||||
traits.regions = {engine.region: regions[engine.region]}
|
||||
|
||||
engine.language_support = bool(traits.languages or traits.regions) # type: ignore
|
||||
engine.language_support = bool(traits.languages or traits.regions)
|
||||
|
||||
# set the copied & modified traits in engine's namespace
|
||||
engine.traits = traits # pyright: ignore[reportAttributeAccessIssue]
|
||||
@@ -194,16 +193,16 @@
class EngineTraitsMap(dict[str, EngineTraits]):
    """A python dictionary to map :class:`EngineTraits` by engine name."""

    ENGINE_TRAITS_FILE: pathlib.Path = (data_dir / 'engine_traits.json').resolve()
    ENGINE_TRAITS_FILE: pathlib.Path = (data_dir / "engine_traits.json").resolve()
    """File with persistence of the :py:obj:`EngineTraitsMap`."""

    def save_data(self):
        """Store EngineTraitsMap in file :py:obj:`self.ENGINE_TRAITS_FILE`"""
        with open(self.ENGINE_TRAITS_FILE, 'w', encoding='utf-8') as f:
        with open(self.ENGINE_TRAITS_FILE, "w", encoding="utf-8") as f:
            json.dump(self, f, indent=2, sort_keys=True, cls=EngineTraitsEncoder)

    @classmethod
    def from_data(cls) -> 'EngineTraitsMap':
    def from_data(cls) -> "EngineTraitsMap":
        """Instantiate :class:`EngineTraitsMap` object from :py:obj:`ENGINE_TRAITS`"""
        obj = cls()
        for k, v in ENGINE_TRAITS.items():
@@ -211,8 +210,10 @@ class EngineTraitsMap(dict[str, EngineTraits]):
        return obj

    @classmethod
    def fetch_traits(cls, log: t.Callable[[str], None]) -> 'EngineTraitsMap':
        from searx import engines  # pylint: disable=cyclic-import, import-outside-toplevel
    def fetch_traits(cls, log: t.Callable[[str], None]) -> "EngineTraitsMap":
        from searx import (  # pylint: disable=cyclic-import, import-outside-toplevel
            engines,
        )

        names = list(engines.engines)
        names.sort()
@@ -226,13 +227,13 @@ class EngineTraitsMap(dict[str, EngineTraits]):
            try:
                traits = EngineTraits.fetch_traits(engine)
            except Exception as exc:
                log("FATAL: while fetch_traits %s: %s" % (engine_name, exc))
                if os.environ.get('FORCE', '').lower() not in ['on', 'true', '1']:
                    raise
                log("ERROR: while fetch_traits %s: %s" % (engine_name, exc))
                v = ENGINE_TRAITS.get(engine_name)
                if v:
                    log("FORCE: re-use old values from fetch_traits - ENGINE_TRAITS[%s]" % engine_name)
                    log("WARNING: re-use old values from fetch_traits - ENGINE_TRAITS[%s]" % engine_name)
                    traits = EngineTraits(**v)
                else:
                    log("WARNING: no old values available for ENGINE_TRAITS[%s], skipping" % engine_name)

            if traits is not None:
                log("%-20s: SearXNG languages --> %s " % (engine_name, len(traits.languages)))
@@ -247,7 +248,7 @@ class EngineTraitsMap(dict[str, EngineTraits]):
        :param engine: engine instance built by :py:func:`searx.engines.load_engine`
        """

        engine_traits = EngineTraits(data_type='traits_v1')
        engine_traits = EngineTraits(data_type="traits_v1")
        if engine.name in self.keys():
            engine_traits = self[engine.name]
@@ -2,10 +2,18 @@
# pylint: disable=invalid-name
"""360Search search engine for searxng"""

import typing as t

from urllib.parse import urlencode
from lxml import html

from searx import logger
from searx.enginelib import EngineCache
from searx.utils import extract_text
from searx.network import get as http_get

if t.TYPE_CHECKING:
    from searx.extended_types import SXNG_Response

# Metadata
about = {
@@ -26,6 +34,35 @@ time_range_dict = {'day': 'd', 'week': 'w', 'month': 'm', 'year': 'y'}

# Base URL
base_url = "https://www.so.com"
COOKIE_CACHE_KEY = "cookie"
COOKIE_CACHE_EXPIRATION_SECONDS = 3600

CACHE: EngineCache
"""Stores cookies from 360search to avoid re-fetching them on every request."""


def setup(engine_settings: dict[str, t.Any]) -> bool:
    """Initialization of the engine.

    - Instantiate a cache for this engine (:py:obj:`CACHE`).

    """
    global CACHE  # pylint: disable=global-statement
    # sqlite complains about table names starting with digits, so the prefix
    # "cache" is prepended to the engine name
    CACHE = EngineCache("cache" + engine_settings["name"])
    return True


def get_cookie(url: str) -> str:
    cookie: str | None = CACHE.get(COOKIE_CACHE_KEY)
    if cookie:
        return cookie
    resp: SXNG_Response = http_get(url, timeout=10, allow_redirects=False)
    headers = resp.headers
    cookie = headers['set-cookie'].split(";")[0]
    CACHE.set(key=COOKIE_CACHE_KEY, value=cookie, expire=COOKIE_CACHE_EXPIRATION_SECONDS)

    return cookie
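The cookie round-trip is then just cache-or-fetch (illustrative sketch, the
query URL is made up):

    cookie = get_cookie(f"{base_url}/s?q=test")  # first call hits the network
    cookie = get_cookie(f"{base_url}/s?q=test")  # second call is served from CACHE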
def request(query, params):
@@ -36,8 +73,13 @@ def request(query, params):

    if time_range_dict.get(params['time_range']):
        query_params["adv_t"] = time_range_dict.get(params['time_range'])

    params["url"] = f"{base_url}/s?{urlencode(query_params)}"
    # get the cookie by calling the query page
    logger.debug("querying url: %s", params["url"])
    cookie = get_cookie(params["url"])
    logger.debug("obtained cookie: %s", cookie)
    params['headers'] = {'Cookie': cookie}

    return params
@@ -43,7 +43,7 @@ ENGINE_DEFAULT_ARGS: dict[str, int | str | list[t.Any] | dict[str, t.Any] | bool | None] = {
    "inactive": False,
    "about": {},
    "using_tor_proxy": False,
    "send_accept_language_header": False,
    "send_accept_language_header": True,
    "tokens": [],
    "max_page": 0,
}

@@ -52,7 +52,6 @@ about = {

categories = []
paging = True
send_accept_language_header = True
results_per_page = 10

base_url = "https://stock.adobe.com"
@@ -3,9 +3,14 @@
Ahmia (Onions)
"""

import typing as t

from urllib.parse import urlencode, urlparse, parse_qs
from lxml.html import fromstring
from searx.utils import gen_useragent, ElementType
from searx.engines.xpath import extract_url, extract_text, eval_xpath_list, eval_xpath
from searx.network import get
from searx.enginelib import EngineCache

# about
about = {
@@ -23,6 +28,7 @@ paging = True
page_size = 10

# search url
base_url = 'http://juhanurmihxlp77nkq76byazcldy2hlmovfu2epvl5ankdibsot4csyd.onion'
search_url = 'http://juhanurmihxlp77nkq76byazcldy2hlmovfu2epvl5ankdibsot4csyd.onion/search/?{query}'
time_range_support = True
time_range_dict = {'day': 1, 'week': 7, 'month': 30}
@@ -34,10 +40,42 @@ title_xpath = './h4/a[1]'
content_xpath = './/p[1]'
correction_xpath = '//*[@id="didYouMean"]//a'
number_of_results_xpath = '//*[@id="totalResults"]'
name_token_xpath = '//form[@id="searchForm"]/input[@type="hidden"]/@name'
value_token_xpath = '//form[@id="searchForm"]/input[@type="hidden"]/@value'

CACHE: EngineCache


def setup(engine_settings: dict[str, t.Any]) -> bool:
    global CACHE  # pylint: disable=global-statement
    CACHE = EngineCache(engine_settings["name"])
    return True


def _get_tokens(dom: ElementType | None = None) -> str:
    """
    The tokens are hidden in a hidden input field.
    They update every minute, but tokens up to one hour old are still accepted.
    To spend the fewest requests, it is best to always take the newest tokens
    from each response.  In the worst case, when a token has expired, a total
    of two requests is needed (over Tor, this might be ridiculously slow).
    """
    if dom is None:
        resp = get(base_url, headers={'User-Agent': gen_useragent()})
        dom = fromstring(resp.text)
    name_token = extract_text(dom.xpath(name_token_xpath))
    value_token = extract_text(dom.xpath(value_token_xpath))
    return f"{name_token}:{value_token}"


def request(query, params):
    params['url'] = search_url.format(query=urlencode({'q': query}))
    token_str: str | None = CACHE.get('ahmia-tokens')
    if not token_str:
        token_str = _get_tokens()
        CACHE.set('ahmia-tokens', token_str, expire=60 * 60)
    name_token, value_token = token_str.split(":")

    params['url'] = search_url.format(query=urlencode({'q': query, name_token: value_token}))

    if params['time_range'] in time_range_dict:
        params['url'] += '&' + urlencode({'d': time_range_dict[params['time_range']]})
@@ -77,4 +115,8 @@ def response(resp):
    except:  # pylint: disable=bare-except
        pass

    # Update the tokens to the newest ones
    token_str = _get_tokens(dom)
    CACHE.set('ahmia-tokens', token_str, expire=60 * 60)

    return results
@@ -3,8 +3,8 @@
engine providing access to a variety of book resources (also via IPFS), created
by a team of anonymous archivists (AnnaArchivist_).

.. _Anna's Archive: https://annas-archive.org/
.. _AnnaArchivist: https://annas-software.org/AnnaArchivist/annas-archive
.. _Anna's Archive: https://annas-archive.gl/
.. _AnnaArchivist: https://software.annas-archive.gl/AnnaArchivist/annas-archive

Configuration
=============
@@ -34,18 +34,18 @@ Implementations
===============

"""
import typing as t

import random
import typing as t
from urllib.parse import urlencode

from lxml import html
from lxml.etree import ElementBase

from searx.utils import extract_text, eval_xpath, eval_xpath_getindex, eval_xpath_list
from searx.enginelib.traits import EngineTraits
from searx.data import ENGINE_TRAITS
from searx.exceptions import SearxEngineXPathException

from searx.enginelib.traits import EngineTraits
from searx.result_types import EngineResults
from searx.utils import eval_xpath, eval_xpath_getindex, eval_xpath_list, extract_text

if t.TYPE_CHECKING:
    from searx.extended_types import SXNG_Response
@@ -53,7 +53,7 @@ if t.TYPE_CHECKING:

# about
about: dict[str, t.Any] = {
    "website": "https://annas-archive.org/",
    "website": "https://annas-archive.gl/",
    "wikidata_id": "Q115288326",
    "official_api_documentation": None,
    "use_official_api": False,
@@ -66,7 +66,9 @@ categories = ["files", "books"]
paging: bool = True

# search-url
base_url: str = "https://annas-archive.org"
base_url: list[str] | str = []
"""List of Anna's Archive domains or a single domain (as string)."""

aa_content: str = ""
"""Anna's search form field **Content** / possible values::
@@ -78,7 +80,7 @@ To not filter use an empty string (default).
aa_sort: str = ""
"""Sort Anna's results, possible values::

    newest, oldest, largest, smallest
    newest, oldest, largest, smallest, newest_added, oldest_added, random

To sort by *most relevant* use an empty string (default)."""

@@ -94,9 +96,13 @@ aa_ext: str = ""
"""


def setup(engine_settings: dict[str, t.Any]) -> bool:  # pylint: disable=unused-argument
def setup(_engine_settings: dict[str, t.Any]) -> bool:
    """Check of engine's settings."""
    traits = EngineTraits(**ENGINE_TRAITS["annas archive"])

    traits: EngineTraits = EngineTraits(**ENGINE_TRAITS["annas archive"])

    if not base_url:
        raise ValueError("missing required config `base_url`")

    if aa_content and aa_content not in traits.custom["content"]:
        raise ValueError(f"invalid setting content: {aa_content}")
@@ -110,6 +116,13 @@ def setup(_engine_settings: dict[str, t.Any]) -> bool:
    return True


def _get_base_url_choice() -> str:
    if isinstance(base_url, list):
        return random.choice(base_url)

    return base_url


def request(query: str, params: "OnlineParams") -> None:
    lang = traits.get_language(params["searxng_locale"], traits.all_locale)
    args = {
@@ -120,62 +133,130 @@ def request(query: str, params: "OnlineParams") -> None:
        "q": query,
        "page": params["pageno"],
    }
    # filter out None and empty values
    # filter out empty values
    filtered_args = dict((k, v) for k, v in args.items() if v)
    params["url"] = f"{base_url}/search?{urlencode(filtered_args)}"

    params["base_url"] = _get_base_url_choice()
    params["url"] = f"{params['base_url']}/search?{urlencode(filtered_args)}"


def response(resp: "SXNG_Response") -> EngineResults:
    res = EngineResults()
    dom = html.fromstring(resp.text)

    # The rendering of the WEB page is strange; positions of Anna's result page
    # are enclosed in SGML comments.  These comments are *uncommented* by some
    # JS code, see query of class ".js-scroll-hidden" in Anna's HTML template:
    # https://annas-software.org/AnnaArchivist/annas-archive/-/blob/main/allthethings/templates/macros/md5_list.html
    # Each result is a div with class "flex" inside the "js-aarecord-list-outer"
    # container.  The "flex" filter excludes non-result divs such as section
    # separators.
    for item in eval_xpath_list(
        dom,
        "//main//div[contains(@class, 'js-aarecord-list-outer')]/div[contains(@class, 'flex')]",
    ):
        result = _get_result(item, resp.search_params["base_url"])
        if result is not None:
            res.add(res.types.Paper(**result))

    for item in eval_xpath_list(dom, "//main//div[contains(@class, 'js-aarecord-list-outer')]/div"):
        try:
            kwargs: dict[str, t.Any] = _get_result(item)
        except SearxEngineXPathException:
            continue
        res.add(res.types.Paper(**kwargs))
    return res


def _get_result(item: ElementBase) -> dict[str, t.Any]:
    return {
        "url": base_url + eval_xpath_getindex(item, "./a/@href", 0),
        "title": extract_text(eval_xpath(item, "./div//a[starts-with(@href, '/md5')]")),
        "authors": [extract_text(eval_xpath_getindex(item, ".//a[starts-with(@href, '/search')]", 0))],
        "publisher": extract_text(
            eval_xpath_getindex(item, ".//a[starts-with(@href, '/search')]", 1, default=None), allow_none=True
        ),
        "content": extract_text(eval_xpath(item, ".//div[contains(@class, 'relative')]")),
        "thumbnail": extract_text(eval_xpath_getindex(item, ".//img/@src", 0, default=None), allow_none=True),
def _get_result(item: ElementBase, base_url_choice: str) -> dict[str, t.Any] | None:
    # the first direct child "a" contains the link to the result page
    href_els = item.xpath("./a/@href")
    if not href_els:
        return None

    # the link with class "js-vim-focus" is always the title link
    title_text = extract_text(
        xpath_results=eval_xpath(item, ".//a[contains(@class, 'js-vim-focus')]"),
        allow_none=True,
    )
    if not title_text:
        return None

    result: dict[str, t.Any] = {
        "url": base_url_choice + href_els[0],
        "title": title_text,
    }

    result["content"] = extract_text(
        xpath_results=eval_xpath_getindex(
            element=item,
            # the content is in a div with class "relative" and "line-clamp"
            xpath_spec=".//div[@class='relative']/div[contains(@class, 'line-clamp')]",
            index=0,
            default=None,
        ),
        allow_none=True,
    )

def fetch_traits(engine_traits: EngineTraits):
    result["thumbnail"] = eval_xpath_getindex(
        element=item,
        # the thumbnail is the src of the first img in the result item
        xpath_spec=".//img/@src",
        index=0,
        default=None,
    )

    result["authors"] = [
        extract_text(
            xpath_results=eval_xpath_getindex(
                element=item,
                # identified by the "user-edit" icon
                xpath_spec=".//a[.//span[contains(@class, 'icon-[mdi--user-edit]')]]",
                index=0,
                default=None,
            ),
            allow_none=True,
        )
    ]

    result["publisher"] = extract_text(
        xpath_results=eval_xpath_getindex(
            element=item,
            # identified by the "company" icon
            xpath_spec=".//a[.//span[contains(@class, 'icon-[mdi--company]')]]",
            index=0,
            default=None,
        ),
        allow_none=True,
    )

    tags_text = extract_text(
        xpath_results=eval_xpath_getindex(
            element=item,
            # the only one with "font-semibold" class
            xpath_spec=".//div[contains(@class, 'font-semibold')]",
            index=0,
            default=None,
        ),
        allow_none=True,
    )
    if tags_text:
        result["tags"] = [tag.strip() for tag in tags_text.split("Save")[0].split("·") if tag.strip()]

    return result
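A sketch of the tag extraction on a typical cell text (illustrative, the cell
text is made up): anything after the trailing "Save" control is dropped, and
the rest splits on the middle dot:

    tags_text = "English [en] · epub · 1.9MB · Save"
    [t.strip() for t in tags_text.split("Save")[0].split("·") if t.strip()]
    # -> ['English [en]', 'epub', '1.9MB']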

def fetch_traits(engine_traits: EngineTraits) -> None:
    """Fetch languages and other search arguments from Anna's search form."""
    # pylint: disable=import-outside-toplevel

    import babel
    from searx.network import get  # see https://github.com/searxng/searxng/issues/762

    from searx.locales import language_tag
    from searx.network import get  # see https://github.com/searxng/searxng/issues/762

    engine_traits.all_locale = ""
    engine_traits.custom["content"] = []
    engine_traits.custom["ext"] = []
    engine_traits.custom["sort"] = []

    resp = get(base_url + "/search")
    resp = get(_get_base_url_choice() + "/search", timeout=5)
    if not resp.ok:
        raise RuntimeError("Response from Anna's search page is not OK.")
        raise RuntimeError("Response from Anna's Archive is not OK.")

    dom = html.fromstring(resp.text)

    # supported language codes

    lang_map: dict[str, str] = {}
    for x in eval_xpath_list(dom, "//form//input[@name='lang']"):
        eng_lang = x.get("value")

searx/engines/aol.py (new file, 208 lines)
@@ -0,0 +1,208 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
"""AOL supports web, image, and video search. Internally, it uses the Bing
index.

AOL doesn't seem to support setting the language via request parameters;
instead, the results are based on the URL. For example, there is

- `search.aol.com <https://search.aol.com>`_ for English results
- `suche.aol.de <https://suche.aol.de>`_ for German results

However, AOL offers its services only in a few regions:

- en-US: search.aol.com
- de-DE: suche.aol.de
- fr-FR: recherche.aol.fr
- en-GB: search.aol.co.uk
- en-CA: search.aol.ca

In order to still offer sufficient support for language and region, the `search
keywords`_ known from Bing, ``language`` and ``loc`` (region), are added to the
search term (AOL is basically just a proxy for Bing).

.. _search keywords:
   https://support.microsoft.com/en-us/topic/advanced-search-keywords-ea595928-5d63-4a0b-9c6b-0b769865e78a

"""

from urllib.parse import urlencode, unquote_plus
import typing as t

from lxml import html
from dateutil import parser

from searx.result_types import EngineResults
from searx.utils import eval_xpath_list, eval_xpath, extract_text

if t.TYPE_CHECKING:
    from searx.extended_types import SXNG_Response
    from searx.search.processors import OnlineParams

about = {
    "website": "https://www.aol.com",
    "wikidata_id": "Q2407",
    "official_api_documentation": None,
    "use_official_api": False,
    "require_api_key": False,
    "results": "HTML",
}

categories = ["general"]
search_type = "search"  # supported: search, image, video

paging = True
safesearch = True
time_range_support = True
results_per_page = 10


base_url = "https://search.aol.com"
time_range_map = {"day": "1d", "week": "1w", "month": "1m", "year": "1y"}
safesearch_map = {0: "p", 1: "r", 2: "i"}


def init(_):
    if search_type not in ("search", "image", "video"):
        raise ValueError(f"unsupported search type {search_type}")


def request(query: str, params: "OnlineParams") -> None:

    language, region = (params["searxng_locale"].split("-") + [None])[:2]
    if language and language != "all":
        query = f"{query} language:{language}"
    if region:
        query = f"{query} loc:{region}"

    args: dict[str, str | int | None] = {
        "q": query,
        "b": params["pageno"] * results_per_page + 1,  # page is 1-indexed
        "pz": results_per_page,
    }

    if params["time_range"]:
        args["fr2"] = "time"
        args["age"] = params["time_range"]
    else:
        args["fr2"] = "sb-top-search"

    params["cookies"]["sB"] = f"vm={safesearch_map[params['safesearch']]}"
    params["url"] = f"{base_url}/aol/{search_type}?{urlencode(args)}"
    logger.debug(params)
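
# A minimal sketch of the effective request (assumed values, not a captured
# request): for the query "tree" with SearXNG locale "de-DE" on page 1,
# request() appends the Bing keywords and builds roughly
#
#     q   = "tree language:de loc:DE"
#     url = "https://search.aol.com/aol/search?q=tree+language%3Ade+loc%3ADE&b=11&pz=10&fr2=sb-top-search"
#
# The exact parameter order depends on urlencode().
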

def _deobfuscate_url(obfuscated_url: str) -> str | None:
    # URL looks like "https://search.aol.com/click/_ylt=AwjFSDjd;_ylu=JfsdjDFd/RV=2/RE=1774058166/RO=10/RU=https%3a%2f%2fen.wikipedia.org%2fwiki%2fTree/RK=0/RS=BP2CqeMLjscg4n8cTmuddlEQA2I-"  # pylint: disable=line-too-long
    if not obfuscated_url:
        return None

    for part in obfuscated_url.split("/"):
        if part.startswith("RU="):
            return unquote_plus(part[3:])
    # pattern for de-obfuscating URL not found, fall back to Yahoo's tracking link
    return obfuscated_url
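
# Worked example: in the sample link above, the "RU=" path segment carries the
# percent-encoded target, so
#
#     _deobfuscate_url(".../RO=10/RU=https%3a%2f%2fen.wikipedia.org%2fwiki%2fTree/RK=0/...")
#
# returns "https://en.wikipedia.org/wiki/Tree". Links without an "RU=" segment
# are returned unchanged (Yahoo's tracking link).
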

def _general_results(doc: html.HtmlElement) -> EngineResults:
    res = EngineResults()

    for result in eval_xpath_list(doc, "//div[@id='web']//ol/li[not(contains(@class, 'first'))]"):
        obfuscated_url = extract_text(eval_xpath(result, ".//h3/a/@href"))
        if not obfuscated_url:
            continue

        url = _deobfuscate_url(obfuscated_url)
        if not url:
            continue

        res.add(
            res.types.MainResult(
                url=url,
                title=extract_text(eval_xpath(result, ".//h3/a")) or "",
                content=extract_text(eval_xpath(result, ".//div[contains(@class, 'compText')]")) or "",
                thumbnail=extract_text(eval_xpath(result, ".//a[contains(@class, 'thm')]/img/@data-src")) or "",
            )
        )
    return res


def _video_results(doc: html.HtmlElement) -> EngineResults:
    res = EngineResults()

    for result in eval_xpath_list(doc, "//div[contains(@class, 'results')]//ol/li"):
        obfuscated_url = extract_text(eval_xpath(result, ".//a/@href"))
        if not obfuscated_url:
            continue

        url = _deobfuscate_url(obfuscated_url)
        if not url:
            continue

        published_date_raw = extract_text(eval_xpath(result, ".//div[contains(@class, 'v-age')]"))
        try:
            published_date = parser.parse(published_date_raw or "")
        except parser.ParserError:
            published_date = None

        res.add(
            res.types.LegacyResult(
                {
                    "template": "videos.html",
                    "url": url,
                    "title": extract_text(eval_xpath(result, ".//h3")),
                    "content": extract_text(eval_xpath(result, ".//div[contains(@class, 'compText')]")),
                    "thumbnail": extract_text(eval_xpath(result, ".//img[contains(@class, 'thm')]/@src")),
                    "length": extract_text(eval_xpath(result, ".//span[contains(@class, 'v-time')]")),
                    "publishedDate": published_date,
                }
            )
        )

    return res


def _image_results(doc: html.HtmlElement) -> EngineResults:
    res = EngineResults()

    for result in eval_xpath_list(doc, "//section[@id='results']//ul/li"):
        obfuscated_url = extract_text(eval_xpath(result, "./a/@href"))
        if not obfuscated_url:
            continue

        url = _deobfuscate_url(obfuscated_url)
        if not url:
            continue

        res.add(
            res.types.LegacyResult(
                {
                    "template": "images.html",
                    # results don't have an extra URL, only the image source
                    "url": url,
                    "title": extract_text(eval_xpath(result, ".//a/@aria-label")),
                    "thumbnail_src": extract_text(eval_xpath(result, ".//img/@src")),
                    "img_src": url,
                }
            )
        )

    return res

def response(resp: "SXNG_Response") -> EngineResults:
|
||||
doc = html.fromstring(resp.text)
|
||||
|
||||
match search_type:
|
||||
case "search":
|
||||
results = _general_results(doc)
|
||||
case "image":
|
||||
results = _image_results(doc)
|
||||
case "video":
|
||||
results = _video_results(doc)
|
||||
case _:
|
||||
raise ValueError("unsupported search type")
|
||||
|
||||
for suggestion in eval_xpath_list(doc, ".//ol[contains(@class, 'searchRightBottom')]//table//a"):
|
||||
results.add(results.types.LegacyResult({"suggestion": extract_text(suggestion)}))
|
||||
|
||||
return results
|
||||
@@ -9,55 +9,60 @@ Arch Wiki blocks access to it.
"""

from urllib.parse import urlencode, urljoin, urlparse
import lxml
import babel

from searx.utils import extract_text, eval_xpath_list, eval_xpath_getindex, searxng_useragent
import babel
import lxml

from searx.enginelib.traits import EngineTraits
from searx.locales import language_tag

from searx.utils import (
    eval_xpath_getindex,
    eval_xpath_list,
    extract_text,
    searxng_useragent,
)

about = {
    "website": 'https://wiki.archlinux.org/',
    "wikidata_id": 'Q101445877',
    "website": "https://wiki.archlinux.org/",
    "wikidata_id": "Q101445877",
    "official_api_documentation": None,
    "use_official_api": False,
    "require_api_key": False,
    "results": 'HTML',
    "results": "HTML",
}

# engine dependent config
categories = ['it', 'software wikis']
categories = ["it", "software wikis"]
paging = True
main_wiki = 'wiki.archlinux.org'
main_wiki = "wiki.archlinux.org"


def request(query, params):

    sxng_lang = params['searxng_locale'].split('-')[0]
    netloc: str = traits.custom['wiki_netloc'].get(sxng_lang, main_wiki)  # type: ignore
    title: str = traits.custom['title'].get(sxng_lang, 'Special:Search')  # type: ignore
    base_url = 'https://' + netloc + '/index.php?'
    offset = (params['pageno'] - 1) * 20
    sxng_lang = params["searxng_locale"].split("-")[0]
    netloc: str = traits.custom["wiki_netloc"].get(sxng_lang, main_wiki)  # type: ignore
    title: str = traits.custom["title"].get(sxng_lang, "Special:Search")  # type: ignore
    base_url = "https://" + netloc + "/index.php?"
    offset = (params["pageno"] - 1) * 20

    if netloc == main_wiki:
        eng_lang: str = traits.get_language(sxng_lang, 'English')  # type: ignore
        query += ' (' + eng_lang + ')'
        eng_lang: str = traits.get_language(sxng_lang, "English")  # type: ignore
        query += " (" + eng_lang + ")"
        # wiki.archlinux.org is protected by anubis
        # - https://github.com/searxng/searxng/issues/4646#issuecomment-2817848019
        params['headers']['User-Agent'] = searxng_useragent()
    elif netloc == 'wiki.archlinuxcn.org':
        base_url = 'https://' + netloc + '/wzh/index.php?'
        params["headers"]["User-Agent"] = searxng_useragent()
    elif netloc == "wiki.archlinuxcn.org":
        base_url = "https://" + netloc + "/wzh/index.php?"

    args = {
        'search': query,
        'title': title,
        'limit': 20,
        'offset': offset,
        'profile': 'default',
        "search": query,
        "title": title,
        "limit": 20,
        "offset": offset,
        "profile": "default",
    }

    params['url'] = base_url + urlencode(args)
    params["url"] = base_url + urlencode(args)
    return params

@@ -67,18 +72,18 @@ def response(resp):
    dom = lxml.html.fromstring(resp.text)  # type: ignore

    # get the base URL for the language in which request was made
    sxng_lang = resp.search_params['searxng_locale'].split('-')[0]
    netloc: str = traits.custom['wiki_netloc'].get(sxng_lang, main_wiki)  # type: ignore
    base_url = 'https://' + netloc + '/index.php?'
    sxng_lang = resp.search_params["searxng_locale"].split("-")[0]
    netloc: str = traits.custom["wiki_netloc"].get(sxng_lang, main_wiki)  # type: ignore
    base_url = "https://" + netloc + "/index.php?"

    for result in eval_xpath_list(dom, '//ul[@class="mw-search-results"]/li'):
        link = eval_xpath_getindex(result, './/div[@class="mw-search-result-heading"]/a', 0)
        content = extract_text(result.xpath('.//div[@class="searchresult"]'))
        results.append(
            {
                'url': urljoin(base_url, link.get('href')),  # type: ignore
                'title': extract_text(link),
                'content': content,
                "url": urljoin(base_url, link.get("href")),  # type: ignore
                "title": extract_text(link),
                "content": content,
            }
        )

@@ -108,39 +113,39 @@ def fetch_traits(engine_traits: EngineTraits):

    """
    # pylint: disable=import-outside-toplevel

    from searx.network import get  # see https://github.com/searxng/searxng/issues/762

    engine_traits.custom['wiki_netloc'] = {}
    engine_traits.custom['title'] = {}
    engine_traits.custom["wiki_netloc"] = {}
    engine_traits.custom["title"] = {}

    title_map = {
        'de': 'Spezial:Suche',
        'fa': 'ویژه:جستجو',
        'ja': '特別:検索',
        'zh': 'Special:搜索',
        "de": "Spezial:Suche",
        "fa": "ویژه:جستجو",
        "ja": "特別:検索",
        "zh": "Special:搜索",
    }

    resp = get('https://wiki.archlinux.org/', timeout=3)
    if not resp.ok:  # type: ignore
        print("ERROR: response from wiki.archlinux.org is not OK.")
    resp = get("https://wiki.archlinux.org/", timeout=5)
    if not resp.ok:
        raise RuntimeError("Response from Arch Linux Wiki is not OK.")

    dom = lxml.html.fromstring(resp.text)  # type: ignore
    for a in eval_xpath_list(dom, "//a[@class='interlanguage-link-target']"):

        sxng_tag = language_tag(babel.Locale.parse(a.get('lang'), sep='-'))
        sxng_tag = language_tag(babel.Locale.parse(a.get("lang"), sep="-"))
        # zh_Hans --> zh
        sxng_tag = sxng_tag.split('_')[0]
        sxng_tag = sxng_tag.split("_")[0]

        netloc = urlparse(a.get('href')).netloc
        if netloc != 'wiki.archlinux.org':
        netloc = urlparse(a.get("href")).netloc
        if netloc != "wiki.archlinux.org":
            title = title_map.get(sxng_tag)
            if not title:
                print("ERROR: title tag from %s (%s) is unknown" % (netloc, sxng_tag))
                continue
            engine_traits.custom['wiki_netloc'][sxng_tag] = netloc
            engine_traits.custom['title'][sxng_tag] = title  # type: ignore
            engine_traits.custom["wiki_netloc"][sxng_tag] = netloc
            engine_traits.custom["title"][sxng_tag] = title  # type: ignore

        eng_tag = extract_text(eval_xpath_list(a, ".//span"))
        engine_traits.languages[sxng_tag] = eng_tag  # type: ignore

    engine_traits.languages['en'] = 'English'
    engine_traits.languages["en"] = "English"
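
# Illustrative only (hypothetical values): after fetch_traits() runs, the
# custom traits map each language tag to its wiki host and search page, e.g.
#
#     engine_traits.custom["wiki_netloc"]  ~  {"zh": "wiki.archlinuxcn.org", ...}
#     engine_traits.custom["title"]        ~  {"zh": "Special:搜索", ...}
#
# The English wiki stays on the default host with the default search title
# ("Special:Search").
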

searx/engines/artstation.py (new file, 105 lines)
@@ -0,0 +1,105 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
"""Artstation (images)"""

import re
import typing as t
from json import dumps

from searx.result_types import EngineResults
from searx.network import post
from searx.enginelib import EngineCache

# Engine metadata
about = {
    "website": 'https://www.artstation.com/',
    "wikidata_id": 'Q65551500',
    "official_api_documentation": None,
    "use_official_api": False,
    "require_api_key": False,
    "results": 'JSON',
}

# Engine configuration
paging = True
categories = ['images']
results_per_page = 20

# Search URL
base_url = "https://www.artstation.com/api/v2/search/projects.json"

# Cache keys & expiration
CSRF_PUBLICKEY_CACHE = "public_csrf_token"
CSRF_PRIVATEKEY_CACHE = "private_csrf_token"
KEY_EXPIRATION_SECONDS = 3600

CACHE: EngineCache


def setup(engine_settings: dict[str, t.Any]) -> bool:
    global CACHE  # pylint: disable=global-statement
    CACHE = EngineCache(engine_settings["name"])
    return True


def fetch_csrf_tokens():

    public_token: str | None = CACHE.get(CSRF_PUBLICKEY_CACHE)
    private_token: str | None = CACHE.get(CSRF_PRIVATEKEY_CACHE)

    if public_token and private_token:
        return public_token, private_token

    resp = post("https://www.artstation.com/api/v2/csrf_protection/token.json")
    public_token = resp.json()["public_csrf_token"]
    private_token = resp.cookies["PRIVATE-CSRF-TOKEN"]

    CACHE.set(key=CSRF_PUBLICKEY_CACHE, value=public_token, expire=KEY_EXPIRATION_SECONDS)
    CACHE.set(key=CSRF_PRIVATEKEY_CACHE, value=private_token, expire=KEY_EXPIRATION_SECONDS)

    return public_token, private_token


def request(query, params):

    public_token, private_token = fetch_csrf_tokens()

    form_data = {
        "query": query,
        "page": params["pageno"],
        "per_page": results_per_page,
        "sorting": "relevance",
        "pro_first": 1,
    }

    params["url"] = base_url
    params["method"] = 'POST'
    params['headers']['content-type'] = "application/json"
    params['headers']['PUBLIC-CSRF-TOKEN'] = public_token
    params["cookies"] = {"PRIVATE-CSRF-TOKEN": private_token}
    params['data'] = dumps(form_data)

    return params


def response(resp) -> EngineResults:
    results = EngineResults()
    search_res = resp.json()

    for item in search_res["data"]:
        thumb = item["smaller_square_cover_url"]
        fullsize_image = re.sub(r'/\d{6,}/', '/', thumb).replace("smaller_square", "large")
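
        # Worked example (hypothetical URL): for a thumbnail such as
        #   .../images/012/345/678/20240101/smaller_square/work.jpg
        # the regex drops the first path segment of 6+ digits ("20240101") and
        # "smaller_square" becomes "large":
        #   .../images/012/345/678/large/work.jpg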

        results.add(
            results.types.LegacyResult(
                {
                    "template": 'images.html',
                    "title": item["title"],
                    "url": item["url"],
                    "author": f"{item['user']['username']} ({item['user']['full_name']})",
                    "img_src": fullsize_image,
                    "thumbnail_src": thumb,
                }
            )
        )

    return results

@@ -51,6 +51,7 @@ def request(query, params):
    }

    params["url"] = f"{base_url}?{urlencode(query_params)}"
    params["headers"]["Referer"] = "https://www.bilibili.com"
    params["cookies"] = cookie

    return params

@@ -1,197 +1,175 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
"""This is the implementation of the Bing-WEB engine. Some of this
"""This is the implementation of the Bing-Web engine. Some of this
implementation is shared by other engines:

- :ref:`bing images engine`
- :ref:`bing news engine`
- :ref:`bing videos engine`

On the `preference page`_ Bing offers a lot of languages and regions (see
sections LANGUAGE and COUNTRY/REGION). The language is the language of the UI;
we need it in SearXNG to get the translations of data such as *"published last
week"*.

There is a description of the official search-APIs_; unfortunately, this is
not an API we can use, nor the one Bing itself would use. You can look up some
things in the API to get a better picture of Bing, but the value
specifications like the market codes are usually outdated or at least no
longer used by Bing itself.

The market codes have been harmonized and are identical for web, video and
images. The news area has also been harmonized with the other categories. Only
political adjustments still seem to be made -- for example, there is no news
category for the Chinese market.

.. _preference page: https://www.bing.com/account/general
.. _search-APIs: https://learn.microsoft.com/en-us/bing/search-apis/

.. note::

   Some functionality (paging and time-range results) is not supported
   since it depends on JavaScript.
"""
# pylint: disable=too-many-branches, invalid-name

import base64
import re
import time
import typing as t
from urllib.parse import parse_qs, urlencode, urlparse
from lxml import html

import babel
import babel.languages
from lxml import html

from searx.utils import eval_xpath, extract_text, eval_xpath_list, eval_xpath_getindex
from searx.locales import language_tag, region_tag
from searx.enginelib.traits import EngineTraits
from searx.exceptions import SearxEngineAPIException
from searx.locales import region_tag
from searx.utils import eval_xpath, eval_xpath_getindex, eval_xpath_list, extract_text

about = {
    "website": 'https://www.bing.com',
    "wikidata_id": 'Q182496',
    "official_api_documentation": 'https://www.microsoft.com/en-us/bing/apis/bing-web-search-api',
if t.TYPE_CHECKING:
    from searx.extended_types import SXNG_Response
    from searx.search.processors import OnlineParams

about: dict[str, t.Any] = {
    "website": "https://www.bing.com",
    "wikidata_id": "Q182496",
    "official_api_documentation": "https://github.com/MicrosoftDocs/bing-docs",
    "use_official_api": False,
    "require_api_key": False,
    "results": 'HTML',
    "results": "HTML",
}

# engine dependent config
categories = ['general', 'web']
paging = True
max_page = 200
"""200 pages maximum (``&first=1991``)"""

time_range_support = True
categories = ["general", "web"]
safesearch = True
"""Bing results are always SFW. To get NSFW links from Bing, some age
verification by a cookie is needed; that's not possible in SearXNG.
"""
_safesearch_map: dict[int, str] = {
    0: "off",
    1: "moderate",
    2: "strict",
}
"""Filter results. 0: None, 1: Moderate, 2: Strict"""

base_url = 'https://www.bing.com/search'
"""Bing (Web) search URL"""
base_url = "https://www.bing.com/search"
"""Bing-Web search URL"""


def _page_offset(pageno):
    return (int(pageno) - 1) * 10 + 1
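
# Example: Bing's "first" parameter is the 1-based index of the first result
# on a page of ten, so _page_offset(1) == 1 and _page_offset(3) == 21.
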

def get_locale_params(engine_region: str | None) -> dict[str, str] | None:
    """API documentation states the ``mkt`` parameter is *the
    recommended primary signal* for locale:

        If known, you are encouraged to always specify the market.
        Specifying the market helps Bing route the request and return an
        appropriate and optimal response.

    The ``mkt`` parameter takes a full ``<language>-<country>`` code.

    This function is shared with :py:mod:`searx.engines.bing_images`,
    :py:mod:`searx.engines.bing_news`, and :py:mod:`searx.engines.bing_videos`.
    """

    if not engine_region or engine_region == "clear":
        return None

    return {"mkt": engine_region}
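
# Example: get_locale_params("de-DE") returns {"mkt": "de-DE"}, while
# get_locale_params("clear") and get_locale_params(None) return None, so no
# market parameter is sent for the "all languages" setting.
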

def set_bing_cookies(params, engine_language, engine_region):
    params['cookies']['_EDGE_CD'] = f'm={engine_region}&u={engine_language}'
    params['cookies']['_EDGE_S'] = f'mkt={engine_region}&ui={engine_language}'
    logger.debug("bing cookies: %s", params['cookies'])


def override_accept_language(params: "OnlineParams", engine_region: str | None) -> None:
    """Override the ``Accept-Language`` header.

    The default header built by :py:class:`~searx.search.processors.online.OnlineProcessor`
    appends ``en;q=0.3`` as a fallback language::

        Accept-Language: de,de-DE;q=0.7,en;q=0.3

    Bing seems to better select the results locale based on the
    ``Accept-Language`` header value.

    This function is shared with :py:mod:`searx.engines.bing_images`,
    :py:mod:`searx.engines.bing_news`, and :py:mod:`searx.engines.bing_videos`.
    """

    if not engine_region or engine_region == "clear":
        return

    lang = engine_region.split("-")[0]
    params["headers"]["Accept-Language"] = f"{engine_region},{lang};q=0.9"
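
# Example: for the engine region "de-DE" the override yields
#
#     Accept-Language: de-DE,de;q=0.9
#
# replacing the processor default shown in the docstring above.
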

def request(query, params):
def request(query: str, params: "OnlineParams") -> "OnlineParams":
    """Assemble a Bing-Web request."""

    engine_region = traits.get_region(params['searxng_locale'], traits.all_locale)  # type: ignore
    engine_language = traits.get_language(params['searxng_locale'], 'en')  # type: ignore
    set_bing_cookies(params, engine_language, engine_region)
    engine_region = traits.get_region(params["searxng_locale"], traits.all_locale)

    page = params.get('pageno', 1)
    query_params = {
        'q': query,
        # if arg 'pq' is missing, sometimes on page 4 we get results from page 1;
        # don't ask why it is only sometimes / it's M$ and they have never been
        # deterministic ;)
        'pq': query,
    override_accept_language(params, engine_region)

    query_params: dict[str, str | int] = {
        "q": query,
        "adlt": _safesearch_map.get(params.get("safesearch", 0), "off"),
    }

    # To get the correct page, the arg "first" and the arg FORM are needed; the
    # value PERE is on page 2, on page 3 it's PERE1 and on page 4 it's PERE2 ..
    # and so forth. The 'first' arg should never be sent on page 1.
    locale_params = get_locale_params(engine_region)
    if locale_params:
        query_params.update(locale_params)

    if page > 1:
        query_params['first'] = _page_offset(page)  # see also arg FORM
    if page == 2:
        query_params['FORM'] = 'PERE'
    elif page > 2:
        query_params['FORM'] = 'PERE%s' % (page - 2)

    params['url'] = f'{base_url}?{urlencode(query_params)}'

    if params.get('time_range'):
        unix_day = int(time.time() / 86400)
        time_ranges = {'day': '1', 'week': '2', 'month': '3', 'year': f'5_{unix_day-365}_{unix_day}'}
        params['url'] += f'&filters=ex1:"ez{time_ranges[params["time_range"]]}"'
    params["url"] = f"{base_url}?{urlencode(query_params)}"

    # in some regions where geoblocking is employed (e.g. China),
    # www.bing.com redirects to the regional version of Bing
    params['allow_redirects'] = True
    params["allow_redirects"] = True

    return params


def response(resp):
    # pylint: disable=too-many-locals
def response(resp: "SXNG_Response") -> list[dict[str, t.Any]]:
    """Get response from Bing-Web"""

    results = []
    result_len = 0
    results: list[dict[str, t.Any]] = []

    dom = html.fromstring(resp.text)

    # parse results again if nothing is found yet

    for result in eval_xpath_list(dom, '//ol[@id="b_results"]/li[contains(@class, "b_algo")]'):
        link = eval_xpath_getindex(result, './/h2/a', 0, None)
    for item in eval_xpath_list(dom, '//ol[@id="b_results"]/li[contains(@class, "b_algo")]'):
        link = eval_xpath_getindex(item, ".//h2/a", 0, None)
        if link is None:
            continue
        url = link.attrib.get('href')

        href = link.attrib.get("href", "")
        title = extract_text(link)

        content = eval_xpath(result, './/p')
        for p in content:
            # Make sure that the element is free of:
            # <span class="algoSlug_icon" # data-priority="2">Web</span>
            for e in p.xpath('.//span[@class="algoSlug_icon"]'):
                e.getparent().remove(e)
        content = extract_text(content)
        if not href or not title:
            continue

        # get the real URL
        if url.startswith('https://www.bing.com/ck/a?'):
            # get the first value of u parameter
            url_query = urlparse(url).query
            parsed_url_query = parse_qs(url_query)
            param_u = parsed_url_query["u"][0]
            # remove "a1" in front
            encoded_url = param_u[2:]
            # add padding
            encoded_url = encoded_url + '=' * (-len(encoded_url) % 4)
            # decode base64 encoded URL
            url = base64.urlsafe_b64decode(encoded_url).decode()
        # what about cn.bing.com, ..?
        if href.startswith("https://www.bing.com/ck/a?"):
            qs = parse_qs(urlparse(href).query)
            u_values = qs.get("u")
            if u_values:
                u_val = u_values[0]
                if u_val.startswith("a1"):
                    encoded = u_val[2:]
                    # base64url without padding
                    encoded += "=" * (-len(encoded) % 4)
                    href = base64.urlsafe_b64decode(encoded).decode("utf-8", errors="replace")
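
        # Worked example (hypothetical token): a tracking link carrying
        # u=a1aHR0cHM6Ly9leGFtcGxlLm9yZy8 strips the leading "a1", pads the
        # remainder to "aHR0cHM6Ly9leGFtcGxlLm9yZy8=", and base64url-decodes
        # it to "https://example.org/".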

        # append result
        results.append({'url': url, 'title': title, 'content': content})
        # remove decorative icons that Bing injects into <p> elements
        # (`<span class="algoSlug_icon">`)
        content_els = eval_xpath(item, ".//p")
        for p in content_els:
            for icon in p.xpath('.//span[@class="algoSlug_icon"]'):
                icon.getparent().remove(icon)
        content = extract_text(content_els)

        results.append({"url": href, "title": title, "content": content})

    # get number_of_results
    if results:
        result_len_container = "".join(eval_xpath(dom, '//span[@class="sb_count"]//text()'))
        if "-" in result_len_container:
            start_str, result_len_container = re.split(r'-\d+', result_len_container)
            start = int(start_str)
        else:
            start = 1
        result_len_container = re.sub(r"[^0-9]", "", result_len_container)
        if result_len_container:
            results.append({"number_of_results": int(result_len_container)})

        result_len_container = re.sub('[^0-9]', '', result_len_container)
        if len(result_len_container) > 0:
            result_len = int(result_len_container)

        expected_start = _page_offset(resp.search_params.get("pageno", 1))

        if expected_start != start:
            if expected_start > result_len:
                # Avoid reading more results than available.
                # For example, if there are 100 results for some search and we try to get
                # results from 120 to 130, Bing will send back the results from 0 to 10
                # with no error. By comparing the results count with the "first" parameter
                # of the request we can avoid these "invalid" results.
                return []

            # Sometimes Bing will send back the first result page instead of the requested
            # page as a rate-limiting measure.
            msg = f"Expected results to start at {expected_start}, but got results starting at {start}"
            raise SearxEngineAPIException(msg)

    results.append({'number_of_results': result_len})
    return results


def fetch_traits(engine_traits: EngineTraits):
    """Fetch languages and regions from Bing-Web."""
def fetch_traits(engine_traits: EngineTraits) -> None:
    """Fetch regions from Bing-Web."""
    # pylint: disable=import-outside-toplevel

    from searx.network import get  # see https://github.com/searxng/searxng/issues/762

@@ -208,72 +186,38 @@ def fetch_traits(engine_traits: EngineTraits):
        "Cache-Control": "max-age=0",
    }

    resp = get("https://www.bing.com/account/general", headers=headers)
    if not resp.ok:  # type: ignore
        print("ERROR: response from bing is not OK.")
    resp = get("https://www.bing.com/account/general", headers=headers, timeout=5)
    if not resp.ok:
        raise RuntimeError("Response from Bing is not OK.")

    dom = html.fromstring(resp.text)  # type: ignore
    dom = html.fromstring(resp.text)

    # languages

    engine_traits.languages['zh'] = 'zh-hans'

    map_lang = {'prs': 'fa-AF', 'en': 'en-us'}
    bing_ui_lang_map = {
        # HINT: this list probably needs to be supplemented
        'en': 'us',  # en --> en-us
        'da': 'dk',  # da --> da-dk
    map_market_codes: dict[str, str] = {
        "zh-hk": "en-hk",  # not sure why, but at Microslop this is the market code for Hongkong
    }

    for href in eval_xpath(dom, '//div[@id="language-section-content"]//div[@class="languageItem"]/a/@href'):
        eng_lang = parse_qs(urlparse(href).query)['setlang'][0]
        babel_lang = map_lang.get(eng_lang, eng_lang)
        try:
            sxng_tag = language_tag(babel.Locale.parse(babel_lang.replace('-', '_')))
        except babel.UnknownLocaleError:
            print("ERROR: language (%s) is unknown by babel" % (babel_lang))
            continue
        # Language (e.g. 'en' or 'de') from https://www.bing.com/account/general
        # is converted by bing to 'en-us' or 'de-de'. But only if there is not
        # already a '-' delimiter in the language. For instance 'pt-PT' -->
        # 'pt-pt' and 'pt-br' --> 'pt-br'
        bing_ui_lang = eng_lang.lower()
        if '-' not in bing_ui_lang:
            bing_ui_lang = bing_ui_lang + '-' + bing_ui_lang_map.get(bing_ui_lang, bing_ui_lang)

        conflict = engine_traits.languages.get(sxng_tag)
        if conflict:
            if conflict != bing_ui_lang:
                print(f"CONFLICT: babel {sxng_tag} --> {conflict}, {bing_ui_lang}")
            continue
        engine_traits.languages[sxng_tag] = bing_ui_lang

    # regions (aka "market codes")

    engine_traits.regions['zh-CN'] = 'zh-cn'

    map_market_codes = {
        'zh-hk': 'en-hk',  # not sure why, but at M$ this is the market code for Hongkong
    }
    for href in eval_xpath(dom, '//div[@id="region-section-content"]//div[@class="regionItem"]/a/@href'):
        cc_tag = parse_qs(urlparse(href).query)['cc'][0]
        if cc_tag == 'clear':
        cc_tag = parse_qs(urlparse(href).query)["cc"][0]
        if cc_tag == "clear":
            engine_traits.all_locale = cc_tag
            continue

        # add market codes from official languages of the country ..
        for lang_tag in babel.languages.get_official_languages(cc_tag, de_facto=True):
            if lang_tag not in engine_traits.languages.keys():
                # print("ignore lang: %s <-- %s" % (cc_tag, lang_tag))
                continue
            lang_tag = lang_tag.split('_')[0]  # zh_Hant --> zh
            lang_tag = lang_tag.split("_")[0]  # zh_Hant --> zh
            market_code = f"{lang_tag}-{cc_tag}"  # zh-tw

            market_code = map_market_codes.get(market_code, market_code)
            sxng_tag = region_tag(babel.Locale.parse('%s_%s' % (lang_tag, cc_tag.upper())))

            try:
                sxng_tag = region_tag(babel.Locale.parse("%s_%s" % (lang_tag, cc_tag.upper())))
            except babel.UnknownLocaleError:
                # silently ignore unknown languages
                continue

            conflict = engine_traits.regions.get(sxng_tag)
            if conflict:
                if conflict != market_code:
                    print("CONFLICT: babel %s --> %s, %s" % (sxng_tag, conflict, market_code))
                continue

            engine_traits.regions[sxng_tag] = market_code

@@ -1,96 +1,101 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
"""Bing-Images: description see :py:obj:`searx.engines.bing`."""
# pylint: disable=invalid-name

import json
from urllib.parse import urlencode

from lxml import html

from searx.engines.bing import set_bing_cookies
from searx.engines.bing import fetch_traits  # pylint: disable=unused-import
from searx.engines.bing import (  # pylint: disable=unused-import
    fetch_traits,
    get_locale_params,
    override_accept_language,
)

# about
about = {
    "website": 'https://www.bing.com/images',
    "wikidata_id": 'Q182496',
    "official_api_documentation": 'https://www.microsoft.com/en-us/bing/apis/bing-image-search-api',
    "website": "https://www.bing.com/images",
    "wikidata_id": "Q182496",
    "official_api_documentation": "https://github.com/MicrosoftDocs/bing-docs",
    "use_official_api": False,
    "require_api_key": False,
    "results": 'HTML',
    "results": "HTML",
}

# engine dependent config
categories = ['images', 'web']
categories = ["images", "web"]
paging = True
safesearch = True
time_range_support = True

base_url = 'https://www.bing.com/images/async'
"""Bing (Images) search URL"""

time_map = {
    'day': 60 * 24,
    'week': 60 * 24 * 7,
    'month': 60 * 24 * 31,
    'year': 60 * 24 * 365,
    "day": 60 * 24,
    "week": 60 * 24 * 7,
    "month": 60 * 24 * 31,
    "year": 60 * 24 * 365,
}

base_url = "https://www.bing.com/images/async"
"""Bing-Image search URL"""
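
# Example: time_map converts a SearXNG time range into minutes for Bing's
# "age-lt" filter, e.g. "week" -> 60 * 24 * 7 = 10080, which request() below
# turns into qft=filterui:age-lt10080.
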

def request(query, params):
    """Assemble a Bing-Image request."""

    engine_region = traits.get_region(params['searxng_locale'], traits.all_locale)  # type: ignore
    engine_language = traits.get_language(params['searxng_locale'], 'en')  # type: ignore
    set_bing_cookies(params, engine_language, engine_region)
    engine_region = traits.get_region(params["searxng_locale"], traits.all_locale)

    override_accept_language(params, engine_region)

    # build URL query
    # - example: https://www.bing.com/images/async?q=foo&async=content&first=1&count=35
    # - example: https://www.bing.com/images/async?q=foo&async=1&first=1&count=35
    query_params = {
        'q': query,
        'async': '1',
        "q": query,
        "async": "1",
        # to simplify the page count let's use the default of 35 images per page
        'first': (int(params.get('pageno', 1)) - 1) * 35 + 1,
        'count': 35,
        "first": (int(params.get("pageno", 1)) - 1) * 35 + 1,
        "count": 35,
    }

    locale_params = get_locale_params(engine_region)
    if locale_params:
        query_params.update(locale_params)

    # time range
    # - example: one year (525600 minutes) 'qft=+filterui:age-lt525600'
    # - example: one year (525600 minutes) 'qft=filterui:age-lt525600'
    if params["time_range"]:
        query_params["qft"] = "filterui:age-lt%s" % time_map[params["time_range"]]

    if params['time_range']:
        query_params['qft'] = 'filterui:age-lt%s' % time_map[params['time_range']]

    params['url'] = base_url + '?' + urlencode(query_params)
    params["url"] = base_url + "?" + urlencode(query_params)

    return params


def response(resp):
    """Get response from Bing-Images"""
    """Get response from Bing-Image"""

    results = []

    dom = html.fromstring(resp.text)

    for result in dom.xpath('//ul[contains(@class, "dgControl_list")]/li'):

        metadata = result.xpath('.//a[@class="iusc"]/@m')
        if not metadata:
            continue

        metadata = json.loads(result.xpath('.//a[@class="iusc"]/@m')[0])
        title = ' '.join(result.xpath('.//div[@class="infnmpt"]//a/text()')).strip()
        img_format = ' '.join(result.xpath('.//div[@class="imgpt"]/div/span/text()')).strip().split(" · ")
        source = ' '.join(result.xpath('.//div[@class="imgpt"]//div[@class="lnkw"]//a/text()')).strip()
        title = " ".join(result.xpath('.//div[@class="infnmpt"]//a/text()')).strip()
        img_format = " ".join(result.xpath('.//div[@class="imgpt"]/div/span/text()')).strip().split(" · ")
        source = " ".join(result.xpath('.//div[@class="imgpt"]//div[@class="lnkw"]//a/text()')).strip()
        results.append(
            {
                'template': 'images.html',
                'url': metadata['purl'],
                'thumbnail_src': metadata['turl'],
                'img_src': metadata['murl'],
                'content': metadata.get('desc'),
                'title': title,
                'source': source,
                'resolution': img_format[0],
                'img_format': img_format[1] if len(img_format) >= 2 else None,
                "template": "images.html",
                "url": metadata["purl"],
                "thumbnail_src": metadata["turl"],
                "img_src": metadata["murl"],
                "content": metadata.get("desc"),
                "title": title,
                "source": source,
                "resolution": img_format[0],
                "img_format": img_format[1] if len(img_format) >= 2 else None,
            }
        )
    return results

@@ -7,92 +7,90 @@

"""

# pylint: disable=invalid-name

from urllib.parse import urlencode

from lxml import html

from searx.utils import eval_xpath, extract_text, eval_xpath_list, eval_xpath_getindex
from searx.enginelib.traits import EngineTraits
from searx.engines.bing import set_bing_cookies
from searx.engines.bing import (
    get_locale_params,
    override_accept_language,
)
from searx.utils import eval_xpath, eval_xpath_getindex, eval_xpath_list, extract_text

# about
about = {
    "website": 'https://www.bing.com/news',
    "wikidata_id": 'Q2878637',
    "official_api_documentation": 'https://www.microsoft.com/en-us/bing/apis/bing-news-search-api',
    "website": "https://www.bing.com/news",
    "wikidata_id": "Q2878637",
    "official_api_documentation": "https://github.com/MicrosoftDocs/bing-docs",
    "use_official_api": False,
    "require_api_key": False,
    "results": 'RSS',
    "results": "RSS",
}

# engine dependent config
categories = ['news']
categories = ["news"]
paging = True
"""If you go through the pages and there are actually no new results for
another page, then Bing returns the results from the last page again."""

time_range_support = True
time_map = {
    'day': 'interval="4"',
    'week': 'interval="7"',
    'month': 'interval="9"',
    "day": 'interval="4"',
    "week": 'interval="7"',
    "month": 'interval="9"',
}
"""A string '4' means *last hour*. We use *last hour* for ``day`` here since
the difference between *last day* and *last week* in the result list is just
marginal. Bing does not have the news range ``year``; we use ``month``
instead."""

base_url = 'https://www.bing.com/news/infinitescrollajax'
base_url = "https://www.bing.com/news/infinitescrollajax"
"""Bing (News) search URL"""


def request(query, params):
    """Assemble a Bing-News request."""

    engine_region = traits.get_region(params['searxng_locale'], traits.all_locale)  # type: ignore
    engine_language = traits.get_language(params['searxng_locale'], 'en')  # type: ignore
    set_bing_cookies(params, engine_language, engine_region)
    engine_region = traits.get_region(params["searxng_locale"], traits.all_locale)

    override_accept_language(params, engine_region)

    # build URL query
    #
    # example: https://www.bing.com/news/infinitescrollajax?q=london&first=1

    page = int(params.get('pageno', 1)) - 1
    # - example: https://www.bing.com/news/infinitescrollajax?q=london&first=1
    page = int(params.get("pageno", 1)) - 1
    query_params = {
        'q': query,
        'InfiniteScroll': 1,
        "q": query,
        "InfiniteScroll": 1,
        # to simplify the page count let's use the default of 10 items per page
        'first': page * 10 + 1,
        'SFX': page,
        'form': 'PTFTNR',
        'setlang': engine_region.split('-')[0],
        'cc': engine_region.split('-')[-1],
        "first": page * 10 + 1,
        "SFX": page,
        "form": "PTFTNR",
    }

    if params['time_range']:
        query_params['qft'] = time_map.get(params['time_range'], 'interval="9"')
    locale_params = get_locale_params(engine_region)
    if locale_params:
        query_params.update(locale_params)

    params['url'] = base_url + '?' + urlencode(query_params)
    if params["time_range"]:
        query_params["qft"] = time_map.get(params["time_range"], 'interval="9"')

    params["url"] = base_url + "?" + urlencode(query_params)

    return params
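
# Illustrative request URL (assumed en-US region, page 2): page=1 gives
# first=11 and SFX=1, so the engine fetches roughly
#
#     https://www.bing.com/news/infinitescrollajax?q=london&InfiniteScroll=1&first=11&SFX=1&form=PTFTNR&mkt=en-US
#
# The mkt parameter comes from get_locale_params(); the exact parameter order
# depends on urlencode().
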

def response(resp):
    """Get response from Bing-Video"""
    results = []
    """Parse the Bing-News response."""

    if not resp.ok or not resp.text:
        return results
    results = []

    dom = html.fromstring(resp.text)

    for newsitem in eval_xpath_list(dom, '//div[contains(@class, "newsitem")]'):

        link = eval_xpath_getindex(newsitem, './/a[@class="title"]', 0, None)
        if link is None:
            continue
        url = link.attrib.get('href')
        url = link.attrib.get("href")
        title = extract_text(link)
        content = extract_text(eval_xpath(newsitem, './/div[@class="snippet"]'))

@@ -100,31 +98,31 @@ def response(resp):
        source = eval_xpath_getindex(newsitem, './/div[contains(@class, "source")]', 0, None)
        if source is not None:
            for item in (
                eval_xpath_getindex(source, './/span[@aria-label]/@aria-label', 0, None),
                eval_xpath_getindex(source, ".//span[@aria-label]/@aria-label", 0, None),
                # eval_xpath_getindex(source, './/a', 0, None),
                # eval_xpath_getindex(source, './div/span', 3, None),
                link.attrib.get('data-author'),
                link.attrib.get("data-author"),
            ):
                if item is not None:
                    t = extract_text(item)
                    if t and t.strip():
                        metadata.append(t.strip())
        metadata = ' | '.join(metadata)
        metadata = " | ".join(metadata)

        thumbnail = None
        imagelink = eval_xpath_getindex(newsitem, './/a[@class="imagelink"]//img', 0, None)
        if imagelink is not None:
            thumbnail = imagelink.attrib.get('src')
            thumbnail = imagelink.attrib.get("src")
            if not thumbnail.startswith("https://www.bing.com"):
                thumbnail = 'https://www.bing.com/' + thumbnail
                thumbnail = "https://www.bing.com/" + thumbnail

        results.append(
            {
                'url': url,
                'title': title,
                'content': content,
                'thumbnail': thumbnail,
                'metadata': metadata,
                "url": url,
                "title": title,
                "content": content,
                "thumbnail": thumbnail,
                "metadata": metadata,
            }
        )

@@ -148,4 +146,4 @@ def fetch_traits(engine_traits: EngineTraits):
    # bot.

    # HINT: 'en-hk' is the region code; it does not indicate the language en!
    engine_traits.regions['zh-CN'] = 'en-hk'
    engine_traits.regions["zh-CN"] = "en-hk"

@@ -1,5 +1,4 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# pylint: disable=invalid-name
"""Bing-Videos: description see :py:obj:`searx.engines.bing`."""

import json
@@ -7,81 +6,90 @@ from urllib.parse import urlencode

from lxml import html

from searx.engines.bing import set_bing_cookies
from searx.engines.bing import fetch_traits  # pylint: disable=unused-import
from searx.engines.bing import (  # pylint: disable=unused-import
    fetch_traits,
    get_locale_params,
    override_accept_language,
)
from searx.engines.bing_images import time_map

from searx.utils import eval_xpath, eval_xpath_getindex

about = {
    "website": 'https://www.bing.com/videos',
    "wikidata_id": 'Q4914152',
    "official_api_documentation": 'https://www.microsoft.com/en-us/bing/apis/bing-video-search-api',
    "website": "https://www.bing.com/videos",
    "wikidata_id": "Q4914152",
    "official_api_documentation": "https://github.com/MicrosoftDocs/bing-docs",
    "use_official_api": False,
    "require_api_key": False,
    "results": 'HTML',
    "results": "HTML",
}

# engine dependent config
categories = ['videos', 'web']
categories = ["videos", "web"]
paging = True
safesearch = True
time_range_support = True

base_url = 'https://www.bing.com/videos/asyncv2'
"""Bing (Videos) async search URL."""
base_url = "https://www.bing.com/videos/asyncv2"
"""Bing-Video search URL"""


def request(query, params):
    """Assemble a Bing-Video request."""

    engine_region = traits.get_region(params['searxng_locale'], traits.all_locale)  # type: ignore
    engine_language = traits.get_language(params['searxng_locale'], 'en')  # type: ignore
    set_bing_cookies(params, engine_language, engine_region)
    engine_region = traits.get_region(params["searxng_locale"], traits.all_locale)

    override_accept_language(params, engine_region)

    # build URL query
    #
    # example: https://www.bing.com/videos/asyncv2?q=foo&async=content&first=1&count=35
    # - example: https://www.bing.com/videos/asyncv2?q=foo&async=content&first=1&count=35
    query_params = {
        'q': query,
        'async': 'content',
        # to simplify the page count let's use the default of 35 images per page
        'first': (int(params.get('pageno', 1)) - 1) * 35 + 1,
        'count': 35,
        "q": query,
        "async": "content",
        # to simplify the page count let's use the default of 35 videos per page
        "first": (int(params.get("pageno", 1)) - 1) * 35 + 1,
        "count": 35,
    }

    locale_params = get_locale_params(engine_region)
    if locale_params:
        query_params.update(locale_params)

    # time range
    #
    # example: one week (10080 minutes) '&qft= filterui:videoage-lt10080' '&form=VRFLTR'
    # - example: one week (10080 minutes) '&qft= filterui:videoage-lt10080' '&form=VRFLTR'
    if params["time_range"]:
        query_params["form"] = "VRFLTR"
        query_params["qft"] = " filterui:videoage-lt%s" % time_map[params["time_range"]]

    if params['time_range']:
        query_params['form'] = 'VRFLTR'
        query_params['qft'] = ' filterui:videoage-lt%s' % time_map[params['time_range']]

    params['url'] = base_url + '?' + urlencode(query_params)
    params["url"] = base_url + "?" + urlencode(query_params)

    return params


def response(resp):
    """Get response from Bing-Video"""

    results = []

    dom = html.fromstring(resp.text)

    for result in dom.xpath('//div[@class="dg_u"]//div[contains(@id, "mc_vtvc_video")]'):
        metadata = json.loads(result.xpath('.//div[@class="vrhdata"]/@vrhm')[0])
        info = ' - '.join(result.xpath('.//div[@class="mc_vtvc_meta_block"]//span/text()')).strip()
        content = '{0} - {1}'.format(metadata['du'], info)
        thumbnail = result.xpath('.//div[contains(@class, "mc_vtvc_th")]//img/@src')[0]
    for result in dom.xpath('//div[contains(@id, "mc_vtvc_video")]'):
        metadata = json.loads(eval_xpath_getindex(result, './/div[@class="vrhdata"]/@vrhm', index=0))
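
        # The vrhm attribute holds a JSON blob; an illustrative (hypothetical)
        # minimal payload looks like
        #   {"murl": "https://example.org/video", "vt": "Some title", "du": "3:05"}
        # where murl is the video URL, vt the title and du the duration, as used
        # in the result dict below.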
info = " - ".join(eval_xpath(result, './/div[@class="mc_vtvc_meta_block"]//span/text()')).strip()
|
||||
thumbnail = eval_xpath_getindex(
|
||||
result,
|
||||
'.//img[starts-with(@class, "rms")]/@data-src-hq',
|
||||
index=0,
|
||||
default=None,
|
||||
)
|
||||
|
||||
results.append(
|
||||
{
|
||||
'url': metadata['murl'],
|
||||
'thumbnail': thumbnail,
|
||||
'title': metadata.get('vt', ''),
|
||||
'content': content,
|
||||
'template': 'videos.html',
|
||||
"url": metadata["murl"],
|
||||
"thumbnail": thumbnail,
|
||||
"title": metadata.get("vt", ""),
|
||||
"content": info,
|
||||
"length": metadata["du"],
|
||||
"template": "videos.html",
|
||||
}
|
||||
)
|
||||
|
||||
|
||||

searx/engines/boardreader.py (new file, 145 lines)
@@ -0,0 +1,145 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
"""Boardreader (forum search)"""

import re

from datetime import datetime
from urllib.parse import urlencode
import typing as t
import gettext
import babel

from searx.locales import language_tag
from searx.enginelib import EngineCache
from searx.enginelib.traits import EngineTraits
from searx.exceptions import SearxEngineAPIException
from searx.network import get, raise_for_httperror
from searx.result_types import EngineResults
from searx.utils import extr, js_obj_str_to_python, html_to_text

if t.TYPE_CHECKING:
    from searx.extended_types import SXNG_Response
    from searx.search.processors import OnlineParams


about = {
    "website": "https://boardreader.com",
    "official_api_documentation": None,
    "use_official_api": False,
    "require_api_key": False,
    "results": "JSON",
}

categories = ["general", "social media"]
paging = True
time_range_support = True

base_url = "https://boardreader.com"
time_range_map = {"day": "1", "week": "7", "month": "30", "year": "365"}

CACHE: EngineCache
CACHE_SESSION_ID_KEY = "session_id_key"

KEYWORD_RE = re.compile(r"\[\/?Keyword\]")


def init(engine_settings: dict[str, t.Any]) -> bool:
    global CACHE  # pylint: disable=global-statement
    CACHE = EngineCache(engine_name=engine_settings["name"])
    return True


def _get_session_id() -> str:
    cached: str | None = CACHE.get(CACHE_SESSION_ID_KEY)
    if cached:
        return cached

    resp = get(base_url)
    if resp.status_code != 200:
        raise_for_httperror(resp)

    session_id = extr(resp.text, "'currentSessionId', '", "'")
    if not session_id:
        raise SearxEngineAPIException("failed to obtain session id")

    CACHE.set(CACHE_SESSION_ID_KEY, session_id)
    return session_id
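
# The session id is scraped from the homepage's inline JavaScript; extr() cuts
# the text between the two markers, so markup like (hypothetical value)
#
#     setItem('currentSessionId', 'abc123');
#
# yields "abc123", which is then cached for subsequent requests.
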
def request(query: str, params: "OnlineParams"):
|
||||
session_id = _get_session_id()
|
||||
|
||||
language: str = traits.get_language(
|
||||
params["searxng_locale"], default="All"
|
||||
) # pyright: ignore[reportAssignmentType]
|
||||
args = {
|
||||
"query": query,
|
||||
"page": params["pageno"],
|
||||
"language": language,
|
||||
"session_id": session_id,
|
||||
}
|
||||
if params["time_range"]:
|
||||
args["period"] = safe_search_map[params["time_range"]] # pyright: ignore[reportArgumentType]
|
||||
|
||||
params["url"] = f"{base_url}/return.php?{urlencode(args)}"
|
||||
return params
|
||||
|
||||
|
||||

def _remove_keyword_marker(text: str) -> str:
    """Convert text like "[Keyword]ABCDE[/Keyword]" to "ABCDE"."""
    return html_to_text(KEYWORD_RE.sub("", text))


def response(resp: "SXNG_Response") -> EngineResults:
    res = EngineResults()

    result: dict[str, str]
    for result in resp.json()["SearchResults"]:
        res.add(
            res.types.MainResult(
                title=_remove_keyword_marker(result["Subject"]),
                content=_remove_keyword_marker(result["Text"]),
                url=result["Url"],
                publishedDate=datetime.strptime(result["Published"], "%Y-%m-%d %H:%M:%S"),
                metadata=gettext.gettext("Posted by {author}").format(author=result["Author"]),
            )
        )

    return res


def fetch_traits(engine_traits: EngineTraits):
    # load the main page to be able to find the location of the JavaScript source code
    resp = get(base_url)
    if resp.status_code != 200:
        raise_for_httperror(resp)

    # load the actual JavaScript code
    script_name = "main." + extr(resp.text, "main.", ".js") + ".js"
    script_resp = get(f"{base_url}/{script_name}")
    if script_resp.status_code != 200:
        raise_for_httperror(script_resp)

    # find the list of languages (a JavaScript object)
    js_object_string = extr(script_resp.text, "languageValues=", "}],") + "}]"
    languages: list[dict[str, str]] = js_obj_str_to_python(js_object_string)

    # finally, add all parsed languages to the engine traits
    language: dict[str, str]
    for language in languages:
        search_value = language["value"]
        for code in language["codes"]:
            try:
                locale = babel.Locale.parse(code)
            except babel.UnknownLocaleError:
                continue

            sxng_lang = language_tag(locale)
            if sxng_lang not in engine_traits.languages:
                engine_traits.languages[sxng_lang] = search_value

    # "All" is the search value to unset the search language
    engine_traits.all_locale = "All"
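
# Illustrative only (hypothetical data): the extracted languageValues object
# is a list of entries such as
#
#     [{"value": "English", "codes": ["en", "en_US"]}, {"value": "German", "codes": ["de"]}, ...]
#
# so each babel-parsable code is mapped to the site's search value.
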

@@ -117,29 +117,28 @@ Implementations

"""

import json
import typing as t

from urllib.parse import (
    urlencode,
    urlparse,
)

import json
from dateutil import parser
from lxml import html

from searx import locales
from searx.utils import (
    extract_text,
    eval_xpath_list,
    eval_xpath_getindex,
    js_obj_str_to_python,
    js_obj_str_to_json_str,
    get_embeded_stream_url,
)
from searx.enginelib.traits import EngineTraits
from searx.result_types import EngineResults
from searx.extended_types import SXNG_Response
from searx.result_types import EngineResults
from searx.utils import (
    eval_xpath_getindex,
    eval_xpath_list,
    extract_text,
    get_embeded_stream_url,
    js_obj_str_to_json_str,
    js_obj_str_to_python,
)

about = {
    "website": "https://search.brave.com/",

@@ -172,7 +171,6 @@ the UI of Brave the user gets warned about this, since we can not warn the user
in SearXNG, the spellchecking is disabled by default.
"""

send_accept_language_header = True
paging = False
"""Brave only supports paging in :py:obj:`brave_category` ``search`` (UI
category All) and in the goggles category."""

@@ -214,6 +212,7 @@ def request(query: str, params: dict[str, t.Any]) -> None:
    if brave_category == "goggles":
        args["goggles_id"] = Goggles

    params["headers"]["Accept-Encoding"] = "gzip, deflate"
    params["url"] = f"{base_url}{brave_category}?{urlencode(args)}"
    logger.debug("url %s", params["url"])

@@ -264,10 +263,10 @@ def extract_json_data(text: str) -> dict[str, t.Any]:
|
||||
|
||||
def response(resp: SXNG_Response) -> EngineResults:
|
||||
|
||||
if brave_category in ('search', 'goggles'):
|
||||
if brave_category in ("search", "goggles"):
|
||||
return _parse_search(resp)
|
||||
|
||||
if brave_category in ('news'):
|
||||
if brave_category in ("news"):
|
||||
return _parse_news(resp)
|
||||
|
||||
# Example script source containing the data:
|
||||
@@ -277,11 +276,11 @@ def response(resp: SXNG_Response) -> EngineResults:
|
||||
# data: [{type:"data",data: .... ["q","goggles_id"],route:1,url:1}}]
|
||||
# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
json_data: dict[str, t.Any] = extract_json_data(resp.text)
|
||||
json_resp: dict[str, t.Any] = json_data['data'][1]["data"]['body']['response']
|
||||
json_resp: dict[str, t.Any] = json_data["data"][1]["data"]["body"]["response"]
|
||||
|
||||
if brave_category == 'images':
|
||||
if brave_category == "images":
|
||||
return _parse_images(json_resp)
|
||||
if brave_category == 'videos':
|
||||
if brave_category == "videos":
|
||||
return _parse_videos(json_resp)
|
||||
|
||||
raise ValueError(f"Unsupported brave category: {brave_category}")
|
||||
@@ -292,7 +291,6 @@ def _parse_search(resp: SXNG_Response) -> EngineResults:
|
||||
dom = html.fromstring(resp.text)
|
||||
|
||||
for result in eval_xpath_list(dom, "//div[contains(@class, 'snippet ')]"):
|
||||
|
||||
url: str | None = eval_xpath_getindex(result, ".//a/@href", 0, default=None)
|
||||
title_tag = eval_xpath_getindex(result, ".//div[contains(@class, 'title')]", 0, default=None)
|
||||
if url is None or title_tag is None or not urlparse(url).netloc: # partial url likely means it's an ad
|
||||
@@ -304,7 +302,12 @@ def _parse_search(resp: SXNG_Response) -> EngineResults:
|
||||
# there are other classes like 'site-name-content' we don't want to match,
|
||||
# however only using contains(@class, 'content') would e.g. also match `site-name-content`
|
||||
# thus, we explicitly also require the spaces as class separator
|
||||
_content = eval_xpath_getindex(result, ".//div[contains(concat(' ', @class, ' '), ' content ')]", 0, default="")
|
||||
_content = eval_xpath_getindex(
|
||||
result,
|
||||
".//div[contains(concat(' ', @class, ' '), ' content ')]",
|
||||
0,
|
||||
default="",
|
||||
)
|
||||
if len(_content):
|
||||
content = extract_text(_content) # type: ignore
|
||||
_pub_date = extract_text(
|
||||
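Reviewer note: the ``concat(' ', @class, ' ')`` trick in the hunk above is worth
spelling out, since plain substring matching on ``@class`` is a classic XPath
pitfall. A self-contained illustration (toy HTML, not Brave's real markup):

.. code:: python

   from lxml import html

   doc = html.fromstring('<div><div class="site-name-content">A</div><div class="content snippet">B</div></div>')

   # naive substring match: also catches 'site-name-content'
   doc.xpath("//div[contains(@class, 'content')]/text()")
   # -> ['A', 'B']

   # padding @class with spaces matches only the whole class token 'content'
   doc.xpath("//div[contains(concat(' ', @class, ' '), ' content ')]/text()")
   # -> ['B']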
@@ -327,7 +330,10 @@ def _parse_search(resp: SXNG_Response) -> EngineResults:
        res.add(item)

        video_tag = eval_xpath_getindex(
            result, ".//div[contains(@class, 'video-snippet') and @data-macro='video']", 0, default=[]
            result,
            ".//div[contains(@class, 'video-snippet') and @data-macro='video']",
            0,
            default=[],
        )
        if len(video_tag):
            # In my tests a video tag in the WEB search was most often not a
@@ -337,6 +343,9 @@ def _parse_search(resp: SXNG_Response) -> EngineResults:
            item["iframe_src"] = iframe_src
            item["template"] = "videos.html"

    for suggestion in eval_xpath_list(dom, "//a[contains(@class, 'related-query')]"):
        res.append(res.types.LegacyResult({"suggestion": extract_text(suggestion)}))

    return res


@@ -345,7 +354,6 @@ def _parse_news(resp: SXNG_Response) -> EngineResults:
    dom = html.fromstring(resp.text)

    for result in eval_xpath_list(dom, "//div[contains(@class, 'results')]//div[@data-type='news']"):

        url = eval_xpath_getindex(result, ".//a[contains(@class, 'result-header')]/@href", 0, default=None)
        if url is None:
            continue
@@ -414,23 +422,23 @@ def fetch_traits(engine_traits: EngineTraits):
    # pylint: disable=import-outside-toplevel, too-many-branches

    import babel.languages
    from searx.locales import region_tag, language_tag

    from searx.locales import language_tag, region_tag
    from searx.network import get  # see https://github.com/searxng/searxng/issues/762

    engine_traits.custom["ui_lang"] = {}

    lang_map = {'no': 'nb'}  # norway
    lang_map = {"no": "nb"}  # norway

    # languages (UI)

    resp = get('https://search.brave.com/settings')

    resp = get("https://search.brave.com/settings", timeout=5)
    if not resp.ok:
        print("ERROR: response from Brave is not OK.")
        raise RuntimeError("Response from Brave languages is not OK.")

    dom = html.fromstring(resp.text)

    for option in dom.xpath("//section//option[@value='en-us']/../option"):

        ui_lang = option.get("value")
        try:
            l = babel.Locale.parse(ui_lang, sep="-")
@@ -438,9 +446,8 @@ def fetch_traits(engine_traits: EngineTraits):
            sxng_tag = region_tag(babel.Locale.parse(ui_lang, sep="-"))
        else:
            sxng_tag = language_tag(babel.Locale.parse(ui_lang, sep="-"))

        except babel.UnknownLocaleError:
            print("ERROR: can't determine babel locale of Brave's (UI) language %s" % ui_lang)
            # silently ignore unknown languages
            continue

        conflict = engine_traits.custom["ui_lang"].get(sxng_tag)  # type: ignore
@@ -452,10 +459,12 @@ def fetch_traits(engine_traits: EngineTraits):

    # search regions of brave

    resp = get("https://cdn.search.brave.com/serp/v2/_app/immutable/chunks/parameters.734c106a.js")

    resp = get(
        "https://cdn.search.brave.com/serp/v2/_app/immutable/chunks/parameters.734c106a.js",
        timeout=5,
    )
    if not resp.ok:
        print("ERROR: response from Brave is not OK.")
        raise RuntimeError("Response from Brave regions is not OK.")

    country_js = resp.text[resp.text.index("options:{all") + len("options:") :]
    country_js = country_js[: country_js.index("},k={default")]
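Reviewer note: the two slicing lines above cut the region-options object out of
Brave's bundled parameters script. A toy illustration of the same slicing; the
surrounding script text here is invented, only the two marker strings come from
the code above:

.. code:: python

   text = 'foo,options:{all:{label:"All regions"},us:{label:"United States"}},k={default:"us"},bar'

   country_js = text[text.index("options:{all") + len("options:") :]
   country_js = country_js[: country_js.index("},k={default")]
   # country_js == '{all:{label:"All regions"},us:{label:"United States"}'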
@@ -470,7 +479,11 @@ def fetch_traits(engine_traits: EngineTraits):
    # add official languages of the country ..
    for lang_tag in babel.languages.get_official_languages(country_tag, de_facto=True):
        lang_tag = lang_map.get(lang_tag, lang_tag)
        sxng_tag = region_tag(babel.Locale.parse("%s_%s" % (lang_tag, country_tag.upper())))
        try:
            sxng_tag = region_tag(babel.Locale.parse("%s_%s" % (lang_tag, country_tag.upper())))
        except babel.UnknownLocaleError:
            # silently ignore unknown languages
            continue
        # print("%-20s: %s <-- %s" % (v["label"], country_tag, sxng_tag))

        conflict = engine_traits.regions.get(sxng_tag)
126  searx/engines/braveapi.py  Normal file
@@ -0,0 +1,126 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
"""Engine to search using the Brave (WEB) Search API.

.. _Brave Search API: https://api-dashboard.search.brave.com/documentation

Configuration
=============

The engine has the following mandatory setting:

- :py:obj:`api_key`

Optional settings are:

- :py:obj:`results_per_page`

.. code:: yaml

  - name: braveapi
    engine: braveapi
    api_key: 'YOUR-API-KEY'  # required
    results_per_page: 20  # optional

The API supports paging and time filters.
"""

import typing as t

from urllib.parse import urlencode
from dateutil import parser

from searx.exceptions import SearxEngineAPIException
from searx.result_types import EngineResults

if t.TYPE_CHECKING:
    from searx.extended_types import SXNG_Response
    from searx.search.processors import OnlineParams

about = {
    "website": "https://api.search.brave.com/",
    "wikidata_id": None,
    "official_api_documentation": "https://api-dashboard.search.brave.com/documentation",
    "use_official_api": True,
    "require_api_key": True,
    "results": "JSON",
}

api_key: str = ""
"""API key for Brave Search API (required)."""

categories = ["general", "web"]
paging = True
safesearch = True
time_range_support = True

results_per_page: int = 20
"""Maximum number of results per page (default 20)."""

base_url = "https://api.search.brave.com/res/v1/web/search"
"""Base URL for the Brave Search API."""

time_range_map = {"day": "past_day", "week": "past_week", "month": "past_month", "year": "past_year"}
"""Mapping of SearXNG time ranges to Brave API time ranges."""


def init(_):
    """Initialize the engine."""
    if not api_key:
        raise SearxEngineAPIException("No API key provided")


def request(query: str, params: "OnlineParams") -> None:
    """Create the API request."""
    search_args: dict[str, str | int | None] = {
        "q": query,
        "count": results_per_page,
        "offset": (params["pageno"] - 1) * results_per_page,
    }

    # Apply time filter if specified
    if params["time_range"]:
        search_args["time_range"] = time_range_map.get(params["time_range"])

    # Apply SafeSearch if enabled
    if params["safesearch"]:
        search_args["safesearch"] = "strict"

    params["url"] = f"{base_url}?{urlencode(search_args)}"
    params["headers"]["X-Subscription-Token"] = api_key


def _extract_published_date(published_date_raw: str):
    """Extract and parse the published date from the API response.

    Args:
        published_date_raw: Raw date string from the API

    Returns:
        Parsed datetime object or None if parsing fails
    """
    if not published_date_raw:
        return None

    try:
        return parser.parse(published_date_raw)
    except parser.ParserError:
        return None


def response(resp: "SXNG_Response") -> EngineResults:
    """Process the API response and return results."""
    res = EngineResults()
    data = resp.json()

    for result in data.get("web", {}).get("results", []):
        res.add(
            res.types.MainResult(
                url=result["url"],
                title=result["title"],
                content=result.get("description", ""),
                publishedDate=_extract_published_date(result.get("age")),
                thumbnail=result.get("thumbnail", {}).get("src"),
            ),
        )

    return res
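Reviewer note: ``request()`` in the new engine just assembles a GET URL plus a
subscription header. For reference, a rough sketch of the equivalent raw call
built from the values above; the token is a placeholder, only the header name
``X-Subscription-Token`` and the URL come from the code:

.. code:: python

   import urllib.request
   from urllib.parse import urlencode

   # page 2 with 20 results per page -> offset (2 - 1) * 20 = 20
   args = {"q": "searxng", "count": 20, "offset": 20, "safesearch": "strict"}
   req = urllib.request.Request(
       f"https://api.search.brave.com/res/v1/web/search?{urlencode(args)}",
       headers={"X-Subscription-Token": "YOUR-API-KEY"},  # placeholder key
   )
   # urllib.request.urlopen(req) would return the JSON body that response() parses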
59  searx/engines/cachy_os.py  Normal file
@@ -0,0 +1,59 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
"""CachyOS (packages, it)"""

from urllib.parse import urlencode
from datetime import datetime
from searx.result_types import EngineResults

about = {
    "website": 'https://cachyos.org',
    "wikidata_id": "Q116777127",
    "official_api_documentation": None,
    "use_official_api": False,
    "require_api_key": False,
    "results": "JSON",
}

base_url = "https://packages.cachyos.org/api/search"
categories = ['packages', 'it']
paging = True
results_per_page = 15


def request(query, params):
    query_params = {
        "search": query,
        "page_size": results_per_page,
        "current_page": params["pageno"],
    }

    params["url"] = f"{base_url}?{urlencode(query_params)}"

    return params


def response(resp) -> EngineResults:
    results = EngineResults()
    search_res = resp.json()

    for item in search_res["packages"]:
        package_name = item["pkg_name"]
        arch = item["pkg_arch"]
        repo = item["repo_name"]

        results.add(
            results.types.LegacyResult(
                {
                    "template": 'packages.html',
                    "url": f"https://packages.cachyos.org/package/{repo}/{arch}/{package_name}",
                    "title": f"{package_name} ({repo})",
                    "package_name": package_name,
                    "publishedDate": datetime.fromtimestamp(item["pkg_builddate"]),
                    "version": item["pkg_version"],
                    "content": item["pkg_desc"],
                    "tags": [arch],
                }
            )
        )

    return results
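Reviewer note: for reference, the request above produces a plain GET like the
following (sketch; the query value is arbitrary):

.. code:: python

   from urllib.parse import urlencode

   query_params = {"search": "linux-cachyos", "page_size": 15, "current_page": 1}
   url = f"https://packages.cachyos.org/api/search?{urlencode(query_params)}"
   # -> https://packages.cachyos.org/api/search?search=linux-cachyos&page_size=15&current_page=1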
@@ -10,29 +10,33 @@ Dailymotion (Videos)

"""

import time
from datetime import datetime, timedelta
from urllib.parse import urlencode
import time

import babel

from searx.network import get, raise_for_httperror  # see https://github.com/searxng/searxng/issues/762
from searx.utils import html_to_text
from searx.exceptions import SearxEngineAPIException
from searx.locales import region_tag, language_tag
from searx.enginelib.traits import EngineTraits
from searx.exceptions import SearxEngineAPIException
from searx.locales import language_tag, region_tag
from searx.network import (  # see https://github.com/searxng/searxng/issues/762
    get,
    raise_for_httperror,
)
from searx.utils import html_to_text

# about
about = {
    "website": 'https://www.dailymotion.com',
    "wikidata_id": 'Q769222',
    "official_api_documentation": 'https://www.dailymotion.com/developer',
    "website": "https://www.dailymotion.com",
    "wikidata_id": "Q769222",
    "official_api_documentation": "https://www.dailymotion.com/developer",
    "use_official_api": True,
    "require_api_key": False,
    "results": 'JSON',
    "results": "JSON",
}

# engine dependent config
categories = ['videos']
categories = ["videos"]
paging = True
number_of_results = 10

@@ -46,8 +50,8 @@ time_delta_dict = {

safesearch = True
safesearch_params = {
    2: {'is_created_for_kids': 'true'},
    1: {'is_created_for_kids': 'true'},
    2: {"is_created_for_kids": "true"},
    1: {"is_created_for_kids": "true"},
    0: {},
}
"""True if this video is "Created for Kids" / intends to target an audience
@@ -55,9 +59,9 @@ under the age of 16 (``is_created_for_kids`` in `Video filters API`_ )
"""

family_filter_map = {
    2: 'true',
    1: 'true',
    0: 'false',
    2: "true",
    1: "true",
    0: "false",
}
"""By default, the family filter is turned on. Setting this parameter to
``false`` will stop filtering-out explicit content from searches and global
@@ -65,21 +69,21 @@ contexts (``family_filter`` in `Global API Parameters`_ ).
"""

result_fields = [
    'allow_embed',
    'description',
    'title',
    'created_time',
    'duration',
    'url',
    'thumbnail_360_url',
    'id',
    "allow_embed",
    "description",
    "title",
    "created_time",
    "duration",
    "url",
    "thumbnail_360_url",
    "id",
]
"""`Fields selection`_, by default, a few fields are returned. To request more
specific fields, the ``fields`` parameter is used with the list of fields
SearXNG needs in the response to build a video result list.
"""

search_url = 'https://api.dailymotion.com/videos?'
search_url = "https://api.dailymotion.com/videos?"
"""URL to retrieve a list of videos.

- `REST GET`_
@@ -96,42 +100,42 @@ def request(query, params):
    if not query:
        return False

    eng_region: str = traits.get_region(params['searxng_locale'], 'en_US')  # type: ignore
    eng_lang = traits.get_language(params['searxng_locale'], 'en')
    eng_region: str = traits.get_region(params["searxng_locale"], "en_US")  # type: ignore
    eng_lang = traits.get_language(params["searxng_locale"], "en")

    args = {
        'search': query,
        'family_filter': family_filter_map.get(params['safesearch'], 'false'),
        'thumbnail_ratio': 'original',  # original|widescreen|square
        "search": query,
        "family_filter": family_filter_map.get(params["safesearch"], "false"),
        "thumbnail_ratio": "original",  # original|widescreen|square
        # https://developers.dailymotion.com/api/#video-filters
        'languages': eng_lang,
        'page': params['pageno'],
        'password_protected': 'false',
        'private': 'false',
        'sort': 'relevance',
        'limit': number_of_results,
        'fields': ','.join(result_fields),
        "languages": eng_lang,
        "page": params["pageno"],
        "password_protected": "false",
        "private": "false",
        "sort": "relevance",
        "limit": number_of_results,
        "fields": ",".join(result_fields),
    }

    args.update(safesearch_params.get(params['safesearch'], {}))
    args.update(safesearch_params.get(params["safesearch"], {}))

    # Don't add localization and country arguments if the user does select a
    # language (:de, :en, ..)

    if len(params['searxng_locale'].split('-')) > 1:
    if len(params["searxng_locale"].split("-")) > 1:
        # https://developers.dailymotion.com/api/#global-parameters
        args['localization'] = eng_region
        args['country'] = eng_region.split('_')[1]
        args["localization"] = eng_region
        args["country"] = eng_region.split("_")[1]
        # Insufficient rights for the `ams_country' parameter of route `GET /videos'
        # 'ams_country': eng_region.split('_')[1],

    time_delta = time_delta_dict.get(params["time_range"])
    if time_delta:
        created_after = datetime.now() - time_delta
        args['created_after'] = datetime.timestamp(created_after)
        args["created_after"] = datetime.timestamp(created_after)

    query_str = urlencode(args)
    params['url'] = search_url + query_str
    params["url"] = search_url + query_str

    return params
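Reviewer note: the Dailymotion time filter is expressed as an absolute
``created_after`` Unix timestamp rather than a symbolic range. A sketch of what
the code above computes for a one-week filter, assuming ``time_delta_dict``
maps ``"week"`` to ``timedelta(days=7)`` (that mapping is outside this hunk):

.. code:: python

   from datetime import datetime, timedelta

   time_delta = timedelta(days=7)  # assumed value of time_delta_dict["week"]
   created_after = datetime.now() - time_delta
   args = {"created_after": datetime.timestamp(created_after)}  # float epoch seconds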
@@ -143,46 +147,45 @@ def response(resp):
    search_res = resp.json()

    # check for an API error
    if 'error' in search_res:
        raise SearxEngineAPIException(search_res['error'].get('message'))
    if "error" in search_res:
        raise SearxEngineAPIException(search_res["error"].get("message"))

    raise_for_httperror(resp)

    # parse results
    for res in search_res.get('list', []):
    for res in search_res.get("list", []):
        title = res["title"]
        url = res["url"]

        title = res['title']
        url = res['url']

        content = html_to_text(res['description'])
        content = html_to_text(res["description"])
        if len(content) > 300:
            content = content[:300] + '...'
            content = content[:300] + "..."

        publishedDate = datetime.fromtimestamp(res['created_time'], None)
        publishedDate = datetime.fromtimestamp(res["created_time"], None)

        length = time.gmtime(res.get('duration'))
        length = time.gmtime(res.get("duration"))
        if length.tm_hour:
            length = time.strftime("%H:%M:%S", length)
        else:
            length = time.strftime("%M:%S", length)

        thumbnail = res['thumbnail_360_url']
        thumbnail = res["thumbnail_360_url"]
        thumbnail = thumbnail.replace("http://", "https://")

        item = {
            'template': 'videos.html',
            'url': url,
            'title': title,
            'content': content,
            'publishedDate': publishedDate,
            'length': length,
            'thumbnail': thumbnail,
            "template": "videos.html",
            "url": url,
            "title": title,
            "content": content,
            "publishedDate": publishedDate,
            "length": length,
            "thumbnail": thumbnail,
        }

        # HINT: no matter what the value is, without an API token videos can't
        # be shown embedded
        if res['allow_embed']:
            item['iframe_src'] = iframe_src.format(video_id=res['id'])
        if res["allow_embed"]:
            item["iframe_src"] = iframe_src.format(video_id=res["id"])

        results.append(item)

@@ -208,13 +211,13 @@ def fetch_traits(engine_traits: EngineTraits):

    """

    resp = get('https://api.dailymotion.com/locales')
    if not resp.ok:  # type: ignore
        print("ERROR: response from dailymotion/locales is not OK.")
    resp = get("https://api.dailymotion.com/locales", timeout=5)
    if not resp.ok:
        raise RuntimeError("Response from Dailymotion locales is not OK.")

    for item in resp.json()['list']:  # type: ignore
        eng_tag = item['locale']
        if eng_tag in ('en_EN', 'ar_AA'):
    for item in resp.json()["list"]:  # type: ignore
        eng_tag = item["locale"]
        if eng_tag in ("en_EN", "ar_AA"):
            continue
        try:
            sxng_tag = region_tag(babel.Locale.parse(eng_tag))
@@ -229,14 +232,14 @@ def fetch_traits(engine_traits: EngineTraits):
            continue
        engine_traits.regions[sxng_tag] = eng_tag

    locale_lang_list = [x.split('_')[0] for x in engine_traits.regions.values()]
    locale_lang_list = [x.split("_")[0] for x in engine_traits.regions.values()]

    resp = get('https://api.dailymotion.com/languages')
    if not resp.ok:  # type: ignore
        print("ERROR: response from dailymotion/languages is not OK.")
    resp = get("https://api.dailymotion.com/languages", timeout=5)
    if not resp.ok:
        raise RuntimeError("Response from Dailymotion languages is not OK.")

    for item in resp.json()['list']:  # type: ignore
        eng_tag = item['code']
    for item in resp.json()["list"]:  # type: ignore
        eng_tag = item["code"]
        if eng_tag in locale_lang_list:
            sxng_tag = language_tag(babel.Locale.parse(eng_tag))
            engine_traits.languages[sxng_tag] = eng_tag
@@ -32,7 +32,7 @@ if t.TYPE_CHECKING:


engine_type = "online"
send_accept_language_header = True
# send_accept_language_header = False
categories = ["general"]
disabled = True
timeout = 2.0
@@ -2,145 +2,274 @@
"""
DuckDuckGo WEB
~~~~~~~~~~~~~~

DDG's WEB search:

- DuckDuckGo WEB      : ``https://links.duckduckgo.com/d.js?q=..`` (HTTP GET)
- DuckDuckGo WEB no-AI: ``https://noai.duckduckgo.com/`` (HTTP GET)
- DuckDuckGo WEB html : ``https://html.duckduckgo.com/html`` (HTTP POST no-JS / form data)
- DuckDuckGo WEB lite : ``https://lite.duckduckgo.com/lite`` (HTTP POST no-JS / form data)

DDG's content search / see engine ``duckduckgo_extra.py``

- DuckDuckGo Images : ``https://duckduckgo.com/i.js??q=...&vqd=...``
- DuckDuckGo Videos : ``https://duckduckgo.com/v.js??q=...&vqd=...``
- DuckDuckGo News   : ``https://duckduckgo.com/news.js??q=...&vqd=...``

.. hint::

   For WEB searches and to determine the ``vqd`` value, DDG-html (no-JS) is
   used.

Special features of the no-JS services (DDG-lite & DDG-html):

- The no-JS clients receive a form that contains all the controlling parameters.
- When the form data is submitted, a real WEB browser sets the HTTP *Sec-Fetch*
  headers.

HTML ``<form>``, HTTP-Headers & DDG's bot Blocker:

  The HTTP User-Agent_ (see below) is generated by the WEB-client and is
  checked by DDG's bot blocker.

  To simulate the behavior of a real browser session, it might be necessary to
  evaluate additional headers. For example, in the response from DDG, the
  Referrer-Policy_ is always set to ``origin``. A real browser would then
  include the following header in the next request::

      Referer: https://html.duckduckgo.com/

The fields of the html-form are reverse-engineered from DDG-html and may be
subject to additional bot detection mechanisms and breaking changes in the
future.

Query field:

  Intro page: https://html.duckduckgo.com/html/

  - ``q`` (str): Search query string
  - ``b`` (str): Beginning parameter - empty string for first page requests. If a
    second page is requested, this field is not set!

Search options:

  - ``kl`` (str): Keyboard language/region code (e.g. 'en-us', default: 'wt-wt')
  - ``df`` (str): Time filter, maps to values like 'd' (day), 'w' (week), 'm' (month), 'y' (year)

  The key/value pairs ``df`` and ``kl`` are additionally saved in the cookies,
  example::

      Cookie: kl=en-us; df=m

*next page* form fields (a sketch of the assembled form data follows this
docstring):

  - ``nextParams`` (str): Continuation parameters from previous page response,
    typically empty string. Opposite of ``b``; this field is not set when
    requesting the first result page.

  - ``api`` (str): API endpoint identifier, typically 'd.js'
  - ``o`` (str): Output format, typically ``json``
  - ``v`` (str): Typically ``l`` for subsequent pages

  - ``dc`` (int): Display count - value equal to offset (s) + 1
  - ``s`` (int): Search offset for pagination
  - ``vqd`` (str): Validation query digest

General assumptions regarding DDG's bot blocker:

- Except ``Cookie: kl=..; df=..`` DDG does not use cookies in any of its
  services.

- DDG does not accept queries with more than 499 chars.

- The ``vqd`` value ("Validation query digest") is needed to pass DDG's bot
  protection and is used by all requests to DDG.

- The ``vqd`` value is generally not needed for the first query (intro); it is
  only required when additional pages are accessed (or when new content needs to
  be loaded for the query while scrolling).

- The second page (additional content) for a query cannot be requested without
  ``vqd``, as this would lead to an immediate blocking, since such a use-case
  does not exist in the process flows provided by DDG (and is a clear indication
  of a bot).

The following HTTP headers are being evaluated (and may possibly be responsible
for issues):

User-Agent_:
  The HTTP User-Agent is also involved in the formation of the vqd value, read
  `DuckDuckGo Bot Detection Research & Solution`_. However, it is not checked
  whether the UA is a known header. However, it is possible that certain UA
  headers (such as curl) are filtered.

Sec-Fetch-Mode_:
  In the past, Sec-Fetch-Mode had to be set to 'navigate', otherwise there were
  problems with the bot blocker. I don't know if DDG still evaluates this
  header today.

Accept-Language_:
  DDG-Lite and DDG-HTML try to guess the user's preferred language from the HTTP
  ``Accept-Language``. Optionally, the user can select a region filter (but not a
  language).

In DDG's bot blocker, the IP will be blocked (DDG does not have a client session!)

- As far as is known, it is possible to get an IP un-blocked by executing a
  DDG query in a real web browser over the blocked IP (at least that's my
  assumption).

How exactly the blocking mechanism currently works is not fully known, and
there were also changes to the bot blocker in the period of Q3/Q4 2025: in the
past, the IP blocking was implemented as a 'sliding window' (unblock after
about 1 hour without requests from this IP).

Terms / phrases that you keep coming across:

- ``d.js``, ``i.js``, ``v.js``, ``news.js`` are the endpoints of DDG's web
  API through which additional content for a query can be requested (vqd
  required)

  The ``*.js`` endpoints return a JSON response and can therefore only be
  executed on a JS-capable client.

  The service at https://lite.duckduckgo.com/lite offers general WEB searches
  (no news, videos etc). DDG-lite and DDG-html can be used by clients that do
  not support JS, aka *no-JS*.

  DDG-lite works a bit differently: here, ``d.js`` is not an endpoint but a
  field (``api=d.js``) in a form that is sent to DDG-lite.

- The request argument ``origin=funnel_home_website`` is often seen in the DDG
  services when the category is changed (e.g., from web search to news, images,
  or to the video category)

.. _DuckDuckGo Bot Detection Research & Solution:
   https://github.com/ggfevans/searxng/blob/mod-sidecar-harvester/docs/ddg-bot-detection-research.md

.. _Sec-Fetch-Mode:
   https://developer.mozilla.org/en-US/docs/Web/HTTP/Reference/Headers/Sec-Fetch-Mode

.. _Referrer-Policy:
   https://developer.mozilla.org/docs/Web/HTTP/Reference/Headers/Referrer-Policy#directives

.. _Referer:
   https://developer.mozilla.org/de/docs/Web/HTTP/Reference/Headers/Referer

.. _User-Agent:
   https://developer.mozilla.org/en-US/docs/Web/HTTP/Reference/Headers/User-Agent

.. _Accept-Language:
   https://developer.mozilla.org/en-US/docs/Web/HTTP/Reference/Headers/Accept-Language

"""
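Reviewer note: putting the form fields listed in the docstring above together,
a follow-up page request is roughly the POST body below. The values follow the
field descriptions above; the vqd string and the query are dummies:

.. code:: python

   # sketch of the form data for the 3rd result page (offset 10 + 1 * 15 = 25)
   form = {
       "q": "free and open source software",
       "s": 25,                # search offset
       "nextParams": "",
       "v": "l",
       "o": "json",
       "dc": 26,               # offset + 1
       "api": "d.js",
       "vqd": "4-1234567890",  # dummy validation query digest
       "kl": "en-us",
       "df": "m",              # optional time filter (month)
   }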
# pylint: disable=global-statement

import json
import re

from urllib.parse import quote_plus
import typing as t

import babel
import lxml.html

from searx import (
    locales,
    external_bang,
)
from searx import locales
from searx.enginelib import EngineCache
from searx.enginelib.traits import EngineTraits
from searx.exceptions import SearxEngineCaptchaException
from searx.external_bang import EXTERNAL_BANGS, get_node  # type: ignore
from searx.result_types import EngineResults
from searx.utils import (
    ElementType,
    eval_xpath,
    eval_xpath_getindex,
    extr,
    extract_text,
    gen_useragent,
)
from searx.network import get  # see https://github.com/searxng/searxng/issues/762
from searx.enginelib.traits import EngineTraits
from searx.enginelib import EngineCache
from searx.exceptions import SearxEngineCaptchaException
from searx.result_types import EngineResults

about = {
    "website": 'https://lite.duckduckgo.com/lite/',
    "wikidata_id": 'Q12805',
if t.TYPE_CHECKING:
    from searx.extended_types import SXNG_Response
    from searx.search.processors import OnlineParams

about: dict[str, str | bool] = {
    "website": "https://lite.duckduckgo.com/lite/",
    "wikidata_id": "Q12805",
    "use_official_api": False,
    "require_api_key": False,
    "results": 'HTML',
    "results": "HTML",
}

send_accept_language_header = True
"""DuckDuckGo-Lite tries to guess the user's preferred language from the HTTP
``Accept-Language``. Optionally, the user can select a region filter (but not a
language).
"""
categories: list[str] = ["general", "web"]
paging: bool = True
time_range_support: bool = True
safesearch: bool = True
"""DDG-lite: user can't select but the results are filtered."""

# engine dependent config
categories = ['general', 'web']
paging = True
time_range_support = True
safesearch = True  # user can't select but the results are filtered
ddg_url: str = "https://html.duckduckgo.com/html/"
"""The process flow for determining the ``vqd`` values was implemented for the
no-JS variant (DDG-html)"""

url = "https://html.duckduckgo.com/html/"
time_range_dict: dict[str, str] = {"day": "d", "week": "w", "month": "m", "year": "y"}

time_range_dict = {'day': 'd', 'week': 'w', 'month': 'm', 'year': 'y'}
form_data = {'v': 'l', 'api': 'd.js', 'o': 'json'}

_CACHE: EngineCache = None  # type: ignore
_CACHE: EngineCache = None  # pyright: ignore[reportAssignmentType]
"""Persistent (SQLite) key/value cache that deletes its values after ``expire``
seconds."""

_HTTP_User_Agent: str = gen_useragent()


def get_cache():
    global _CACHE  # pylint: disable=global-statement
    if _CACHE is None:
        _CACHE = EngineCache("duckduckgo")  # type:ignore


def get_cache() -> EngineCache:
    global _CACHE
    if _CACHE is None:  # pyright: ignore[reportUnnecessaryComparison]
        _CACHE = EngineCache("duckduckgo")  # pyright: ignore[reportUnreachable]
    return _CACHE


def get_vqd(query: str, region: str, force_request: bool = False) -> str:
    """Returns the ``vqd`` that fits to the *query*.

    :param query: The query term
    :param region: DDG's region code
    :param force_request: force a request to get a vqd value from DDG

    TL;DR; the ``vqd`` value is needed to pass DDG's bot protection and is used
    by all requests to DDG:

    - DuckDuckGo Lite:   ``https://lite.duckduckgo.com/lite`` (POST form data)
    - DuckDuckGo Web:    ``https://links.duckduckgo.com/d.js?q=...&vqd=...``
    - DuckDuckGo Images: ``https://duckduckgo.com/i.js??q=...&vqd=...``
    - DuckDuckGo Videos: ``https://duckduckgo.com/v.js??q=...&vqd=...``
    - DuckDuckGo News:   ``https://duckduckgo.com/news.js??q=...&vqd=...``

    DDG's bot detection is sensitive to the ``vqd`` value. For some search terms
    (such as extremely long search terms that are often sent by bots), no ``vqd``
    value can be determined.

    If SearXNG cannot determine a ``vqd`` value, then no request should go out
    to DDG.

    .. attention::

       A request with a wrong ``vqd`` value leads to DDG temporarily putting
       SearXNG's IP on a block list.

       Requests from IPs in this block list run into timeouts. Not sure, but it
       seems the block list is a sliding window: to get my IP off the bot list
       I had to cool down my IP for 1h (send no requests from that IP to DDG).
    """
def set_vqd(query: str | int, value: str, params: "OnlineParams") -> None:
    cache = get_cache()
    key = cache.secret_hash(f"{query}//{region}")
    value = cache.get(key=key)
    if value is not None and not force_request:
        logger.debug("vqd: re-use cached value: %s", value)
        return value

    logger.debug("vqd: request value from duckduckgo.com")
    resp = get(f'https://duckduckgo.com/?q={quote_plus(query)}')
    if resp.status_code == 200:  # type: ignore
        value = extr(resp.text, 'vqd="', '"')  # type: ignore
        if value:
            logger.debug("vqd value from duckduckgo.com request: '%s'", value)
        else:
            logger.error("vqd: can't parse value from ddg response (return empty string)")
            return ""
    else:
        logger.error("vqd: got HTTP %s from duckduckgo.com", resp.status_code)

    if value:
        cache.set(key=key, value=value)
    else:
        logger.error("none vqd value from duckduckgo.com: HTTP %s", resp.status_code)
    return value


def set_vqd(query: str, region: str, value: str):
    cache = get_cache()
    key = cache.secret_hash(f"{query}//{region}")
    key = cache.secret_hash(f"{query}//{params['headers']['User-Agent']}")
    cache.set(key=key, value=value, expire=3600)


def get_ddg_lang(eng_traits: EngineTraits, sxng_locale, default='en_US'):
def get_vqd(
    query: str,
    params: "OnlineParams",
) -> str:
    """Returns the ``vqd`` value that fits to the *query* (and HTTP User-Agent_
    header).

    :param query: the query term
    :param params: request parameters
    """
    cache = get_cache()
    key = cache.secret_hash(f"{query}//{params['headers']['User-Agent']}")
    value: str = cache.get(key=key) or ""
    if value:
        logger.debug("get_vqd: re-use cached value: %s", value)
    return value


def get_ddg_lang(
    eng_traits: EngineTraits,
    sxng_locale: str,
    default: str = "en_US",
) -> str | None:
    """Get DuckDuckGo's language identifier from SearXNG's locale.

    DuckDuckGo defines its languages by region codes (see
    :py:obj:`fetch_traits`).
    .. hint::

       To get region and language of a DDG service use:
       `DDG-lite <https://lite.duckduckgo.com/lite>`__ and the *no Javascript*
       page https://html.duckduckgo.com/html do not offer a language selection
       to the user.

    DDG defines its languages by a region code (:py:obj:`fetch_traits`). To
    get region and language of a DDG service use:

    .. code: python

       eng_region = traits.get_region(params['searxng_locale'], traits.all_locale)
       eng_lang = get_ddg_lang(traits, params['searxng_locale'])
       eng_region = traits.get_region(params["searxng_locale"], traits.all_locale)
       eng_lang = get_ddg_lang(traits, params["searxng_locale"])

    It might confuse, but the ``l`` value of the cookie is what SearXNG calls
    the *region*:
@@ -152,220 +281,225 @@ def get_ddg_lang(eng_traits: EngineTraits, sxng_locale, default='en_US'):
        params['cookies']['ah'] = eng_region
        params['cookies']['l'] = eng_region

    .. hint::

       `DDG-lite <https://lite.duckduckgo.com/lite>`__ and the *no Javascript*
       page https://html.duckduckgo.com/html do not offer a language selection
       to the user, only a region can be selected by the user (``eng_region``
       from the example above). DDG-lite and *no Javascript* store the selected
       region in a cookie::

          params['cookies']['kl'] = eng_region  # 'ar-es'

    """
    return eng_traits.custom['lang_region'].get(  # type: ignore
        sxng_locale, eng_traits.get_language(sxng_locale, default)
    )
    lang: str | None = eng_traits.get_language(sxng_locale, default)

    return eng_traits.custom["lang_region"].get(sxng_locale, lang) or None


ddg_reg_map = {
    'tw-tzh': 'zh_TW',
    'hk-tzh': 'zh_HK',
    'ct-ca': 'skip',  # ct-ca and es-ca both map to ca_ES
    'es-ca': 'ca_ES',
    'id-en': 'id_ID',
    'no-no': 'nb_NO',
    'jp-jp': 'ja_JP',
    'kr-kr': 'ko_KR',
    'xa-ar': 'ar_SA',
    'sl-sl': 'sl_SI',
    'th-en': 'th_TH',
    'vn-en': 'vi_VN',
ddg_reg_map: dict[str, str] = {
    "tw-tzh": "zh_TW",
    "hk-tzh": "zh_HK",
    "ct-ca": "skip",  # ct-ca and es-ca both map to ca_ES
    "es-ca": "ca_ES",
    "id-en": "id_ID",
    "no-no": "nb_NO",
    "jp-jp": "ja_JP",
    "kr-kr": "ko_KR",
    "xa-ar": "ar_SA",
    "sl-sl": "sl_SI",
    "th-en": "th_TH",
    "vn-en": "vi_VN",
}

ddg_lang_map = {
ddg_lang_map: dict[str, str] = {
    # use ar --> ar_EG (Egypt's arabic)
    "ar_DZ": 'lang_region',
    "ar_JO": 'lang_region',
    "ar_SA": 'lang_region',
    "ar_DZ": "lang_region",
    "ar_JO": "lang_region",
    "ar_SA": "lang_region",
    # use bn --> bn_BD
    'bn_IN': 'lang_region',
    "bn_IN": "lang_region",
    # use de --> de_DE
    'de_CH': 'lang_region',
    "de_CH": "lang_region",
    # use en --> en_US,
    'en_AU': 'lang_region',
    'en_CA': 'lang_region',
    'en_GB': 'lang_region',
    "en_AU": "lang_region",
    "en_CA": "lang_region",
    "en_GB": "lang_region",
    # Esperanto
    'eo_XX': 'eo',
    "eo_XX": "eo",
    # use es --> es_ES,
    'es_AR': 'lang_region',
    'es_CL': 'lang_region',
    'es_CO': 'lang_region',
    'es_CR': 'lang_region',
    'es_EC': 'lang_region',
    'es_MX': 'lang_region',
    'es_PE': 'lang_region',
    'es_UY': 'lang_region',
    'es_VE': 'lang_region',
    "es_AR": "lang_region",
    "es_CL": "lang_region",
    "es_CO": "lang_region",
    "es_CR": "lang_region",
    "es_EC": "lang_region",
    "es_MX": "lang_region",
    "es_PE": "lang_region",
    "es_UY": "lang_region",
    "es_VE": "lang_region",
    # use fr --> fr_FR
    'fr_CA': 'lang_region',
    'fr_CH': 'lang_region',
    'fr_BE': 'lang_region',
    "fr_CA": "lang_region",
    "fr_CH": "lang_region",
    "fr_BE": "lang_region",
    # use nl --> nl_NL
    'nl_BE': 'lang_region',
    "nl_BE": "lang_region",
    # use pt --> pt_PT
    'pt_BR': 'lang_region',
    "pt_BR": "lang_region",
    # skip these languages
    'od_IN': 'skip',
    'io_XX': 'skip',
    'tokipona_XX': 'skip',
    "od_IN": "skip",
    "io_XX": "skip",
    "tokipona_XX": "skip",
}


def quote_ddg_bangs(query):
    # quote ddg bangs
    query_parts = []
def quote_ddg_bangs(query: str) -> str:
    """To avoid a redirect, the !bang directives in the query string are
    quoted."""

    # for val in re.split(r'(\s+)', query):
    for val in re.split(r'(\s+)', query):
    _q: list[str] = []

    for val in re.split(r"(\s+)", query):
        if not val.strip():
            continue
        if val.startswith('!') and external_bang.get_node(external_bang.EXTERNAL_BANGS, val[1:]):

        if val.startswith("!") and get_node(EXTERNAL_BANGS, val[1:]):
            val = f"'{val}'"
        query_parts.append(val)
    return ' '.join(query_parts)
        _q.append(val)
    return " ".join(_q)
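Reviewer note: the effect of ``quote_ddg_bangs`` is easiest to see on a query
that starts with a known bang; quoting it keeps DDG from answering with a
redirect. The expected outputs below assume ``!w`` is present in
``EXTERNAL_BANGS``:

.. code:: python

   quote_ddg_bangs("!w climate change")  # -> "'!w' climate change"
   quote_ddg_bangs("plain query")        # -> "plain query"

Note that splitting on ``r"(\s+)"`` keeps the whitespace tokens, which are then
skipped by the ``not val.strip()`` check, so runs of whitespace are normalized
to single spaces by the final join.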
def request(query, params):
    query = quote_ddg_bangs(query)
def request(query: str, params: "OnlineParams") -> None:

    if len(query) >= 500:
        # DDG does not accept queries with more than 499 chars
        params["url"] = None
        return

    eng_region: str = traits.get_region(params['searxng_locale'], traits.all_locale)  # type: ignore
    query = quote_ddg_bangs(query)
    eng_region: str = traits.get_region(
        params["searxng_locale"],
        traits.all_locale,
    )  # pyright: ignore[reportAssignmentType]

    # Note: The API is reverse-engineered from DuckDuckGo's HTML webpage
    # (https://html.duckduckgo.com/html/) and may be subject to additional bot detection mechanisms
    # and breaking changes in the future.
    #
    # The params['data'] dictionary can have the following key parameters, in this order:
    # - q (str): Search query string
    # - b (str): Beginning parameter - empty string for first page requests
    # - s (int): Search offset for pagination
    # - nextParams (str): Continuation parameters from previous page response, typically empty
    # - v (str): Typically 'l' for subsequent pages
    # - o (str): Output format, typically 'json'
    # - dc (int): Display count - value equal to offset (s) + 1
    # - api (str): API endpoint identifier, typically 'd.js'
    # - vqd (str): Validation query digest
    # - kl (str): Keyboard language/region code (e.g., 'en-us')
    # - df (str): Time filter, maps to values like 'd' (day), 'w' (week), 'm' (month), 'y' (year)
    # HTTP headers
    # ============

    params['data']['q'] = query
    headers = params["headers"]

    if params['pageno'] == 1:
        params['data']['b'] = ""
    elif params['pageno'] >= 2:
        offset = 10 + (params['pageno'] - 2) * 15  # Page 2 = 10, Page 3+ = 10 + n*15
        params['data']['s'] = offset
        params['data']['nextParams'] = form_data.get('nextParams', '')
        params['data']['v'] = form_data.get('v', 'l')
        params['data']['o'] = form_data.get('o', 'json')
        params['data']['dc'] = offset + 1
        params['data']['api'] = form_data.get('api', 'd.js')
    # The vqd value is generated from the query and the UA header. To be able to
    # reuse the vqd value, the UA header must be static.
    headers["User-Agent"] = _HTTP_User_Agent

    headers["Sec-Fetch-Dest"] = "document"
    headers["Sec-Fetch-Mode"] = "navigate"
    headers["Sec-Fetch-Site"] = "same-origin"
    headers["Sec-Fetch-User"] = "?1"

    headers["Referer"] = "https://html.duckduckgo.com/"

    ui_lang = params["searxng_locale"]
    if not headers.get("Accept-Language"):
        headers["Accept-Language"] = f"{ui_lang},{ui_lang}-{ui_lang.upper()};q=0.7"

    # DDG search form (POST data)
    # ===========================

    # form_data: dict[str,str] = {"v": "l", "api": "d.js", "o": "json"}
    # """The WEB-API "endpoint" is ``api``."""

    data = params["data"]
    data["q"] = query
    params["url"] = ddg_url
    params["method"] = "POST"

    if params["pageno"] == 1:
        data["b"] = ""
    else:
        # vqd is required to request other pages after the first one
        vqd = get_vqd(query, eng_region, force_request=False)
        vqd = get_vqd(query=query, params=params)
        if vqd:
            params['data']['vqd'] = vqd
            data["vqd"] = vqd
        else:
            # Don't try to call follow up pages without a vqd value.
            # DDG recognizes this as a request from a bot. This lowers the
            # reputation of the SearXNG IP and DDG starts to activate CAPTCHAs.
            params["url"] = None
            return
            # set suspend time to zero is OK --> ddg does not block the IP
            raise SearxEngineCaptchaException(
                suspended_time=0,
                message=f"VQD missed (page: {params['pageno']}, locale: {params['searxng_locale']})",
            )

        if params['searxng_locale'].startswith("zh"):
        if params["searxng_locale"].startswith("zh"):
            # Some locales (at least China) do not have a "next page" button and DDG
            # will return a HTTP/2 403 Forbidden for a request of such a page.
            params["url"] = None
            return

        # Put empty kl in form data if language/region set to all
        data["nextParams"] = ""
        data["api"] = "d.js"
        data["o"] = "json"
        data["v"] = "l"

        offset = 10 + (params["pageno"] - 2) * 15  # Page 2 = 10, Page 2+n = 10 + n*15
        data["dc"] = offset + 1
        data["s"] = offset

    if eng_region == "wt-wt":
        params['data']['kl'] = ""
        # Put empty kl in form data if language/region set to all
        # data["kl"] = ""
        data["kl"] = "wt-wt"
    else:
        params['data']['kl'] = eng_region
        data["kl"] = eng_region
        params["cookies"]["kl"] = eng_region

    params['data']['df'] = ''
    if params['time_range'] in time_range_dict:
        params['data']['df'] = time_range_dict[params['time_range']]
        params['cookies']['df'] = time_range_dict[params['time_range']]
    t_range: str = time_range_dict.get(str(params["time_range"]), "")
    if t_range:
        data["df"] = t_range
        params["cookies"]["df"] = t_range

    params['cookies']['kl'] = eng_region
    params["headers"]["Content-Type"] = "application/x-www-form-urlencoded"
    params["headers"]["Referer"] = ddg_url

    params['url'] = url
    params['method'] = 'POST'

    params['headers']['Content-Type'] = 'application/x-www-form-urlencoded'
    params['headers']['Referer'] = url
    params['headers']['Sec-Fetch-Dest'] = "document"
    params['headers']['Sec-Fetch-Mode'] = "navigate"  # at least this one is used by ddg's bot detection
    params['headers']['Sec-Fetch-Site'] = "same-origin"
    params['headers']['Sec-Fetch-User'] = "?1"

    logger.debug("param headers: %s", params['headers'])
    logger.debug("param data: %s", params['data'])
    logger.debug("param cookies: %s", params['cookies'])
    logger.debug("param headers: %s", params["headers"])
    logger.debug("param data: %s", params["data"])
    logger.debug("param cookies: %s", params["cookies"])
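Reviewer note: the pagination offset in the hunk above grows by 10 for the
first follow-up page and by 15 for each page after that. A quick check of the
formula as used by the code:

.. code:: python

   def ddg_offset(pageno: int) -> int:
       return 10 + (pageno - 2) * 15  # page 2 -> 10, page 3 -> 25, page 4 -> 40

   assert [ddg_offset(p) for p in (2, 3, 4)] == [10, 25, 40]
   # the form additionally sends dc = offset + 1 as the display count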
def is_ddg_captcha(dom):
def is_ddg_captcha(dom: ElementType):
    """In case of a CAPTCHA, DDG responds with its own *not a Robot* dialog and
    is not redirected to a CAPTCHA page."""

    return bool(eval_xpath(dom, "//form[@id='challenge-form']"))


def response(resp) -> EngineResults:
    results = EngineResults()
def response(resp: "SXNG_Response") -> EngineResults:
    res = EngineResults()

    if resp.status_code == 303:
        return results
        return res

    doc = lxml.html.fromstring(resp.text)
    params = resp.search_params

    if is_ddg_captcha(doc):
        # set suspend time to zero is OK --> ddg does not block the IP
        raise SearxEngineCaptchaException(suspended_time=0, message=f"CAPTCHA ({resp.search_params['data'].get('kl')})")
        raise SearxEngineCaptchaException(suspended_time=0, message=f"CAPTCHA ({params['data'].get('kl')})")

    form = eval_xpath(doc, '//input[@name="vqd"]/..')

    # Some locales (at least China) do not have a "next page" button and DDG
    # will return a HTTP/2 403 Forbidden for a request of such a page.
    if len(form):
        # some locales (at least China) does not have a "next page" button
        form = form[0]
        form_vqd = eval_xpath(form, '//input[@name="vqd"]/@value')[0]
        q: str = str(params["data"]["q"])
        set_vqd(
            query=resp.search_params['data']['q'],
            region=resp.search_params['data']['kl'],
            query=q,
            value=str(form_vqd),
            params=resp.search_params,
        )

    # just select "web-result" and ignore results of class "result--ad result--ad--small"
    for div_result in eval_xpath(doc, '//div[@id="links"]/div[contains(@class, "web-result")]'):

        item = {}
        title = eval_xpath(div_result, './/h2/a')
        if not title:
            # this is the "No results." item in the result list
            continue
        item["title"] = extract_text(title)
        item["url"] = eval_xpath(div_result, './/h2/a/@href')[0]
        item["content"] = extract_text(
            eval_xpath_getindex(div_result, './/a[contains(@class, "result__snippet")]', 0, [])
        _title = eval_xpath(div_result, ".//h2/a")
        _content = eval_xpath_getindex(div_result, './/a[contains(@class, "result__snippet")]', 0, [])
        res.add(
            res.types.MainResult(
                title=extract_text(_title) or "",
                url=eval_xpath(div_result, ".//h2/a/@href")[0],
                content=extract_text(_content) or "",
            )
        )
        results.append(item)

    zero_click_info_xpath = '//div[@id="zero_click_abstract"]'
    zero_click = extract_text(eval_xpath(doc, zero_click_info_xpath)).strip()  # type: ignore
@@ -375,20 +509,19 @@ def response(resp) -> EngineResults:
        and "Your user agent:" not in zero_click
        and "URL Decoded:" not in zero_click
    ):
        results.add(
            results.types.Answer(
        res.add(
            res.types.Answer(
                answer=zero_click,
                url=eval_xpath_getindex(doc, '//div[@id="zero_click_abstract"]/a/@href', 0),  # type: ignore
                url=eval_xpath_getindex(doc, '//div[@id="zero_click_abstract"]/a/@href', 0),
            )
        )

    return results
    return res


def fetch_traits(engine_traits: EngineTraits):
    """Fetch languages & regions from DuckDuckGo.

    SearXNG's ``all`` locale maps DuckDuckGo's "Alle regions" (``wt-wt``).
    SearXNG's ``all`` locale maps DuckDuckGo's "All regions" (``wt-wt``).
    DuckDuckGo's language "Browsers preferred language" (``wt_WT``) makes no
    sense in a SearXNG request since SearXNG's ``all`` will not add a
    ``Accept-Language`` HTTP header. The value in ``engine_traits.all_locale``
@@ -407,34 +540,34 @@ def fetch_traits(engine_traits: EngineTraits):

    """
    # pylint: disable=too-many-branches, too-many-statements, disable=import-outside-toplevel

    from searx.network import get  # see https://github.com/searxng/searxng/issues/762
    from searx.utils import js_obj_str_to_python

    # fetch regions

    engine_traits.all_locale = 'wt-wt'
    engine_traits.all_locale = "wt-wt"

    # updated from u661.js to u.7669f071a13a7daa57cb / should be updated automatically?
    resp = get('https://duckduckgo.com/dist/util/u.7669f071a13a7daa57cb.js')
    resp = get("https://duckduckgo.com/dist/util/u.7669f071a13a7daa57cb.js", timeout=5)
    if not resp.ok:
        raise RuntimeError("Response from DuckDuckGo regions is not OK.")

    if not resp.ok:  # type: ignore
        print("ERROR: response from DuckDuckGo is not OK.")

    js_code = extr(resp.text, 'regions:', ',snippetLengths')  # type: ignore
    js_code = extr(resp.text, "regions:", ",snippetLengths")

    regions = json.loads(js_code)
    for eng_tag, name in regions.items():

        if eng_tag == 'wt-wt':
            engine_traits.all_locale = 'wt-wt'
        if eng_tag == "wt-wt":
            engine_traits.all_locale = "wt-wt"
            continue

        region = ddg_reg_map.get(eng_tag)
        if region == 'skip':
        if region == "skip":
            continue

        if not region:
            eng_territory, eng_lang = eng_tag.split('-')
            region = eng_lang + '_' + eng_territory.upper()
            eng_territory, eng_lang = eng_tag.split("-")
            region = eng_lang + "_" + eng_territory.upper()

        try:
            sxng_tag = locales.region_tag(babel.Locale.parse(region))
@@ -451,25 +584,23 @@ def fetch_traits(engine_traits: EngineTraits):

    # fetch languages

    engine_traits.custom['lang_region'] = {}
    engine_traits.custom["lang_region"] = {}

    js_code = extr(resp.text, 'languages:', ',regions')  # type: ignore
    js_code = extr(resp.text, "languages:", ",regions")

    languages: dict[str, str] = js_obj_str_to_python(js_code)
    for eng_lang, name in languages.items():

        if eng_lang == 'wt_WT':
        if eng_lang == "wt_WT":
            continue

        babel_tag = ddg_lang_map.get(eng_lang, eng_lang)
        if babel_tag == 'skip':
        if babel_tag == "skip":
            continue

        try:

            if babel_tag == 'lang_region':
            if babel_tag == "lang_region":
                sxng_tag = locales.region_tag(babel.Locale.parse(eng_lang))
                engine_traits.custom['lang_region'][sxng_tag] = eng_lang
                engine_traits.custom["lang_region"][sxng_tag] = eng_lang
                continue

            sxng_tag = locales.language_tag(babel.Locale.parse(babel_tag))
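Reviewer note: ``fetch_traits`` above flips DDG's ``territory-language`` tags
into babel locales, with ``ddg_reg_map`` handling the oddballs. A sketch of the
default path for a regular tag (the tag value is an example):

.. code:: python

   import babel

   eng_tag = "de-de"  # DDG region tag: territory-language
   eng_territory, eng_lang = eng_tag.split("-")
   region = eng_lang + "_" + eng_territory.upper()  # -> "de_DE"
   locale = babel.Locale.parse(region)              # babel accepts "de_DE"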

@@ -12,70 +12,75 @@ least we could not find out how language support should work. It seems that
 most of the features are based on English terms.

 """
 import typing as t

 from urllib.parse import urlencode, urlparse, urljoin
 from lxml import html

 from searx.data import WIKIDATA_UNITS
 from searx.utils import extract_text, html_to_text, get_string_replaces_function
-from searx.external_urls import get_external_url, get_earth_coordinates_url, area_to_osm_zoom
+from searx.external_urls import (
+    get_external_url,
+    get_earth_coordinates_url,
+    area_to_osm_zoom,
+)
 from searx.result_types import EngineResults

+if t.TYPE_CHECKING:
+    from searx.extended_types import SXNG_Response
+    from searx.search.processors import OnlineParams
+
 # about
 about = {
-    "website": 'https://duckduckgo.com/',
-    "wikidata_id": 'Q12805',
-    "official_api_documentation": 'https://duckduckgo.com/api',
+    "website": "https://duckduckgo.com/",
+    "wikidata_id": "Q12805",
+    "official_api_documentation": "https://duckduckgo.com/api",
     "use_official_api": True,
     "require_api_key": False,
-    "results": 'JSON',
+    "results": "JSON",
 }

 send_accept_language_header = True

-URL = 'https://api.duckduckgo.com/' + '?{query}&format=json&pretty=0&no_redirect=1&d=1'
+URL = "https://api.duckduckgo.com/" + "?{query}&format=json&pretty=0&no_redirect=1&d=1"

-WIKIDATA_PREFIX = ['http://www.wikidata.org/entity/', 'https://www.wikidata.org/entity/']
+WIKIDATA_PREFIX = ["http://www.wikidata.org/entity/", "https://www.wikidata.org/entity/"]

-replace_http_by_https = get_string_replaces_function({'http:': 'https:'})
+replace_http_by_https = get_string_replaces_function({"http:": "https:"})


-def is_broken_text(text):
+def is_broken_text(text: str) -> bool:
     """duckduckgo may return something like ``<a href="xxxx">http://somewhere Related website<a/>``

     The href URL is broken, the "Related website" may contain some HTML.

     The best solution seems to be to ignore these results.
     """
-    return text.startswith('http') and ' ' in text
+    return text.startswith("http") and " " in text
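The heuristic above in a nutshell: a "text" that starts like a URL but contains a space is really a URL glued to a label. Illustrative inputs, assuming the function as defined in this hunk:

    assert is_broken_text("http://somewhere Related website") is True   # URL + label, broken
    assert is_broken_text("https://example.org/page") is False          # plain URL, fine
    assert is_broken_text("Related website") is False                   # plain label, fine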


-def result_to_text(text, htmlResult):
+def result_to_text(text: str, htmlResult: str) -> str | None:
     # TODO : remove result ending with "Meaning" or "Category"  # pylint: disable=fixme
-    result = None
+    result = ""
     dom = html.fromstring(htmlResult)
-    a = dom.xpath('//a')
+    a = dom.xpath("//a")
     if len(a) >= 1:
         result = extract_text(a[0])
     else:
         result = text
-    if not is_broken_text(result):
+    if result and not is_broken_text(result):
         return result
     return None


-def request(query, params):
-    params['url'] = URL.format(query=urlencode({'q': query}))
-    return params
+def request(query: str, params: "OnlineParams") -> None:
+    params["url"] = URL.format(query=urlencode({"q": query}))
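For reference, what the rewritten request() produces for a sample query; only q varies, the rest of the query string is fixed by the URL constant above:

    from urllib.parse import urlencode

    URL = "https://api.duckduckgo.com/" + "?{query}&format=json&pretty=0&no_redirect=1&d=1"
    print(URL.format(query=urlencode({"q": "searxng"})))
    # -> https://api.duckduckgo.com/?q=searxng&format=json&pretty=0&no_redirect=1&d=1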


-def response(resp) -> EngineResults:
+def response(resp: "SXNG_Response") -> EngineResults:
     # pylint: disable=too-many-locals, too-many-branches, too-many-statements
     results = EngineResults()
-    search_res = resp.json()
+    search_res: dict[str, str] = resp.json()

-    # search_res.get('Entity') possible values (not exhaustive) :
+    # search_res.get("Entity") possible values (not exhaustive) :
     # * continent / country / department / location / waterfall
     # * actor / musician / artist
     # * book / performing art / film / television / media franchise / concert tour / playwright
@@ -83,79 +88,82 @@ def response(resp) -> EngineResults:
     # * website / software / os / programming language / file format / software engineer
     # * company

-    content = ''
-    heading = search_res.get('Heading', '')
-    attributes = []
-    urls = []
+    content: str = ""
+    heading: str = search_res.get("Heading", "")
+    attributes: list[dict[str, str | dict[str, str]]] = []
+    urls: list[dict[str, str | bool]] = []
     infobox_id = None
-    relatedTopics = []
+    relatedTopics: list[dict[str, str | list[str]]] = []

     # add answer if there is one
-    answer = search_res.get('Answer', '')
+    answer: str = search_res.get("Answer", "")
     if answer:
-        answer_type = search_res.get('AnswerType')
-        logger.debug('AnswerType="%s" Answer="%s"', answer_type, answer)
-        if isinstance(answer, str) and answer_type not in ['calc', 'ip']:
+        answer_type = search_res.get("AnswerType")
+        logger.debug("AnswerType='%s' Answer='%s'", answer_type, answer)
+        if isinstance(answer, str) and answer_type not in ["calc", "ip"]:
             results.add(
                 results.types.Answer(
                     answer=html_to_text(answer),
-                    url=search_res.get('AbstractURL', ''),
+                    url=search_res.get("AbstractURL", ""),
                 )
             )

     # add infobox
-    if 'Definition' in search_res:
-        content = content + search_res.get('Definition', '')
+    if "Definition" in search_res:
+        content = content + search_res.get("Definition", "")

-    if 'Abstract' in search_res:
-        content = content + search_res.get('Abstract', '')
+    if "Abstract" in search_res:
+        content = content + search_res.get("Abstract", "")

     # image
-    image = search_res.get('Image')
-    image = None if image == '' else image
-    if image is not None and urlparse(image).netloc == '':
-        image = urljoin('https://duckduckgo.com', image)
+    image = search_res.get("Image")
+    image = None if image == "" else image
+    if image is not None and urlparse(image).netloc == "":
+        image = urljoin("https://duckduckgo.com", image)

     # urls
     # Official website, Wikipedia page
-    for ddg_result in search_res.get('Results', []):
-        firstURL = ddg_result.get('FirstURL')
-        text = ddg_result.get('Text')
+    _result_list: list[dict[str, str]] = search_res.get("Results", [])  # pyright: ignore[reportAssignmentType]
+
+    for ddg_result in _result_list:
+        firstURL = ddg_result.get("FirstURL")
+        text = ddg_result.get("Text")
         if firstURL is not None and text is not None:
-            urls.append({'title': text, 'url': firstURL})
-            results.append({'title': heading, 'url': firstURL})
+            urls.append({"title": text, "url": firstURL})
+            results.add(results.types.LegacyResult({"title": heading, "url": firstURL}))

     # related topics
-    for ddg_result in search_res.get('RelatedTopics', []):
-        if 'FirstURL' in ddg_result:
-            firstURL = ddg_result.get('FirstURL')
-            text = ddg_result.get('Text')
+    _result_list = search_res.get("RelatedTopics", [])  # pyright: ignore[reportAssignmentType]
+    for ddg_result in _result_list:
+        if "FirstURL" in ddg_result:
+            firstURL = ddg_result.get("FirstURL")
+            text = ddg_result.get("Text", "")
             if not is_broken_text(text):
-                suggestion = result_to_text(text, ddg_result.get('Result'))
+                suggestion = result_to_text(text, ddg_result.get("Result", ""))
                 if suggestion != heading and suggestion is not None:
-                    results.append({'suggestion': suggestion})
-        elif 'Topics' in ddg_result:
-            suggestions = []
-            relatedTopics.append({'name': ddg_result.get('Name', ''), 'suggestions': suggestions})
-            for topic_result in ddg_result.get('Topics', []):
-                suggestion = result_to_text(topic_result.get('Text'), topic_result.get('Result'))
+                    results.add(results.types.LegacyResult({"suggestion": suggestion}))
+        elif "Topics" in ddg_result:
+            suggestions: list[str] = []
+            relatedTopics.append({"name": ddg_result.get("Name", ""), "suggestions": suggestions})
+            _topic_results: list[dict[str, str]] = ddg_result.get("Topics", [])  # pyright: ignore[reportAssignmentType]
+            for topic_result in _topic_results:
+                suggestion = result_to_text(topic_result.get("Text", ""), topic_result.get("Result", ""))
                 if suggestion != heading and suggestion is not None:
                     suggestions.append(suggestion)

     # abstract
-    abstractURL = search_res.get('AbstractURL', '')
-    if abstractURL != '':
+    abstractURL = search_res.get("AbstractURL", "")
+    if abstractURL != "":
         # add as result ? problem always in english
         infobox_id = abstractURL
-        urls.append({'title': search_res.get('AbstractSource'), 'url': abstractURL, 'official': True})
-        results.append({'url': abstractURL, 'title': heading})
+        urls.append({"title": search_res.get("AbstractSource", ""), "url": abstractURL, "official": True})
+        results.add(results.types.LegacyResult({"url": abstractURL, "title": heading}))

     # definition
-    definitionURL = search_res.get('DefinitionURL', '')
-    if definitionURL != '':
+    definitionURL = search_res.get("DefinitionURL", "")
+    if definitionURL != "":
         # add as result ? as answer ? problem always in english
         infobox_id = definitionURL
-        urls.append({'title': search_res.get('DefinitionSource'), 'url': definitionURL})
+        urls.append({"title": search_res.get("DefinitionSource", ""), "url": definitionURL})

     # to merge with wikidata's infobox
     if infobox_id:
@@ -163,15 +171,15 @@ def response(resp) -> EngineResults:

     # attributes
     # some will be converted to urls
-    if 'Infobox' in search_res:
-        infobox = search_res.get('Infobox')
-        if 'content' in infobox:
+    if "Infobox" in search_res:
+        infobox: dict[str, t.Any] = search_res.get("Infobox", {})  # pyright: ignore[reportAssignmentType]
+        if "content" in infobox:
             osm_zoom = 17
             coordinates = None
-            for info in infobox.get('content'):
-                data_type = info.get('data_type')
-                data_label = info.get('label')
-                data_value = info.get('value')
+            for info in infobox.get("content", {}):
+                data_type: str = info.get("data_type", "")
+                data_label = info.get("label")
+                data_value = info.get("value")

                 # Workaround: ddg may return a double quote
                 if data_value == '""':
@@ -181,77 +189,79 @@ def response(resp) -> EngineResults:
                 # * imdb_id / facebook_profile / youtube_channel / youtube_video / twitter_profile
                 # * instagram_profile / rotten_tomatoes / spotify_artist_id / itunes_artist_id / soundcloud_id
                 # * netflix_id
-                external_url = get_external_url(data_type, data_value)
+                external_url: str | None = get_external_url(data_type, data_value)  # type: ignore
                 if external_url is not None:
-                    urls.append({'title': data_label, 'url': external_url})
-                elif data_type in ['instance', 'wiki_maps_trigger', 'google_play_artist_id']:
+                    urls.append({"title": data_label, "url": external_url})
+                elif data_type in ["instance", "wiki_maps_trigger", "google_play_artist_id"]:
                     # ignore instance: Wikidata value from "Instance Of" (Qxxxx)
                     # ignore wiki_maps_trigger: reference to a javascript
                     # ignore google_play_artist_id: service shutdown
                     pass
-                elif data_type == 'string' and data_label == 'Website':
+                elif data_type == "string" and data_label == "Website":
                     # There is already an URL for the website
                     pass
-                elif data_type == 'area':
-                    attributes.append({'label': data_label, 'value': area_to_str(data_value), 'entity': 'P2046'})
-                    osm_zoom = area_to_osm_zoom(data_value.get('amount'))
-                elif data_type == 'coordinates':
-                    if data_value.get('globe') == 'http://www.wikidata.org/entity/Q2':
+                elif data_type == "area":
+                    attributes.append({"label": data_label, "value": area_to_str(data_value), "entity": "P2046"})
+                    osm_zoom = area_to_osm_zoom(data_value.get("amount"))
+                elif data_type == "coordinates":
+                    if data_value.get("globe") == "http://www.wikidata.org/entity/Q2":
                         # coordinate on Earth
                         # get the zoom information from the area
                         coordinates = info
                     else:
                         # coordinate NOT on Earth
-                        attributes.append({'label': data_label, 'value': data_value, 'entity': 'P625'})
-                elif data_type == 'string':
-                    attributes.append({'label': data_label, 'value': data_value})
+                        attributes.append({"label": data_label, "value": data_value, "entity": "P625"})
+                elif data_type == "string":
+                    attributes.append({"label": data_label, "value": data_value})

             if coordinates:
-                data_label = coordinates.get('label')
-                data_value = coordinates.get('value')
-                latitude = data_value.get('latitude')
-                longitude = data_value.get('longitude')
-                url = get_earth_coordinates_url(latitude, longitude, osm_zoom)
-                urls.append({'title': 'OpenStreetMap', 'url': url, 'entity': 'P625'})
+                data_label = coordinates.get("label")
+                data_value = coordinates.get("value")
+                latitude = data_value.get("latitude")
+                longitude = data_value.get("longitude")
+                _url: str = get_earth_coordinates_url(latitude, longitude, osm_zoom)  # type: ignore
+                urls.append({"title": "OpenStreetMap", "url": _url, "entity": "P625"})

     if len(heading) > 0:
-        # TODO get infobox.meta.value where .label='article_title' # pylint: disable=fixme
+        # TODO get infobox.meta.value where .label="article_title" # pylint: disable=fixme
         if image is None and len(attributes) == 0 and len(urls) == 1 and len(relatedTopics) == 0 and len(content) == 0:
-            results.append({'url': urls[0]['url'], 'title': heading, 'content': content})
+            results.add(results.types.LegacyResult({"url": urls[0]["url"], "title": heading, "content": content}))
         else:
-            results.append(
-                {
-                    'infobox': heading,
-                    'id': infobox_id,
-                    'content': content,
-                    'img_src': image,
-                    'attributes': attributes,
-                    'urls': urls,
-                    'relatedTopics': relatedTopics,
-                }
-            )
+            results.add(
+                results.types.LegacyResult(
+                    {
+                        "infobox": heading,
+                        "id": infobox_id,
+                        "content": content,
+                        "img_src": image,
+                        "attributes": attributes,
+                        "urls": urls,
+                        "relatedTopics": relatedTopics,
+                    }
+                )
+            )

     return results


-def unit_to_str(unit):
+def unit_to_str(unit: str) -> str:
     for prefix in WIKIDATA_PREFIX:
         if unit.startswith(prefix):
             wikidata_entity = unit[len(prefix) :]
             real_unit = WIKIDATA_UNITS.get(wikidata_entity)
             if real_unit is None:
                 return unit
-            return real_unit['symbol']
+            return real_unit["symbol"]
     return unit


-def area_to_str(area):
-    """parse ``{'unit': 'https://www.wikidata.org/entity/Q712226', 'amount': '+20.99'}``"""
-    unit = unit_to_str(area.get('unit'))
-    if unit is not None:
+def area_to_str(area: dict[str, str]) -> str:
+    """parse ``{"unit": "https://www.wikidata.org/entity/Q712226", "amount": "+20.99"}``"""
+    unit = unit_to_str(area.get("unit", ""))
+    if unit:
         try:
-            amount = float(area.get('amount'))
-            return '{} {}'.format(amount, unit)
+            amount = float(area.get("amount", ""))
+            return "{} {}".format(amount, unit)
         except ValueError:
             pass
-    return '{} {}'.format(area.get('amount', ''), area.get('unit', ''))
+    return "{} {}".format(area.get("amount", ""), area.get("unit", ""))
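A worked example of the two helpers above, under the assumption that WIKIDATA_UNITS maps Q712226 (square kilometre) to a {"symbol": ...} entry; the real table ships with searx.data, the stand-in below is ours:

    WIKIDATA_UNITS = {"Q712226": {"symbol": "km²"}}  # assumption: stand-in for searx.data

    def unit_to_str(unit: str) -> str:
        for prefix in ("http://www.wikidata.org/entity/", "https://www.wikidata.org/entity/"):
            if unit.startswith(prefix):
                real_unit = WIKIDATA_UNITS.get(unit[len(prefix):])
                return real_unit["symbol"] if real_unit else unit
        return unit

    def area_to_str(area: dict[str, str]) -> str:
        unit = unit_to_str(area.get("unit", ""))
        if unit:
            try:
                return "{} {}".format(float(area.get("amount", "")), unit)
            except ValueError:
                pass
        return "{} {}".format(area.get("amount", ""), area.get("unit", ""))

    assert area_to_str({"unit": "https://www.wikidata.org/entity/Q712226", "amount": "+20.99"}) == "20.99 km²"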

@@ -4,84 +4,144 @@ DuckDuckGo Extra (images, videos, news)
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 """

+import typing as t
+
 from datetime import datetime
 from urllib.parse import urlencode
-from searx.utils import get_embeded_stream_url, html_to_text
+from urllib.parse import quote_plus
+
+from searx.utils import get_embeded_stream_url, html_to_text, gen_useragent, extr
+from searx.network import get  # see https://github.com/searxng/searxng/issues/762

 from searx.engines.duckduckgo import fetch_traits  # pylint: disable=unused-import
-from searx.engines.duckduckgo import get_ddg_lang, get_vqd
+from searx.engines.duckduckgo import get_ddg_lang, get_vqd, set_vqd
+
+if t.TYPE_CHECKING:
+    from searx.extended_types import SXNG_Response
+    from searx.search.processors import OnlineParams

 # about
 about = {
-    "website": 'https://duckduckgo.com/',
-    "wikidata_id": 'Q12805',
+    "website": "https://duckduckgo.com/",
+    "wikidata_id": "Q12805",
     "use_official_api": False,
     "require_api_key": False,
-    "results": 'JSON (site requires js to get images)',
+    "results": "JSON (site requires js to get images)",
 }

 # engine dependent config
-categories = ['images', 'web']
-ddg_category = 'images'
+categories = []
+ddg_category = ""
 """The category must be any of ``images``, ``videos`` and ``news``
 """
 paging = True
 safesearch = True
 send_accept_language_header = True

-safesearch_cookies = {0: '-2', 1: None, 2: '1'}
-safesearch_args = {0: '1', 1: None, 2: '1'}
+safesearch_cookies = {0: "-2", 1: None, 2: "1"}
+safesearch_args = {0: "1", 1: None, 2: "1"}

-search_path_map = {'images': 'i', 'videos': 'v', 'news': 'news'}
+search_path_map = {"images": "i", "videos": "v", "news": "news"}
+
+_HTTP_User_Agent: str = gen_useragent()


-def request(query, params):
-    eng_region: str = traits.get_region(params['searxng_locale'], traits.all_locale)  # type: ignore
-
-    # request needs a vqd argument
-    vqd = get_vqd(query, eng_region, force_request=True)
-
-    if not vqd:
-        # some search terms do not have results and therefore no vqd value
-        params['url'] = None
-        return params
-
-    eng_lang = get_ddg_lang(traits, params['searxng_locale'])
-
-    args = {
-        'q': query,
-        'o': 'json',
-        # 'u': 'bing',
-        'l': eng_region,
-        'f': ',,,,,',
-        'vqd': vqd,
-    }
-
-    if params['pageno'] > 1:
-        args['s'] = (params['pageno'] - 1) * 100
-
-    params['cookies']['ad'] = eng_lang  # zh_CN
-    params['cookies']['ah'] = eng_region  # "us-en,de-de"
-    params['cookies']['l'] = eng_region  # "hk-tzh"
-
-    safe_search = safesearch_cookies.get(params['safesearch'])
-    if safe_search is not None:
-        params['cookies']['p'] = safe_search  # "-2", "1"
-    safe_search = safesearch_args.get(params['safesearch'])
-    if safe_search is not None:
-        args['p'] = safe_search  # "-1", "1"
-
-    logger.debug("cookies: %s", params['cookies'])
-
-    params['url'] = f'https://duckduckgo.com/{search_path_map[ddg_category]}.js?{urlencode(args)}'
-
-    # sending these two headers prevents rate limiting for the query
-    params['headers'] = {
-        'Referer': 'https://duckduckgo.com/',
-        'X-Requested-With': 'XMLHttpRequest',
-    }
-
-    return params
+def init(engine_settings: dict[str, t.Any]):
+    if engine_settings["ddg_category"] not in ["images", "videos", "news"]:
+        raise ValueError(f"Unsupported DuckDuckGo category: {engine_settings['ddg_category']}")
+
+
+def fetch_vqd(
+    query: str,
+    params: "OnlineParams",
+):
+    logger.debug("fetch_vqd: request value from duckduckgo.com")
+    resp = get(
+        url=f"https://duckduckgo.com/?q={quote_plus(query)}&iar=images&t=h_",
+        headers=params["headers"],
+        timeout=2,
+    )
+
+    value = ""
+    if resp.status_code == 200:
+        value = extr(resp.text, 'vqd="', '"')
+        if value:
+            logger.debug("vqd value from duckduckgo.com request: '%s'", value)
+        else:
+            logger.error("vqd: can't parse value from ddg response (return empty string)")
+            return ""
+    else:
+        logger.error("vqd: got HTTP %s from duckduckgo.com", resp.status_code)
+
+    if value:
+        set_vqd(query=query, value=value, params=params)
+    else:
+        logger.error("none vqd value from duckduckgo.com: HTTP %s", resp.status_code)
+    return value
+
+
+def request(query: str, params: "OnlineParams") -> None:
+
+    if len(query) >= 500:
+        # DDG does not accept queries with more than 499 chars
+        params["url"] = None
+        return
+
+    # HTTP headers
+    # ============
+
+    headers = params["headers"]
+    # The vqd value is generated from the query and the UA header. To be able to
+    # reuse the vqd value, the UA header must be static.
+    headers["User-Agent"] = _HTTP_User_Agent
+    vqd = get_vqd(query=query, params=params) or fetch_vqd(query=query, params=params)
+
+    headers["Accept"] = "*/*"
+    headers["Referer"] = "https://duckduckgo.com/"
+    headers["Host"] = "duckduckgo.com"
+    # headers["X-Requested-With"] = "XMLHttpRequest"
+
+    # DDG XHTMLRequest
+    # ================
+
+    eng_region: str = traits.get_region(
+        params["searxng_locale"],
+        traits.all_locale,
+    )  # pyright: ignore[reportAssignmentType]
+
+    eng_lang: str = get_ddg_lang(traits, params["searxng_locale"]) or "wt-wt"
+
+    args: dict[str, str | int] = {
+        "o": "json",
+        "q": query,
+        "u": "bing",
+        "l": eng_region,
+        "bpia": "1",
+        "vqd": vqd,
+        "a": "h_",
+    }
+
+    params["cookies"]["ad"] = eng_lang  # zh_CN
+    params["cookies"]["ah"] = eng_region  # "us-en,de-de"
+    params["cookies"]["l"] = eng_region  # "hk-tzh"
+
+    args["ct"] = "EN"
+    if params["searxng_locale"] != "all":
+        args["ct"] = params["searxng_locale"].split("-")[0].upper()
+
+    if params["pageno"] > 1:
+        args["s"] = (params["pageno"] - 1) * 100
+
+    safe_search = safesearch_cookies.get(params["safesearch"])
+    if safe_search is not None:
+        params["cookies"]["p"] = safe_search  # "-2", "1"
+        args["p"] = safe_search
+
+    params["url"] = f"https://duckduckgo.com/{search_path_map[ddg_category]}.js?{urlencode(args)}"
+
+    logger.debug("param headers: %s", params["headers"])
+    logger.debug("param data: %s", params["data"])
+    logger.debug("param cookies: %s", params["cookies"])
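The vqd handshake added above boils down to: fetch the regular HTML results page once, scrape the inline vqd token out of it, cache it per query (set_vqd), and reuse it for the JSON endpoints. A sketch of just the scraping step, with a made-up token and the extr contract as in searx.utils:

    def extr(text: str, begin: str, end: str) -> str:
        # same contract as searx.utils.extr
        b = text.index(begin) + len(begin)
        return text[b : text.index(end, b)]

    html_page = '...;DDG.deep.initialize(...);vqd="4-2814298643826977";...'
    assert extr(html_page, 'vqd="', '"') == "4-2814298643826977"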
def _image_result(result):

@@ -27,8 +27,6 @@ about = {
     "results": "JSON",
 }

-send_accept_language_header = True
-
 # engine dependent config
 categories = ["weather"]
 base_url = "https://duckduckgo.com/js/spice/forecast/{query}/{lang}"

@@ -42,8 +42,8 @@ def response(resp):

     results.append(
         {
-            'url': item['source_page_url'],
-            'title': item['source_site'],
+            'url': item.get('source_page_url'),
+            'title': item.get('source_site'),
             'img_src': img if item['type'] == 'IMAGE' else thumb,
             'filesize': humanize_bytes(item['meme_file_size']),
             'publishedDate': formatted_date,

searx/engines/gmx.py (new file, 95 lines)
@@ -0,0 +1,95 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
+"""GMX (general)
+
+It's unclear which index it uses; the results were the most similar to Google's.
+
+In theory it supports multiple languages, but even when changing the region on
+their website, most of the results are still in English."""
+
+import time
+import typing as t
+
+from urllib.parse import urlencode
+
+from searx.result_types import EngineResults
+from searx.extended_types import SXNG_Response
+from searx.utils import extr, gen_useragent, html_to_text
+from searx.network import get
+
+if t.TYPE_CHECKING:
+    from searx.search.processors import OnlineParams
+
+about = {
+    "website": "https://search.gmx.com",
+    "official_api_documentation": None,
+    "use_official_api": False,
+    "require_api_key": False,
+    "results": "JSON",
+}
+
+base_url = "https://search.gmx.com"  # alternatively: search.gmx.net
+categories = ["general"]
+
+paging = True
+safesearch = True
+time_range_support = True
+
+time_range_map = {"day": "d", "week": "w", "month": "m", "year": "y"}
+
+
+def _get_page_hash(query: str, page: int, headers: dict[str, str]) -> str:
+    resp = get(f"{base_url}/web/result?q={query}&page={page}", headers=headers)
+
+    # the text we search for looks like:
+    # load("/desk?lang="+eV.p.param['hl']+"&q="+eV['p']['q_encode']+"&page=5&h=aa45603&t=177582576&origin=web&comp=web_serp_pag&p=gmx-com&sp=&lr="+eV.p.param['lr0']+"&mkt="+eV.p.param['mkt0']+"&family="+eV.p.param['familyFilter']+"&fcons="+eV.p.perm.fCons,"google", "eMMO", "eMH","eMP");  # pylint: disable=line-too-long
+    return extr(resp.text, "&h=", "&t=")
+
+
+def request(query: str, params: 'OnlineParams'):
+    # the headers have to be as close to normal browsers as possible, otherwise you get rate-limited quickly;
+    # the user agent for loading the hash and requesting the results has to be the same
+    headers = {
+        "User-Agent": gen_useragent(),
+        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
+        "Accept-Language": "en-US,en;q=0.9",
+        "Connection": "keep-alive",
+        "Referer": base_url,
+    }
+
+    # the "t" parameter has to be set to the current time in seconds with the last digit removed
+    # e.g., if the current time is 1775829848, t has to be 177582984
+    now = int(time.time() / 10)
+
+    # the page hash depends on the query and page number;
+    # the headers have to match the ones from the previous request
+    page_hash = _get_page_hash(query, params["pageno"], headers)
+
+    args = {"lang": "en", "q": query, "page": params["pageno"], "h": page_hash, "t": now}
+    if params["safesearch"]:
+        args["family"] = True
+    if params.get("time_range"):
+        args["time"] = time_range_map[params["time_range"]]
+
+    params["url"] = f"{base_url}/desk?{urlencode(args)}"
+
+    params["headers"].update(headers)
+
+
+def response(resp: 'SXNG_Response') -> EngineResults:
+    res = EngineResults()
+
+    results = resp.json()["results"]
+
+    for suggestion in results["rs"]:
+        res.add(res.types.LegacyResult({"suggestion": suggestion["t"]}))
+
+    for result in results["hits"]:
+        res.add(
+            res.types.MainResult(
+                url=result["u"],
+                title=html_to_text(result["t"]),
+                content=html_to_text(result["s"]),
+            )
+        )
+
+    return res
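The anti-bot handshake of this new engine in isolation: h is scraped from the first response, t is the current unix time with the last digit dropped. A sketch using the sample values from the comments above (extr as in searx.utils):

    import time

    def extr(text: str, begin: str, end: str) -> str:
        b = text.index(begin) + len(begin)
        return text[b : text.index(end, b)]

    js_blob = 'load("/desk?lang=en&q=test&page=5&h=aa45603&t=177582576&origin=web");'
    page_hash = extr(js_blob, "&h=", "&t=")  # -> "aa45603"
    t_param = int(time.time() / 10)          # e.g. 1775829848 -> 177582984

    assert page_hash == "aa45603"
    assert int(1775829848 / 10) == 177582984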

@@ -11,40 +11,45 @@ engines:

 """

-import typing as t
-
-import re
 import random
+import re
 import string
 import time
-from urllib.parse import urlencode
-from lxml import html
+import typing as t
+from urllib.parse import unquote, urlencode

 import babel
 import babel.core
 import babel.languages
+from lxml import html

-from searx.utils import extract_text, eval_xpath, eval_xpath_list, eval_xpath_getindex
-from searx.locales import language_tag, region_tag, get_official_locales
 from searx.network import get  # see https://github.com/searxng/searxng/issues/762
-from searx.exceptions import SearxEngineCaptchaException
 from searx.enginelib.traits import EngineTraits
+from searx.exceptions import SearxEngineCaptchaException
+from searx.locales import get_official_locales, language_tag, region_tag
 from searx.result_types import EngineResults
+from searx.utils import (
+    eval_xpath,
+    eval_xpath_getindex,
+    eval_xpath_list,
+    extract_text,
+    gen_gsa_useragent,
+)

 if t.TYPE_CHECKING:
     from searx.extended_types import SXNG_Response
     from searx.search.processors import OnlineParams

 about = {
-    "website": 'https://www.google.com',
-    "wikidata_id": 'Q9366',
-    "official_api_documentation": 'https://developers.google.com/custom-search/',
+    "website": "https://www.google.com",
+    "wikidata_id": "Q9366",
+    "official_api_documentation": "https://developers.google.com/custom-search/",
     "use_official_api": False,
     "require_api_key": False,
-    "results": 'HTML',
+    "results": "HTML",
 }

 # engine dependent config
-categories = ['general', 'web']
+categories = ["general", "web"]
 paging = True
 max_page = 50
 """`Google max 50 pages`_
@@ -54,17 +59,17 @@ max_page = 50
 time_range_support = True
 safesearch = True

-time_range_dict = {'day': 'd', 'week': 'w', 'month': 'm', 'year': 'y'}
+time_range_dict = {"day": "d", "week": "w", "month": "m", "year": "y"}

 # Filter results. 0: None, 1: Moderate, 2: Strict
-filter_mapping = {0: 'off', 1: 'medium', 2: 'high'}
+filter_mapping = {0: "off", 1: "medium", 2: "high"}

 # specific xpath variables
 # ------------------------

 # Suggestions are links placed in a *card-section*, we extract only the text
 # from the links not the links itself.
-suggestion_xpath = '//div[contains(@class, "EIaa9b")]//a'
+suggestion_xpath = '//div[contains(@class, "gGQDvd iIWm4b")]//a'


 _arcid_range = string.ascii_letters + string.digits + "_-"
@@ -87,7 +92,7 @@ def ui_async(start: int) -> str:

     # create a new random arc_id every hour
     if not _arcid_random or (int(time.time()) - _arcid_random[1]) > 3600:
-        _arcid_random = (''.join(random.choices(_arcid_range, k=23)), int(time.time()))
+        _arcid_random = ("".join(random.choices(_arcid_range, k=23)), int(time.time()))
     arc_id = f"arc_id:srp_{_arcid_random[0]}_1{start:02}"

     return ",".join([arc_id, use_ac, _fmt])
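What ui_async() builds, reduced to the string shape (an assumption drawn from this hunk: a 23-char token over [A-Za-z0-9_-], refreshed hourly, plus the result offset):

    import random
    import string

    _arcid_range = string.ascii_letters + string.digits + "_-"
    token = "".join(random.choices(_arcid_range, k=23))
    start = 10  # second result page --> offset 10
    print(f"arc_id:srp_{token}_1{start:02}")  # e.g. arc_id:srp_kF3...Xq_110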

@@ -149,23 +154,23 @@ def get_google_info(params: "OnlineParams", eng_traits: EngineTraits) -> dict[st
     """

     ret_val: dict[str, t.Any] = {
-        'language': None,
-        'country': None,
-        'subdomain': None,
-        'params': {},
-        'headers': {},
-        'cookies': {},
-        'locale': None,
+        "language": None,
+        "country": None,
+        "subdomain": None,
+        "params": {},
+        "headers": {},
+        "cookies": {},
+        "locale": None,
     }

-    sxng_locale = params.get('searxng_locale', 'all')
+    sxng_locale = params.get("searxng_locale", "all")
     try:
-        locale = babel.Locale.parse(sxng_locale, sep='-')
+        locale = babel.Locale.parse(sxng_locale, sep="-")
     except babel.core.UnknownLocaleError:
         locale = None

-    eng_lang = eng_traits.get_language(sxng_locale, 'lang_en')
-    lang_code = eng_lang.split('_')[-1]  # lang_zh-TW --> zh-TW / lang_en --> en
+    eng_lang = eng_traits.get_language(sxng_locale, "lang_en")
+    lang_code = eng_lang.split("_")[-1]  # lang_zh-TW --> zh-TW / lang_en --> en
     country = eng_traits.get_region(sxng_locale, eng_traits.all_locale)

     # Test zh_hans & zh_hant --> in the topmost links in the result list of list
@@ -176,10 +181,10 @@ def get_google_info(params: "OnlineParams", eng_traits: EngineTraits) -> dict[st
     # '!go 日 :zh-TW' --> https://zh.m.wiktionary.org/zh-hant/%E6%97%A5
     # '!go 日 :zh-CN' --> https://zh.m.wikipedia.org/zh/%E6%97%A5

-    ret_val['language'] = eng_lang
-    ret_val['country'] = country
-    ret_val['locale'] = locale
-    ret_val['subdomain'] = eng_traits.custom['supported_domains'].get(country.upper(), 'www.google.com')
+    ret_val["language"] = eng_lang
+    ret_val["country"] = country
+    ret_val["locale"] = locale
+    ret_val["subdomain"] = eng_traits.custom["supported_domains"].get(country.upper(), "www.google.com")

     # hl parameter:
     # The hl parameter specifies the interface language (host language) of
@@ -191,7 +196,7 @@ def get_google_info(params: "OnlineParams", eng_traits: EngineTraits) -> dict[st
     # https://developers.google.com/custom-search/docs/xml_results_appendices#interfaceLanguages

     # https://github.com/searxng/searxng/issues/2515#issuecomment-1607150817
-    ret_val['params']['hl'] = f'{lang_code}-{country}'
+    ret_val["params"]["hl"] = f"{lang_code}-{country}"

     # lr parameter:
     # The lr (language restrict) parameter restricts search results to
@@ -207,9 +212,9 @@ def get_google_info(params: "OnlineParams", eng_traits: EngineTraits) -> dict[st
     # By example: &lr=lang_zh-TW%7Clang_de selects articles written in
     # traditional chinese OR german language.

-    ret_val['params']['lr'] = eng_lang
-    if sxng_locale == 'all':
-        ret_val['params']['lr'] = ''
+    ret_val["params"]["lr"] = eng_lang
+    if sxng_locale == "all":
+        ret_val["params"]["lr"] = ""

     # cr parameter:
     # The cr parameter restricts search results to documents originating in a
@@ -218,9 +223,9 @@ def get_google_info(params: "OnlineParams", eng_traits: EngineTraits) -> dict[st

     # specify a region (country) only if a region is given in the selected
     # locale --> https://github.com/searxng/searxng/issues/2672
-    ret_val['params']['cr'] = ''
-    if len(sxng_locale.split('-')) > 1:
-        ret_val['params']['cr'] = 'country' + country
+    ret_val["params"]["cr"] = ""
+    if len(sxng_locale.split("-")) > 1:
+        ret_val["params"]["cr"] = "country" + country

     # gl parameter: (mandatory by Google News)
     # The gl parameter value is a two-letter country code. For WebSearch
@@ -241,14 +246,14 @@ def get_google_info(params: "OnlineParams", eng_traits: EngineTraits) -> dict[st
     # to interpret the query string. The default ie value is latin1.
     # https://developers.google.com/custom-search/docs/xml_results#iesp

-    ret_val['params']['ie'] = 'utf8'
+    ret_val["params"]["ie"] = "utf8"

     # oe parameter:
     # The oe parameter sets the character encoding scheme that should be used
     # to decode the XML result. The default oe value is latin1.
     # https://developers.google.com/custom-search/docs/xml_results#oesp

-    ret_val['params']['oe'] = 'utf8'
+    ret_val["params"]["oe"] = "utf8"

     # num parameter:
     # The num parameter identifies the number of search results to return.
@@ -261,42 +266,41 @@ def get_google_info(params: "OnlineParams", eng_traits: EngineTraits) -> dict[st

     # HTTP headers

-    ret_val['headers']['Accept'] = '*/*'
+    ret_val["headers"]["Accept"] = "*/*"
+    ret_val["headers"]["User-Agent"] = gen_gsa_useragent()

     # Cookies

     # - https://github.com/searxng/searxng/pull/1679#issuecomment-1235432746
     # - https://github.com/searxng/searxng/issues/1555
-    ret_val['cookies']['CONSENT'] = "YES+"
+    ret_val["cookies"]["CONSENT"] = "YES+"

     return ret_val


 def detect_google_sorry(resp):
-    if resp.url.host == 'sorry.google.com' or resp.url.path.startswith('/sorry'):
+    if resp.url.host == "sorry.google.com" or resp.url.path.startswith("/sorry"):
         raise SearxEngineCaptchaException()


 def request(query: str, params: "OnlineParams") -> None:
     """Google search request"""
     # pylint: disable=line-too-long
-    start = (params['pageno'] - 1) * 10
-    str_async = ui_async(start)
+    start = (params["pageno"] - 1) * 10
     google_info = get_google_info(params, traits)
-    logger.debug("ARC_ID: %s", str_async)

     # https://www.google.de/search?q=corona&hl=de&lr=lang_de&start=0&tbs=qdr%3Ad&safe=medium
     query_url = (
-        'https://'
-        + google_info['subdomain']
-        + '/search'
+        "https://"
+        + google_info["subdomain"]
+        + "/search"
         + "?"
         + urlencode(
             {
-                'q': query,
-                **google_info['params'],
-                'filter': '0',
-                'start': start,
+                "q": query,
+                **google_info["params"],
+                "filter": "0",
+                "start": start,
                 # 'vet': '12ahUKEwik3ZbIzfn7AhXMX_EDHbUDBh0QxK8CegQIARAC..i',
                 # 'ved': '2ahUKEwik3ZbIzfn7AhXMX_EDHbUDBh0Q_skCegQIARAG',
                 # 'cs' : 1,
@@ -307,40 +311,33 @@ def request(query: str, params: "OnlineParams") -> None:
                 # 'sa': 'N',
                 # 'sstk': 'AcOHfVkD7sWCSAheZi-0tx_09XDO55gTWY0JNq3_V26cNN-c8lfD45aZYPI8s_Bqp8s57AHz5pxchDtAGCA_cikAWSjy9kw3kgg'
                 # formally known as use_mobile_ui
-                'asearch': 'arc',
-                'async': str_async,
+                # "asearch": "arc",
+                # "async": str_async,
             }
         )
     )

-    if params['time_range'] in time_range_dict:
-        query_url += '&' + urlencode({'tbs': 'qdr:' + time_range_dict[params['time_range']]})
-    if params['safesearch']:
-        query_url += '&' + urlencode({'safe': filter_mapping[params['safesearch']]})
-    params['url'] = query_url
+    if params["time_range"] in time_range_dict:
+        query_url += "&" + urlencode({"tbs": "qdr:" + time_range_dict[params["time_range"]]})
+    if params["safesearch"]:
+        query_url += "&" + urlencode({"safe": filter_mapping[params["safesearch"]]})
+    params["url"] = query_url

-    params['cookies'] = google_info['cookies']
-    params['headers'].update(google_info['headers'])
+    params["cookies"] = google_info["cookies"]
+    params["headers"].update(google_info["headers"])
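The two query-string suffixes appended at the end of request(), spelled out for a "past day" search with safesearch at "medium" (values straight from time_range_dict and filter_mapping above):

    from urllib.parse import urlencode

    print("&" + urlencode({"tbs": "qdr:" + "d"}))   # &tbs=qdr%3Ad
    print("&" + urlencode({"safe": "medium"}))      # &safe=medium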


-# =26;[3,"dimg_ZNMiZPCqE4apxc8P3a2tuAQ_137"]a87;data:image/jpeg;base64,/9j/4AAQSkZJRgABA
-# ...6T+9Nl4cnD+gr9OK8I56/tX3l86nWYw//2Q==26;
-RE_DATA_IMAGE = re.compile(r'"(dimg_[^"]*)"[^;]*;(data:image[^;]*;[^;]*);')
-RE_DATA_IMAGE_end = re.compile(r'"(dimg_[^"]*)"[^;]*;(data:image[^;]*;[^;]*)$')
+# regex match to get image map that is found inside the returned javascript:
+# (function(){var s='...';var i=['...'] ...}
+RE_DATA_IMAGE = re.compile(r"(data:image[^']*?)'[^']*?'((?:dimg|pimg|tsuid)[^']*)")


-def parse_data_images(text: str):
+def parse_url_images(text: str):
     data_image_map = {}
-
-    for img_id, data_image in RE_DATA_IMAGE.findall(text):
-        end_pos = data_image.rfind('=')
-        if end_pos > 0:
-            data_image = data_image[: end_pos + 1]
-        data_image_map[img_id] = data_image
-    last = RE_DATA_IMAGE_end.search(text)
-    if last:
-        data_image_map[last.group(1)] = last.group(2)
-    logger.debug('data:image objects --> %s', list(data_image_map.keys()))
+    for image_url, img_id in RE_DATA_IMAGE.findall(text):
+        data_image_map[img_id] = image_url.encode('utf-8').decode("unicode-escape")
+    logger.debug("data:image objects --> %s", list(data_image_map.keys()))
     return data_image_map
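A minimal blob that the new RE_DATA_IMAGE matches: the data: URI comes first, the element id (dimg_/pimg_/tsuid prefix) follows after the next quoted string. The sample javascript below is made up:

    import re

    RE_DATA_IMAGE = re.compile(r"(data:image[^']*?)'[^']*?'((?:dimg|pimg|tsuid)[^']*)")

    blob = "var s='data:image/jpeg;base64,/9j/4AAQSkZJRg';var ii=['dimg_ZNMiZPCqE4_137'];"
    for image_url, img_id in RE_DATA_IMAGE.findall(blob):
        print(img_id, "->", image_url.encode("utf-8").decode("unicode-escape"))
    # dimg_ZNMiZPCqE4_137 -> data:image/jpeg;base64,/9j/4AAQSkZJRg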

@@ -348,65 +345,57 @@ def response(resp: "SXNG_Response"):
     """Get response from google's search request"""
     # pylint: disable=too-many-branches, too-many-statements
     detect_google_sorry(resp)
-    data_image_map = parse_data_images(resp.text)
+    data_image_map = parse_url_images(resp.text)

     results = EngineResults()

     # convert the text to dom
     dom = html.fromstring(resp.text)

     # results --> answer
     answer_list = eval_xpath(dom, '//div[contains(@class, "LGOjhe")]')
     for item in answer_list:
         for bubble in eval_xpath(item, './/div[@class="nnFGuf"]'):
             bubble.drop_tree()
         results.add(
             results.types.Answer(
                 answer=extract_text(item),
                 url=(eval_xpath(item, '../..//a/@href') + [None])[0],
             )
         )

     # parse results
-    for result in eval_xpath_list(dom, './/div[contains(@jscontroller, "SC7lYd")]'):
+    for result in eval_xpath_list(dom, '//a[@data-ved and not(@class)]'):
         # pylint: disable=too-many-nested-blocks

         try:
-            title_tag = eval_xpath_getindex(result, './/a/h3[1]', 0, default=None)
+            title_tag = eval_xpath_getindex(result, './/div[@style]', 0, default=None)
             if title_tag is None:
                 # this not one of the common google results *section*
-                logger.debug('ignoring item from the result_xpath list: missing title')
+                logger.debug("ignoring item from the result_xpath list: missing title")
                 continue
             title = extract_text(title_tag)

-            url = eval_xpath_getindex(result, './/a[h3]/@href', 0, None)
-            if url is None:
-                logger.debug('ignoring item from the result_xpath list: missing url of title "%s"', title)
+            raw_url = result.get("href")
+            if raw_url is None:
+                logger.debug(
+                    'ignoring item from the result_xpath list: missing url of title "%s"',
+                    title,
+                )
                 continue

-            content_nodes = eval_xpath(result, './/div[contains(@data-sncf, "1")]')
+            if raw_url.startswith('/url?q='):
+                url = unquote(raw_url[7:].split("&sa=U")[0])  # remove the google redirector
+            else:
+                url = raw_url
+
+            content_nodes = eval_xpath(result, '../..//div[contains(@class, "ilUpNd H66NU aSRlid")]')
             for item in content_nodes:
                 for script in item.xpath(".//script"):
                     script.getparent().remove(script)

-            content = extract_text(content_nodes)
+            content = extract_text(content_nodes[0])

-            if not content:
-                logger.debug('ignoring item from the result_xpath list: missing content of title "%s"', title)
-                continue
-
-            thumbnail = content_nodes[0].xpath('.//img/@src')
-            if thumbnail:
-                thumbnail = thumbnail[0]
-                if thumbnail.startswith('data:image'):
-                    img_id = content_nodes[0].xpath('.//img/@id')
-                    if img_id:
-                        thumbnail = data_image_map.get(img_id[0])
-            else:
-                thumbnail = None
+            # Images that are NOT the favicon
+            xpath_image = eval_xpath_getindex(result, './/img', index=0, default=None)
+
+            thumbnail = None
+            if xpath_image is not None:
+                thumbnail = xpath_image.get("src")
+                if thumbnail.startswith("data:image"):
+                    img_id = xpath_image.get("id")
+                    if img_id:
+                        thumbnail = data_image_map.get(img_id)

-            results.append({'url': url, 'title': title, 'content': content, 'thumbnail': thumbnail})
+            results.append({"url": url, "title": title, "content": content or '', "thumbnail": thumbnail})

         except Exception as e:  # pylint: disable=broad-except
             logger.error(e, exc_info=True)
@@ -415,7 +404,7 @@ def response(resp: "SXNG_Response"):
     # parse suggestion
     for suggestion in eval_xpath_list(dom, suggestion_xpath):
         # append suggestion
-        results.append({'suggestion': extract_text(suggestion)})
+        results.append({"suggestion": extract_text(suggestion)})

     # return results
     return results
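The redirector handling added above, on a concrete (made-up) href as Google emits it for these anchor results:

    from urllib.parse import unquote

    raw_url = "/url?q=https%3A%2F%2Fexample.org%2Fpage&sa=U&ved=0ahUKEwi"
    url = unquote(raw_url[7:].split("&sa=U")[0])  # drop "/url?q=", cut tracking, decode
    assert url == "https://example.org/page"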

@@ -426,27 +415,27 @@ def response(resp: "SXNG_Response"):

 skip_countries = [
     # official language of google-country not in google-languages
-    'AL',  # Albanien (sq)
-    'AZ',  # Aserbaidschan (az)
-    'BD',  # Bangladesch (bn)
-    'BN',  # Brunei Darussalam (ms)
-    'BT',  # Bhutan (dz)
-    'ET',  # Äthiopien (am)
-    'GE',  # Georgien (ka, os)
-    'GL',  # Grönland (kl)
-    'KH',  # Kambodscha (km)
-    'LA',  # Laos (lo)
-    'LK',  # Sri Lanka (si, ta)
-    'ME',  # Montenegro (sr)
-    'MK',  # Nordmazedonien (mk, sq)
-    'MM',  # Myanmar (my)
-    'MN',  # Mongolei (mn)
-    'MV',  # Malediven (dv) // dv_MV is unknown by babel
-    'MY',  # Malaysia (ms)
-    'NP',  # Nepal (ne)
-    'TJ',  # Tadschikistan (tg)
-    'TM',  # Turkmenistan (tk)
-    'UZ',  # Usbekistan (uz)
+    "AL",  # Albanien (sq)
+    "AZ",  # Aserbaidschan (az)
+    "BD",  # Bangladesch (bn)
+    "BN",  # Brunei Darussalam (ms)
+    "BT",  # Bhutan (dz)
+    "ET",  # Äthiopien (am)
+    "GE",  # Georgien (ka, os)
+    "GL",  # Grönland (kl)
+    "KH",  # Kambodscha (km)
+    "LA",  # Laos (lo)
+    "LK",  # Sri Lanka (si, ta)
+    "ME",  # Montenegro (sr)
+    "MK",  # Nordmazedonien (mk, sq)
+    "MM",  # Myanmar (my)
+    "MN",  # Mongolei (mn)
+    "MV",  # Malediven (dv) // dv_MV is unknown by babel
+    "MY",  # Malaysia (ms)
+    "NP",  # Nepal (ne)
+    "TJ",  # Tadschikistan (tg)
+    "TM",  # Turkmenistan (tk)
+    "UZ",  # Usbekistan (uz)
 ]


@@ -454,21 +443,23 @@ def fetch_traits(engine_traits: EngineTraits, add_domains: bool = True):
     """Fetch languages from Google."""
     # pylint: disable=import-outside-toplevel, too-many-branches

-    engine_traits.custom['supported_domains'] = {}
+    from searx.network import get  # see https://github.com/searxng/searxng/issues/762

-    resp = get('https://www.google.com/preferences')
-    if not resp.ok:  # type: ignore
-        raise RuntimeError("Response from Google's preferences is not OK.")
+    engine_traits.custom["supported_domains"] = {}

-    dom = html.fromstring(resp.text.replace('<?xml version="1.0" encoding="UTF-8"?>', ''))
+    resp = get("https://www.google.com/preferences", timeout=5)
+    if not resp.ok:
+        raise RuntimeError("Response from Google preferences is not OK.")
+
+    dom = html.fromstring(resp.text.replace('<?xml version="1.0" encoding="UTF-8"?>', ""))

     # supported language codes

-    lang_map = {'no': 'nb'}
+    lang_map = {"no": "nb"}
     for x in eval_xpath_list(dom, "//select[@name='hl']/option"):
         eng_lang = x.get("value")
         try:
-            locale = babel.Locale.parse(lang_map.get(eng_lang, eng_lang), sep='-')
+            locale = babel.Locale.parse(lang_map.get(eng_lang, eng_lang), sep="-")
         except babel.UnknownLocaleError:
             print("INFO: google UI language %s (%s) is unknown by babel" % (eng_lang, x.text.split("(")[0].strip()))
             continue
@@ -479,10 +470,10 @@ def fetch_traits(engine_traits: EngineTraits, add_domains: bool = True):
         if conflict != eng_lang:
             print("CONFLICT: babel %s --> %s, %s" % (sxng_lang, conflict, eng_lang))
             continue
-        engine_traits.languages[sxng_lang] = 'lang_' + eng_lang
+        engine_traits.languages[sxng_lang] = "lang_" + eng_lang

     # alias languages
-    engine_traits.languages['zh'] = 'lang_zh-CN'
+    engine_traits.languages["zh"] = "lang_zh-CN"

     # supported region codes

@@ -491,37 +482,37 @@ def fetch_traits(engine_traits: EngineTraits, add_domains: bool = True):

         if eng_country in skip_countries:
             continue
-        if eng_country == 'ZZ':
-            engine_traits.all_locale = 'ZZ'
+        if eng_country == "ZZ":
+            engine_traits.all_locale = "ZZ"
             continue

         sxng_locales = get_official_locales(eng_country, engine_traits.languages.keys(), regional=True)

         if not sxng_locales:
-            print("ERROR: can't map from google country %s (%s) to a babel region." % (x.get('data-name'), eng_country))
+            print("ERROR: can't map from google country %s (%s) to a babel region." % (x.get("data-name"), eng_country))
             continue

         for sxng_locale in sxng_locales:
             engine_traits.regions[region_tag(sxng_locale)] = eng_country

     # alias regions
-    engine_traits.regions['zh-CN'] = 'HK'
+    engine_traits.regions["zh-CN"] = "HK"

     # supported domains

     if add_domains:
-        resp = get('https://www.google.com/supported_domains')
-        if not resp.ok:  # type: ignore
-            raise RuntimeError("Response from https://www.google.com/supported_domains is not OK.")
+        resp = get("https://www.google.com/supported_domains", timeout=5)
+        if not resp.ok:
+            raise RuntimeError("Response from Google supported domains is not OK.")

-        for domain in resp.text.split():  # type: ignore
+        for domain in resp.text.split():
             domain = domain.strip()
-            if not domain or domain in [
-                '.google.com',
-            ]:
+            if not domain or domain in [
+                ".google.com",
+            ]:
                 continue
-            region = domain.split('.')[-1].upper()
-            engine_traits.custom['supported_domains'][region] = 'www' + domain  # type: ignore
-            if region == 'HK':
+            region = domain.split(".")[-1].upper()
+            engine_traits.custom["supported_domains"][region] = "www" + domain
+            if region == "HK":
                 # There is no google.cn, we use .com.hk for zh-CN
-                engine_traits.custom['supported_domains']['CN'] = 'www' + domain  # type: ignore
+                engine_traits.custom["supported_domains"]["CN"] = "www" + domain
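How one line of the plain-text supported_domains list turns into a table entry under the hunk's logic (".google.de" is a real entry in that list; HK additionally aliases CN since there is no google.cn):

    supported_domains: dict[str, str] = {}
    for domain in (".google.de", ".google.com.hk"):
        region = domain.split(".")[-1].upper()        # "DE", "HK"
        supported_domains[region] = "www" + domain    # "www.google.de", ...
        if region == "HK":
            supported_domains["CN"] = "www" + domain  # alias, no google.cn

    assert supported_domains["DE"] == "www.google.de"
    assert supported_domains["CN"] == "www.google.com.hk"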

@@ -44,7 +44,6 @@ max_page = 50

 time_range_support = True
 safesearch = True
-send_accept_language_header = True

 filter_mapping = {0: 'images', 1: 'active', 2: 'active'}

@@ -60,11 +60,10 @@ paging = False
 time_range_support = False

 # Google-News results are always *SafeSearch*. Option 'safesearch' is set to
-# False here, otherwise checker will report safesearch-errors::
+# False here.
 #
 # safesearch : results are identical for safesearch=0 and safesearch=2
 safesearch = True
-# send_accept_language_header = True


 def request(query, params):