maintain own robots.txt instead of using upstream and disallow some annoying seo marketing crap bots from logs #34

Merged
ben merged 1 commit from robotstxt into main 2022-10-26 14:15:58 +00:00
6 changed files with 54 additions and 0 deletions

View File

@ -57,6 +57,7 @@
mode: 0755
tags:
- authelia-nginx
- authelia-robots
notify: reload nginx
- name: template nginx vhost

View File

@ -61,6 +61,7 @@
- sitemap.xml
tags:
- robots
- gitea-robots
- name: copy gitea templates
copy:

View File

@ -1,6 +1,37 @@
User-agent: baidu
crawl-delay: 1
{# SEO/SEM #}
User-agent: SemrushBot
Disallow: /
{# SEO/SEM #}
User-agent: AhrefsBot
Disallow: /
{# SEO/SEM #}
User-agent: DataForSeoBot
Disallow: /
{# https://www.ionos.de/terms-gtc/faq-crawler/ #}
{# disallowed on principle because that page is not available in English #}
User-agent: IonCrawl
Disallow: /
{# SEO/SEM #}
User-agent: barkrowler
Disallow: /
{# SEO/SEM #}
User-agent: seoscanners.net
Disallow: /
{# SEO/SEM #}
User-agent: MegaIndex.ru
Disallow: /
User-agent: megaindex.com
Disallow: /
User-agent: *
Disallow: /*/pulse
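
The rendered file can be sanity-checked from a crawler's point of view with Python's urllib.robotparser. A minimal sketch, assuming the template has already been rendered (the Jinja {# ... #} comments are stripped at render time) and using only an excerpt of the rules; note that robotparser does literal prefix matching, so the /*/pulse wildcard only takes effect for crawlers that implement Google-style wildcards:

import urllib.robotparser

# excerpt of the rendered robots.txt (Jinja comments already stripped)
rendered = """\
User-agent: SemrushBot
Disallow: /

User-agent: *
Disallow: /*/pulse
"""

rp = urllib.robotparser.RobotFileParser()
rp.parse(rendered.splitlines())

assert rp.can_fetch("SemrushBot", "/some/repo") is False  # SEO bot is shut out entirely
assert rp.can_fetch("Mozilla/5.0", "/some/repo") is True  # ordinary agents are still allowed
# rp.can_fetch("Mozilla/5.0", "/some/repo/pulse") also returns True here:
# robotparser treats the * in /*/pulse literally, unlike Googlebot-style matchers.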

View File

@ -209,6 +209,17 @@
daemon_reload: true
name: jellyfin_auth
- name: template robots.txt
template:
src: "robots.txt.j2"
dest: "{{ systemuserlist.jellyfin.home }}/robots.txt"
owner: jellyfin
group: jellyfin
mode: 0755
tags:
- jellyfin-nginx
- jellyfin-robots
notify: reload nginx
- name: template jellyfin nginx config
template:
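
The template module's output can also be previewed locally with the jinja2 library before deploying. A minimal sketch; the template path is an assumption, and Ansible applies its own Jinja settings (e.g. trim_blocks), so this is only an approximation of what lands on the host:

from jinja2 import Environment, FileSystemLoader

# hypothetical in-repo location of the template
env = Environment(loader=FileSystemLoader("roles/jellyfin/templates"))
print(env.get_template("robots.txt.j2").render())
# Jinja {# ... #} comments do not appear in the rendered output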

View File

@ -69,6 +69,14 @@ server {
return 302 https://$host/web/;
}
# jellyfin-web already has a robots.txt file that disallows everything, but we still want to maintain our own.
# jellyfin (not -web) will issue a 302 redirect from {{ jellyfin_url }}/robots.txt to {{ jellyfin_url }}/web/robots.txt,
# from which the file is then served.
#
# https://github.com/jellyfin/jellyfin-web/blob/master/src/robots.txt
location = /robots.txt {
alias {{ systemuserlist.jellyfin.home }}/robots.txt;
}
location / {
#include /etc/nginx/jellyfin/jellyfin_auth.conf;
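
Once nginx has reloaded, the effect of the location block is easy to confirm: /robots.txt should answer directly with our file rather than jellyfin's 302 to /web/robots.txt. A minimal sketch with urllib, with jellyfin.example.org standing in for the real jellyfin_url:

import urllib.request

url = "https://jellyfin.example.org/robots.txt"  # hypothetical host
with urllib.request.urlopen(url) as resp:
    assert resp.status == 200
    # urlopen follows redirects, so also verify we were not bounced to /web/robots.txt
    assert resp.geturl() == url
    body = resp.read().decode()
assert "Disallow: /" in body  # the deny-all file below is what gets served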

View File

@ -0,0 +1,2 @@
User-agent: *
Disallow: /