From 6890478c69706f905bc91776279ae3a9b70323b6 Mon Sep 17 00:00:00 2001
From: Ben Kristinsson
Date: Wed, 26 Oct 2022 16:04:13 +0200
Subject: [PATCH] maintain own robots.txt instead of using upstream and
 disallow some annoying seo marketing crap bots from logs

---
Reviewer note: this copy of the patch was reconstructed from a
whitespace-collapsed source. Line breaks follow the hunk headers, but the
leading indentation and the position of blank context lines are inferred;
confirm them against the target files before applying.

 roles/authelia/tasks/authelia.yml       |  1 +
 roles/gitea/tasks/gitea.yml             |  1 +
 roles/gitea/templates/robots.txt.j2     | 31 +++++++++++++++++++++++++
 roles/jellyfin/tasks/jellyfin.yml       | 11 +++++++++
 roles/jellyfin/templates/01-jellyfin.j2 |  8 +++++++
 roles/jellyfin/templates/robots.txt.j2  |  2 ++
 6 files changed, 54 insertions(+)
 create mode 100644 roles/jellyfin/templates/robots.txt.j2

diff --git a/roles/authelia/tasks/authelia.yml b/roles/authelia/tasks/authelia.yml
index 5af9891..a804d35 100644
--- a/roles/authelia/tasks/authelia.yml
+++ b/roles/authelia/tasks/authelia.yml
@@ -57,6 +57,7 @@
     mode: 0755
   tags:
     - authelia-nginx
+    - authelia-robots
   notify: reload nginx
 
 - name: template nginx vhost
diff --git a/roles/gitea/tasks/gitea.yml b/roles/gitea/tasks/gitea.yml
index b074fd6..72bbf1b 100644
--- a/roles/gitea/tasks/gitea.yml
+++ b/roles/gitea/tasks/gitea.yml
@@ -61,6 +61,7 @@
     - sitemap.xml
   tags:
     - robots
+    - gitea-robots
 
 - name: copy gitea templates
   copy:
diff --git a/roles/gitea/templates/robots.txt.j2 b/roles/gitea/templates/robots.txt.j2
index 3af3651..75c3614 100644
--- a/roles/gitea/templates/robots.txt.j2
+++ b/roles/gitea/templates/robots.txt.j2
@@ -1,6 +1,37 @@
 User-agent: baidu
 crawl-delay: 1
 
+{# SEO/SEM #}
+User-agent: SemrushBot
+Disallow: /
+
+{# SEO/SEM #}
+User-agent: AhrefsBot
+Disallow: /
+
+{# SEO/SEM #}
+User-agent: DataForSeoBot
+Disallow: /
+
+{# https://www.ionos.de/terms-gtc/faq-crawler/ #}
+{# disallowed on principle for not having that page in english #}
+User-agent: IonCrawl
+Disallow: /
+
+{# SEO/SEM #}
+User-agent: barkrowler
+Disallow: /
+
+{# SEO/SEM #}
+User-agent: seoscanners.net
+Disallow: /
+
+{# SEO/SEM #}
+User-agent: MegaIndex.ru
+Disallow: /
+User-agent: megaindex.com
+Disallow: /
+
 User-agent: *
 Disallow: /*/pulse
 
diff --git a/roles/jellyfin/tasks/jellyfin.yml b/roles/jellyfin/tasks/jellyfin.yml
index 536480b..03e9f2f 100644
--- a/roles/jellyfin/tasks/jellyfin.yml
+++ b/roles/jellyfin/tasks/jellyfin.yml
@@ -209,6 +209,17 @@
     daemon_reload: true
     name: jellyfin_auth
 
+- name: template robots.txt
+  template:
+    src: "robots.txt.j2"
+    dest: "{{ systemuserlist.jellyfin.home }}/robots.txt"
+    owner: jellyfin
+    group: jellyfin
+    mode: 0755
+  tags:
+    - jellyfin-nginx
+    - jellyfin-robots
+  notify: reload nginx
 
 - name: template jellyfin nginx config
   template:
diff --git a/roles/jellyfin/templates/01-jellyfin.j2 b/roles/jellyfin/templates/01-jellyfin.j2
index 54eb0b1..23f4a25 100644
--- a/roles/jellyfin/templates/01-jellyfin.j2
+++ b/roles/jellyfin/templates/01-jellyfin.j2
@@ -69,6 +69,14 @@ server {
     return 302 https://$host/web/;
   }
 
+  # jellyfin-web already has a robots.txt file that disallows everything, but we still want to maintain our own.
+  # jellyfin (not -web) will issue a 302 redirect from {{ jellyfin_url }}/robots.txt to {{ jellyfin_url }}/web/robots.txt
+  # where the file is then served from.
+  #
+  # https://github.com/jellyfin/jellyfin-web/blob/master/src/robots.txt
+  location = /robots.txt {
+    alias {{ systemuserlist.jellyfin.home }}/robots.txt;
+  }
 
   location / {
     #include /etc/nginx/jellyfin/jellyfin_auth.conf;
diff --git a/roles/jellyfin/templates/robots.txt.j2 b/roles/jellyfin/templates/robots.txt.j2
new file mode 100644
index 0000000..1f53798
--- /dev/null
+++ b/roles/jellyfin/templates/robots.txt.j2
@@ -0,0 +1,2 @@
+User-agent: *
+Disallow: /
-- 
2.40.1