From ce1213cd14395a1ed3512143211a6076d58f1a89 Mon Sep 17 00:00:00 2001
From: John Bowdre
Date: Sun, 4 Aug 2024 17:33:37 -0500
Subject: [PATCH] build robots.txt from ai.robots.txt on github

---
Notes (ignored by git am; not part of the commit message):
- layouts/partials/bad-robots.html now fails the build when robots.json
  parses to an empty list, instead of printing a "Disallow: /" rule with no
  "User-agent:" line above it. Output for a non-empty list is unchanged.
- The commit id above and the blob id on the bad-robots.html index line
  predate that amendment.
- Line breaks and indentation were restored by hand; verify the removed
  dark-visitors.html lines against the tree before applying.

 content/changelog.md                |  4 ++--
 content/colophon.md                 |  2 +-
 layouts/partials/bad-robots.html    | 20 ++++++++++++++++++++
 layouts/partials/dark-visitors.html | 27 ---------------------------
 layouts/robots.txt                  |  2 +-
 5 files changed, 24 insertions(+), 31 deletions(-)
 create mode 100644 layouts/partials/bad-robots.html
 delete mode 100644 layouts/partials/dark-visitors.html

diff --git a/content/changelog.md b/content/changelog.md
index 28539f9..0e48232 100644
--- a/content/changelog.md
+++ b/content/changelog.md
@@ -1,14 +1,14 @@
 ---
 title: "/changelog"
 date: "2024-05-26T21:19:08Z"
-lastmod: "2024-08-02T21:16:14Z"
+lastmod: "2024-08-04T22:30:43Z"
 description: "Maybe I should keep a log of all my site-related tinkering?"
 featured: false
 toc: false
 timeless: true
 categories: slashes
 ---
-*High-level list of config/layout changes to the site. The full changelog is of course [on GitHub](https://github.com/jbowdre/runtimeterror/commits/main/).*
+*Running list of config/layout changes to the site. The full changelog is of course [on GitHub](https://github.com/jbowdre/runtimeterror/commits/main/).*
 
 **2024-08-02:**
 - Display "pinned" recent track in sidebar using [MusicThread](https://musicthread.app) instead of latest scrobble
diff --git a/content/colophon.md b/content/colophon.md
index d7c8c6b..4e531a9 100644
--- a/content/colophon.md
+++ b/content/colophon.md
@@ -15,7 +15,7 @@ categories: slashes
 - uses the font face [Berkeley Mono](https://berkeleygraphics.com/typefaces/berkeley-mono/) ([details](/using-custom-font-hugo/)), and icons from [Font Awesome](https://fontawesome.com/) and [Fork Awesome](https://forkaweso.me/).
 - performs syntax highlighting with [Torchlight](https://torchlight.dev) ([details](/spotlight-on-torchlight/)).
 - provides site search with [lunr](https://lunrjs.com/) based on an implementation detailed by [Victoria Drake](https://victoria.dev/blog/add-search-to-hugo-static-sites-with-lunr/).
-- uses [Dark Visitors](https://darkvisitors.com/docs/robots-txt)'s API to dynamically generate a [robots.txt](/robots.txt) discouraging AI scrapers with some Hugo code from [Luke Harris](https://github.com/lkhrs/hugo-dark-visitors).
+- fetches [ai.robots.txt](https://github.com/ai-robots-txt/ai.robots.txt) to dynamically generate a [robots.txt](/robots.txt) discouraging AI scrapers with Hugo's [`resources.GetRemote` capability](https://gohugo.io/functions/resources/getremote/).
 - leverages [Cabin](https://withcabin.com) for [privacy-friendly](https://withcabin.com/privacy/runtimeterror.dev) analytics.
 - fetches recently-played music from [MusicThread](https://musicthread.app/).
 - displays my latest status from [omg.lol](https://home.omg.lol/referred-by/jbowdre).
diff --git a/layouts/partials/bad-robots.html b/layouts/partials/bad-robots.html
new file mode 100644
index 0000000..2dd28e3
--- /dev/null
+++ b/layouts/partials/bad-robots.html
@@ -0,0 +1,20 @@
+{{- /* Emits one robots.txt group: a User-agent line per key of the remote robots.json, then a single "Disallow: /". */ -}}
+{{- $url := "https://raw.githubusercontent.com/ai-robots-txt/ai.robots.txt/main/robots.json" -}}
+{{- with resources.GetRemote $url -}}
+  {{- with .Err -}}
+    {{- errorf "%s" . -}}
+  {{- else -}}
+    {{- /* An empty list would leave a bare "Disallow: /" that attaches to whatever group precedes this partial in robots.txt, so fail the build instead. */ -}}
+    {{- with unmarshal .Content -}}
+      {{- range $botname, $props := . }}
+        {{- printf "User-agent: %s\n" $botname }}
+      {{- end }}
+      {{- printf "Disallow: /\n" }}
+      {{- printf "\n# (bad bots bundled by https://github.com/ai-robots-txt/ai.robots.txt)" }}
+    {{- else -}}
+      {{- errorf "No user agents found in %q" $url -}}
+    {{- end -}}
+  {{- end -}}
+{{- else -}}
+  {{- errorf "Unable to get remote resource %q" $url -}}
+{{- end -}}
\ No newline at end of file
diff --git a/layouts/partials/dark-visitors.html b/layouts/partials/dark-visitors.html
deleted file mode 100644
index e7aec71..0000000
--- a/layouts/partials/dark-visitors.html
+++ /dev/null
@@ -1,27 +0,0 @@
-{{/* borrowed from Luke Harris @ https://github.com/lkhrs/hugo-dark-visitors */}}
-{{- $url := "https://api.darkvisitors.com/robots-txts" -}}
-{{- $api_key := getenv "HUGO_DARKVISITORS" -}}
-{{- $bearer := printf "Bearer %v" $api_key -}}
-{{- $agent_types := slice -}}
-{{- if .Site.Params.darkVisitors -}}
-  {{- range .Site.Params.darkVisitors -}}
-    {{- $agent_types = $agent_types | append . -}}
-  {{- end -}}
-{{- else -}}
-  {{- $agent_types = slice "AI Data Scraper" -}}
-{{- end -}}
-{{- $agent_types := $agent_types | jsonify -}}
-{{- $opts := dict
-  "method" "post"
-  "headers" (dict "Authorization" (slice $bearer) "Content-Type" "application/json")
-  "body" (printf `{"agent_types": %s,"disallow": "/"}` $agent_types)
--}}
-{{- with resources.GetRemote $url $opts -}}
-  {{- with .Err -}}
-    {{- errorf "%s" . -}}
-  {{- else -}}
-    {{- .Content -}}
-  {{- end -}}
-{{- else -}}
-  {{- errorf "Unable to get remote resource %q" $url -}}
-{{- end -}}
\ No newline at end of file
diff --git a/layouts/robots.txt b/layouts/robots.txt
index 2a7e039..b5d0b09 100644
--- a/layouts/robots.txt
+++ b/layouts/robots.txt
@@ -8,4 +8,4 @@ Disallow:
 
 # except for these bots which are not friends:
 
-{{ partial "dark-visitors.html" . }}
+{{ partial "bad-robots.html" . }}