From ff4c91e6f69ab78daae89986d103c2f4610ba03b Mon Sep 17 00:00:00 2001 From: John Bowdre Date: Mon, 29 Jul 2024 20:05:42 -0500 Subject: [PATCH] generate robots.txt from darkvisitors api --- .github/workflows/deploy-prod.yml | 5 ++++- config/_default/params.toml | 30 ++++------------------------- config/local/hugo.toml | 3 ++- config/preview/hugo.toml | 3 ++- layouts/partials/dark-visitors.html | 27 ++++++++++++++++++++++++++ layouts/robots.txt | 6 ++---- 6 files changed, 41 insertions(+), 33 deletions(-) create mode 100644 layouts/partials/dark-visitors.html diff --git a/.github/workflows/deploy-prod.yml b/.github/workflows/deploy-prod.yml index 5001cf6..487ed45 100644 --- a/.github/workflows/deploy-prod.yml +++ b/.github/workflows/deploy-prod.yml @@ -45,7 +45,10 @@ jobs: echo "${{ secrets.SSH_KNOWN_HOSTS }}" > ~/.ssh/known_hosts chmod 644 ~/.ssh/known_hosts - name: Build with Hugo - run: HUGO_REMOTE_FONT_PATH=${{ secrets.REMOTE_FONT_PATH }} hugo --minify + run: | + HUGO_REMOTE_FONT_PATH=${{ secrets.REMOTE_FONT_PATH }} \ + HUGO_DARKVISITORS=${{ secrets.DARKVISITORS_TOKEN }} \ + hugo --minify - name: Insert 404 page run: | mkdir -p public/bunnycdn_errors diff --git a/config/_default/params.toml b/config/_default/params.toml index 9a5dc3e..e0f5d42 100644 --- a/config/_default/params.toml +++ b/config/_default/params.toml @@ -8,32 +8,10 @@ numberOfRelatedPosts = 5 indexTitle = ".-. ..- -. - .. -- . - . .-. .-. --- .-." 
-bad_robots = [ - "AdsBot-Google", - "Amazonbot", - "anthropic-ai", - "Applebot-Extended", - "AwarioRssBot", - "AwarioSmartBot", - "Bytespider", - "CCBot", - "ChatGPT", - "ChatGPT-User", - "Claude-Web", - "ClaudeBot", - "cohere-ai", - "DataForSeoBot", - "Diffbot", - "FacebookBot", - "Google-Extended", - "GPTBot", - "ImagesiftBot", - "magpie-crawler", - "omgili", - "Omgilibot", - "peer39_crawler", - "PerplexityBot", - "YouBot" +darkVisitors = [ + "AI Assistant", + "AI Data Scraper", + "AI Search Crawler" ] # Comments diff --git a/config/local/hugo.toml b/config/local/hugo.toml index 0873821..fe992f0 100644 --- a/config/local/hugo.toml +++ b/config/local/hugo.toml @@ -1 +1,2 @@ -baseURL = "http://localhost:1313/" \ No newline at end of file +baseURL = "http://localhost:1313/" +enableRobotsTXT = false \ No newline at end of file diff --git a/config/preview/hugo.toml b/config/preview/hugo.toml index f9f012b..bdba38b 100644 --- a/config/preview/hugo.toml +++ b/config/preview/hugo.toml @@ -1 +1,2 @@ -baseURL = "https://preview.runtimeterror.dev/" \ No newline at end of file +baseURL = "https://preview.runtimeterror.dev/" +enableRobotsTXT = false \ No newline at end of file diff --git a/layouts/partials/dark-visitors.html b/layouts/partials/dark-visitors.html new file mode 100644 index 0000000..00c234b --- /dev/null +++ b/layouts/partials/dark-visitors.html @@ -0,0 +1,27 @@ +{{/* Emits robots.txt rules fetched from the Dark Visitors API (borrowed from https://github.com/lkhrs/hugo-dark-visitors). Sends a POST request to $url with a bearer token read from the HUGO_DARKVISITORS environment variable and a JSON body that lists the agent types from .Site.Params.darkVisitors (falling back to "AI Data Scraper" when that param is unset or empty) with "disallow" set to "/". Outputs the response content; calls errorf if the fetched resource reports an error or if no resource is returned. */}} +{{- $url := "https://api.darkvisitors.com/robots-txts" -}} +{{- $api_key := getenv "HUGO_DARKVISITORS" -}} +{{- $bearer := printf "Bearer %v" $api_key -}} +{{- $agent_types := slice -}} +{{- if .Site.Params.darkVisitors -}} + {{- range .Site.Params.darkVisitors -}} + {{- $agent_types = $agent_types | append . 
-}} + {{- end -}} +{{- else -}} + {{- $agent_types = slice "AI Data Scraper" -}} +{{- end -}} +{{- $agent_types := $agent_types | jsonify -}} +{{- $opts := dict + "method" "post" + "headers" (dict "Authorization" (slice $bearer) "Content-Type" "application/json") + "body" (printf `{"agent_types": %s,"disallow": "/"}` $agent_types) +-}} +{{- with resources.GetRemote $url $opts -}} + {{- with .Err -}} + {{- errorf "%s" . -}} + {{- else -}} + {{- .Content -}} + {{- end -}} +{{- else -}} + {{- errorf "Unable to get remote resource %q" $url -}} +{{- end -}} \ No newline at end of file diff --git a/layouts/robots.txt b/layouts/robots.txt index 6c7d9c0..2a7e039 100644 --- a/layouts/robots.txt +++ b/layouts/robots.txt @@ -7,7 +7,5 @@ User-agent: * Disallow: # except for these bots which are not friends: -{{ range .Site.Params.bad_robots }} -User-agent: {{ . }} -{{- end }} -Disallow: / + +{{ partial "dark-visitors.html" . }}