generate robots.txt from darkvisitors api

This commit is contained in:
John Bowdre 2024-07-29 20:05:42 -05:00
parent b15c943f0b
commit ff4c91e6f6
6 changed files with 41 additions and 33 deletions

View file

@ -45,7 +45,10 @@ jobs:
echo "${{ secrets.SSH_KNOWN_HOSTS }}" > ~/.ssh/known_hosts
chmod 644 ~/.ssh/known_hosts
- name: Build with Hugo
run: HUGO_REMOTE_FONT_PATH=${{ secrets.REMOTE_FONT_PATH }} hugo --minify
run: |
HUGO_REMOTE_FONT_PATH=${{ secrets.REMOTE_FONT_PATH }} \
HUGO_DARKVISITORS=${{ secrets.DARKVISITORS_TOKEN }} \
hugo --minify
- name: Insert 404 page
run: |
mkdir -p public/bunnycdn_errors

View file

@ -8,32 +8,10 @@ numberOfRelatedPosts = 5
indexTitle = ".-. ..- -. - .. -- . - . .-. .-. --- .-."
bad_robots = [
"AdsBot-Google",
"Amazonbot",
"anthropic-ai",
"Applebot-Extended",
"AwarioRssBot",
"AwarioSmartBot",
"Bytespider",
"CCBot",
"ChatGPT",
"ChatGPT-User",
"Claude-Web",
"ClaudeBot",
"cohere-ai",
"DataForSeoBot",
"Diffbot",
"FacebookBot",
"Google-Extended",
"GPTBot",
"ImagesiftBot",
"magpie-crawler",
"omgili",
"Omgilibot",
"peer39_crawler",
"PerplexityBot",
"YouBot"
darkVisitors = [
"AI Assistant",
"AI Data Scraper",
"AI Search Crawler"
]
# Comments

View file

@ -1 +1,2 @@
baseURL = "http://localhost:1313/"
baseURL = "http://localhost:1313/"
enableRobotsTXT = false

View file

@ -1 +1,2 @@
baseURL = "https://preview.runtimeterror.dev/"
baseURL = "https://preview.runtimeterror.dev/"
enableRobotsTXT = false

View file

@ -0,0 +1,27 @@
{{/* borrowed from https://github.com/lkhrs/hugo-dark-visitors */}}
{{- /*
  Emits robots.txt rules fetched from the Dark Visitors API.

  Inputs:
    - env HUGO_DARKVISITORS: API token, sent as an "Authorization: Bearer" header.
    - .Site.Params.darkVisitors: list of agent types to block; when unset or
      empty, falls back to the single type "AI Data Scraper".

  Output: the raw body of the API response. Any failure (missing token,
  request error, or no resource returned) is reported through errorf
  instead of silently producing an empty rule set.
*/ -}}
{{- $url := "https://api.darkvisitors.com/robots-txts" -}}
{{- $api_key := getenv "HUGO_DARKVISITORS" -}}
{{- if not $api_key -}}
  {{- /* Report the missing token up front and skip the request, rather than
         sending an empty bearer token and surfacing an opaque HTTP error. */ -}}
  {{- errorf "HUGO_DARKVISITORS is not set; unable to request robots.txt rules from %q" $url -}}
{{- else -}}
  {{- $bearer := printf "Bearer %v" $api_key -}}
  {{- /* `default` treats a missing or empty list as unset, matching the
         truthiness check this replaces. */ -}}
  {{- $agent_types := .Site.Params.darkVisitors | default (slice "AI Data Scraper") | jsonify -}}
  {{- $opts := dict
    "method" "post"
    "headers" (dict "Authorization" (slice $bearer) "Content-Type" "application/json")
    "body" (printf `{"agent_types": %s,"disallow": "/"}` $agent_types)
  -}}
  {{- with resources.GetRemote $url $opts -}}
    {{- with .Err -}}
      {{- errorf "%s" . -}}
    {{- else -}}
      {{- .Content -}}
    {{- end -}}
  {{- else -}}
    {{- /* GetRemote returned nothing at all (no resource, no error value). */ -}}
    {{- errorf "Unable to get remote resource %q" $url -}}
  {{- end -}}
{{- end -}}

View file

@ -7,7 +7,5 @@ User-agent: *
Disallow:
# except for these bots which are not friends:
{{ range .Site.Params.bad_robots }}
User-agent: {{ . }}
{{- end }}
Disallow: /
{{ partial "dark-visitors.html" . }}