mirror of
https://github.com/jbowdre/runtimeterror.git
synced 2024-11-21 14:32:19 +00:00
generate robots.txt from darkvisitors api
This commit is contained in:
parent
b15c943f0b
commit
ff4c91e6f6
6 changed files with 41 additions and 33 deletions
5
.github/workflows/deploy-prod.yml
vendored
5
.github/workflows/deploy-prod.yml
vendored
|
@ -45,7 +45,10 @@ jobs:
|
|||
echo "${{ secrets.SSH_KNOWN_HOSTS }}" > ~/.ssh/known_hosts
|
||||
chmod 644 ~/.ssh/known_hosts
|
||||
- name: Build with Hugo
|
||||
run: HUGO_REMOTE_FONT_PATH=${{ secrets.REMOTE_FONT_PATH }} hugo --minify
|
||||
run: |
|
||||
HUGO_REMOTE_FONT_PATH=${{ secrets.REMOTE_FONT_PATH }} \
|
||||
HUGO_DARKVISITORS=${{ secrets.DARKVISITORS_TOKEN }} \
|
||||
hugo --minify
|
||||
- name: Insert 404 page
|
||||
run: |
|
||||
mkdir -p public/bunnycdn_errors
|
||||
|
|
|
@ -8,32 +8,10 @@ numberOfRelatedPosts = 5
|
|||
|
||||
indexTitle = ".-. ..- -. - .. -- . - . .-. .-. --- .-."
|
||||
|
||||
bad_robots = [
|
||||
"AdsBot-Google",
|
||||
"Amazonbot",
|
||||
"anthropic-ai",
|
||||
"Applebot-Extended",
|
||||
"AwarioRssBot",
|
||||
"AwarioSmartBot",
|
||||
"Bytespider",
|
||||
"CCBot",
|
||||
"ChatGPT",
|
||||
"ChatGPT-User",
|
||||
"Claude-Web",
|
||||
"ClaudeBot",
|
||||
"cohere-ai",
|
||||
"DataForSeoBot",
|
||||
"Diffbot",
|
||||
"FacebookBot",
|
||||
"Google-Extended",
|
||||
"GPTBot",
|
||||
"ImagesiftBot",
|
||||
"magpie-crawler",
|
||||
"omgili",
|
||||
"Omgilibot",
|
||||
"peer39_crawler",
|
||||
"PerplexityBot",
|
||||
"YouBot"
|
||||
darkVisitors = [
|
||||
"AI Assistant",
|
||||
"AI Data Scraper",
|
||||
"AI Search Crawler"
|
||||
]
|
||||
|
||||
# Comments
|
||||
|
|
|
@ -1 +1,2 @@
|
|||
baseURL = "http://localhost:1313/"
|
||||
enableRobotsTXT = false
|
|
@ -1 +1,2 @@
|
|||
baseURL = "https://preview.runtimeterror.dev/"
|
||||
enableRobotsTXT = false
|
27
layouts/partials/dark-visitors.html
Normal file
27
layouts/partials/dark-visitors.html
Normal file
|
@ -0,0 +1,27 @@
|
|||
{{/* borrowed from https://github.com/lkhrs/hugo-dark-visitors */}}
{{- /*
  dark-visitors.html

  Requests robots.txt rules from the Dark Visitors API and emits the response
  body verbatim, so the caller (the robots.txt template) can append it.

  Inputs:
    - HUGO_DARKVISITORS (environment): API token, sent as a Bearer credential.
    - .Site.Params.darkVisitors (optional list): agent types to disallow;
      falls back to "AI Data Scraper" when the param is unset or empty.

  Errors (reported through errorf, which fails the build):
    - the token is not set,
    - the remote call returns an error,
    - no resource is returned at all.

  Every action trims surrounding whitespace so the only output is the API
  response itself.
*/ -}}
{{- $url := "https://api.darkvisitors.com/robots-txts" -}}
{{- $api_key := getenv "HUGO_DARKVISITORS" -}}
{{- if not $api_key -}}
  {{- /* Fail with an actionable message instead of sending an empty Bearer token. */ -}}
  {{- errorf "HUGO_DARKVISITORS is not set; unable to request %q" $url -}}
{{- else -}}
  {{- $bearer := printf "Bearer %v" $api_key -}}
  {{- /* Use the configured agent types when present, otherwise the default. */ -}}
  {{- $agent_types := slice "AI Data Scraper" -}}
  {{- with .Site.Params.darkVisitors -}}
    {{- $agent_types = . -}}
  {{- end -}}
  {{- /* Keep the JSON-encoded form in its own variable rather than shadowing the list. */ -}}
  {{- $agent_types_json := $agent_types | jsonify -}}
  {{- $opts := dict
    "method" "post"
    "headers" (dict "Authorization" (slice $bearer) "Content-Type" "application/json")
    "body" (printf `{"agent_types": %s,"disallow": "/"}` $agent_types_json)
  -}}
  {{- with resources.GetRemote $url $opts -}}
    {{- with .Err -}}
      {{- errorf "%s" . -}}
    {{- else -}}
      {{- .Content -}}
    {{- end -}}
  {{- else -}}
    {{- errorf "Unable to get remote resource %q" $url -}}
  {{- end -}}
{{- end -}}
|
|
@ -7,7 +7,5 @@ User-agent: *
|
|||
Disallow:
|
||||
|
||||
# except for these bots which are not friends:
|
||||
{{ range .Site.Params.bad_robots }}
|
||||
User-agent: {{ . }}
|
||||
{{- end }}
|
||||
Disallow: /
|
||||
|
||||
{{ partial "dark-visitors.html" . }}
|
||||
|
|
Loading…
Reference in a new issue