mirror of https://github.com/jbowdre/runtimeterror.git (synced 2024-11-21 22:42:19 +00:00)

generate robots.txt from darkvisitors api

This commit is contained in:
parent b15c943f0b
commit ff4c91e6f6

6 changed files with 41 additions and 33 deletions
.github/workflows/deploy-prod.yml (vendored): 5 changes

@@ -45,7 +45,10 @@ jobs:
           echo "${{ secrets.SSH_KNOWN_HOSTS }}" > ~/.ssh/known_hosts
           chmod 644 ~/.ssh/known_hosts
       - name: Build with Hugo
-        run: HUGO_REMOTE_FONT_PATH=${{ secrets.REMOTE_FONT_PATH }} hugo --minify
+        run: |
+          HUGO_REMOTE_FONT_PATH=${{ secrets.REMOTE_FONT_PATH }} \
+          HUGO_DARKVISITORS=${{ secrets.DARKVISITORS_TOKEN }} \
+          hugo --minify
       - name: Insert 404 page
         run: |
           mkdir -p public/bunnycdn_errors
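The build step now exports the Dark Visitors API token into Hugo's environment alongside the existing font path. Hugo exposes environment variables prefixed with HUGO_ to templates via getenv, which is how the new partial below reads the token. A minimal local equivalent, assuming you have exported the two secrets as the hypothetical shell variables REMOTE_FONT_PATH and DARKVISITORS_TOKEN:

    # sketch of a local build; the variable names are placeholders for your own secrets
    HUGO_REMOTE_FONT_PATH="$REMOTE_FONT_PATH" \
    HUGO_DARKVISITORS="$DARKVISITORS_TOKEN" \
    hugo --minify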
@@ -8,32 +8,10 @@ numberOfRelatedPosts = 5
 
 indexTitle = ".-. ..- -. - .. -- . - . .-. .-. --- .-."
 
-bad_robots = [
-  "AdsBot-Google",
-  "Amazonbot",
-  "anthropic-ai",
-  "Applebot-Extended",
-  "AwarioRssBot",
-  "AwarioSmartBot",
-  "Bytespider",
-  "CCBot",
-  "ChatGPT",
-  "ChatGPT-User",
-  "Claude-Web",
-  "ClaudeBot",
-  "cohere-ai",
-  "DataForSeoBot",
-  "Diffbot",
-  "FacebookBot",
-  "Google-Extended",
-  "GPTBot",
-  "ImagesiftBot",
-  "magpie-crawler",
-  "omgili",
-  "Omgilibot",
-  "peer39_crawler",
-  "PerplexityBot",
-  "YouBot"
+darkVisitors = [
+  "AI Assistant",
+  "AI Data Scraper",
+  "AI Search Crawler"
 ]
 
 # Comments
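The long hand-maintained user-agent list is replaced by three Dark Visitors agent-type categories, which the API expands into current user agents server-side. A minimal sketch of the JSON body the new partial builds from this parameter (see the printf in dark-visitors.html below):

    {"agent_types": ["AI Assistant","AI Data Scraper","AI Search Crawler"],"disallow": "/"}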
@@ -1 +1,2 @@
 baseURL = "http://localhost:1313/"
+enableRobotsTXT = false
@@ -1 +1,2 @@
 baseURL = "https://preview.runtimeterror.dev/"
+enableRobotsTXT = false
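Setting enableRobotsTXT = false in the local and preview configs stops Hugo from rendering the robots.txt template, and therefore from calling the Dark Visitors API, in those environments. Presumably the production config, which this diff does not show, keeps the feature on; a sketch of that assumed setting:

    # assumed, not shown in this commit
    enableRobotsTXT = true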
layouts/partials/dark-visitors.html (new file): 27 lines

@@ -0,0 +1,27 @@
+{{/* borrowed from https://github.com/lkhrs/hugo-dark-visitors */}}
+{{- $url := "https://api.darkvisitors.com/robots-txts" -}}
+{{- $api_key := getenv "HUGO_DARKVISITORS" -}}
+{{- $bearer := printf "Bearer %v" $api_key -}}
+{{- $agent_types := slice -}}
+{{- if .Site.Params.darkVisitors -}}
+  {{- range .Site.Params.darkVisitors -}}
+    {{- $agent_types = $agent_types | append . -}}
+  {{- end -}}
+{{- else -}}
+  {{- $agent_types = slice "AI Data Scraper" -}}
+{{- end -}}
+{{- $agent_types := $agent_types | jsonify -}}
+{{- $opts := dict
+  "method" "post"
+  "headers" (dict "Authorization" (slice $bearer) "Content-Type" "application/json")
+  "body" (printf `{"agent_types": %s,"disallow": "/"}` $agent_types)
+-}}
+{{- with resources.GetRemote $url $opts -}}
+  {{- with .Err -}}
+    {{- errorf "%s" . -}}
+  {{- else -}}
+    {{- .Content -}}
+  {{- end -}}
+{{- else -}}
+  {{- errorf "Unable to get remote resource %q" $url -}}
+{{- end -}}
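The partial collects the agent types from site params (falling back to "AI Data Scraper" if none are set), POSTs them to the Dark Visitors robots-txts endpoint with the bearer token, and emits the returned rules, failing the build on any error. For reference, a sketch of the equivalent request expressed as curl; the endpoint, headers, and body mirror the template above, and the token value is a placeholder:

    curl -s https://api.darkvisitors.com/robots-txts \
      -H "Authorization: Bearer $HUGO_DARKVISITORS" \
      -H "Content-Type: application/json" \
      -d '{"agent_types": ["AI Data Scraper"], "disallow": "/"}'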
@@ -7,7 +7,5 @@ User-agent: *
 Disallow:
 
 # except for these bots which are not friends:
-{{ range .Site.Params.bad_robots }}
-User-agent: {{ . }}
-{{- end }}
-Disallow: /
+{{ partial "dark-visitors.html" . }}
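The robots.txt template now delegates the blocklist to the partial instead of looping over the old bad_robots list. A rough, illustrative sketch of the kind of rules the API returns (the actual agent names and grouping depend on the API's current data; these two appear in the old list):

    User-agent: GPTBot
    Disallow: /

    User-agent: PerplexityBot
    Disallow: /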