mirror of
https://github.com/jbowdre/runtimeterror.git
synced 2024-11-09 17:42:19 +00:00
update post with apple ai bot name, formatting fixes
This commit is contained in:
parent
be389e3895
commit
eb3a9d443c
1 changed file with 16 additions and 9 deletions
|
@ -1,7 +1,7 @@
|
||||||
---
|
---
|
||||||
title: "Blocking AI Crawlers"
|
title: "Blocking AI Crawlers"
|
||||||
date: 2024-04-12
|
date: 2024-04-12
|
||||||
lastmod: "2024-04-14T02:21:57Z"
|
lastmod: "2024-06-13T20:51:54Z"
|
||||||
description: "Using Hugo to politely ask AI bots to not steal my content - and then configuring Cloudflare's WAF to actively block them, just to be sure."
|
description: "Using Hugo to politely ask AI bots to not steal my content - and then configuring Cloudflare's WAF to actively block them, just to be sure."
|
||||||
featured: false
|
featured: false
|
||||||
toc: true
|
toc: true
|
||||||
|
@ -24,7 +24,7 @@ robots = [
|
||||||
"AdsBot-Google",
|
"AdsBot-Google",
|
||||||
"Amazonbot",
|
"Amazonbot",
|
||||||
"anthropic-ai",
|
"anthropic-ai",
|
||||||
"Applebot",
|
"Applebot-Extended",
|
||||||
"AwarioRssBot",
|
"AwarioRssBot",
|
||||||
"AwarioSmartBot",
|
"AwarioSmartBot",
|
||||||
"Bytespider",
|
"Bytespider",
|
||||||
|
@ -47,9 +47,6 @@ robots = [
|
||||||
"PerplexityBot",
|
"PerplexityBot",
|
||||||
"YouBot"
|
"YouBot"
|
||||||
]
|
]
|
||||||
|
|
||||||
[author]
|
|
||||||
name = "John Bowdre"
|
|
||||||
```
|
```
|
||||||
|
|
||||||
I then created a new template in `layouts/robots.txt`:
|
I then created a new template in `layouts/robots.txt`:
|
||||||
|
@ -57,9 +54,14 @@ I then created a new template in `layouts/robots.txt`:
|
||||||
```text
|
```text
|
||||||
Sitemap: {{ .Site.BaseURL }}/sitemap.xml
|
Sitemap: {{ .Site.BaseURL }}/sitemap.xml
|
||||||
|
|
||||||
|
# hello robots [^_^]
|
||||||
|
# let's be friends <3
|
||||||
|
|
||||||
User-agent: *
|
User-agent: *
|
||||||
Disallow:
|
Disallow:
|
||||||
{{ range .Site.Params.robots }}
|
|
||||||
|
# except for these bots which are not friends:
|
||||||
|
{{ range .Site.Params.robots }}
|
||||||
User-agent: {{ . }}
|
User-agent: {{ . }}
|
||||||
{{- end }}
|
{{- end }}
|
||||||
Disallow: /
|
Disallow: /
|
||||||
|
@ -74,15 +76,20 @@ enableRobotsTXT = true
|
||||||
Now Hugo will generate the following `robots.txt` file for me:
|
Now Hugo will generate the following `robots.txt` file for me:
|
||||||
|
|
||||||
```text
|
```text
|
||||||
Sitemap: https://runtimeterror.dev//sitemap.xml
|
Sitemap: https://runtimeterror.dev/sitemap.xml
|
||||||
|
|
||||||
|
# hello robots [^_^]
|
||||||
|
# let's be friends <3
|
||||||
|
|
||||||
User-agent: *
|
User-agent: *
|
||||||
Disallow:
|
Disallow:
|
||||||
|
|
||||||
|
# except for these bots which are not friends:
|
||||||
|
|
||||||
User-agent: AdsBot-Google
|
User-agent: AdsBot-Google
|
||||||
User-agent: Amazonbot
|
User-agent: Amazonbot
|
||||||
User-agent: anthropic-ai
|
User-agent: anthropic-ai
|
||||||
User-agent: Applebot
|
User-agent: Applebot-Extended
|
||||||
User-agent: AwarioRssBot
|
User-agent: AwarioRssBot
|
||||||
User-agent: AwarioSmartBot
|
User-agent: AwarioSmartBot
|
||||||
User-agent: Bytespider
|
User-agent: Bytespider
|
||||||
|
@ -129,7 +136,7 @@ So I added a [WAF Custom Rule](https://developers.cloudflare.com/waf/custom-rule
|
||||||
Here's the expression I'm using:
|
Here's the expression I'm using:
|
||||||
|
|
||||||
```text
|
```text
|
||||||
(http.user_agent contains "AdsBot-Google") or (http.user_agent contains "Amazonbot") or (http.user_agent contains "anthropic-ai") or (http.user_agent contains "Applebot") or (http.user_agent contains "AwarioRssBot") or (http.user_agent contains "AwarioSmartBot") or (http.user_agent contains "Bytespider") or (http.user_agent contains "CCBot") or (http.user_agent contains "ChatGPT-User") or (http.user_agent contains "ClaudeBot") or (http.user_agent contains "Claude-Web") or (http.user_agent contains "cohere-ai") or (http.user_agent contains "DataForSeoBot") or (http.user_agent contains "FacebookBot") or (http.user_agent contains "Google-Extended") or (http.user_agent contains "GoogleOther") or (http.user_agent contains "GPTBot") or (http.user_agent contains "ImagesiftBot") or (http.user_agent contains "magpie-crawler") or (http.user_agent contains "Meltwater") or (http.user_agent contains "omgili") or (http.user_agent contains "omgilibot") or (http.user_agent contains "peer39_crawler") or (http.user_agent contains "peer39_crawler/1.0") or (http.user_agent contains "PerplexityBot") or (http.user_agent contains "Seekr") or (http.user_agent contains "YouBot")
|
(http.user_agent contains "AdsBot-Google") or (http.user_agent contains "Amazonbot") or (http.user_agent contains "anthropic-ai") or (http.user_agent contains "Applebot-Extended") or (http.user_agent contains "AwarioRssBot") or (http.user_agent contains "AwarioSmartBot") or (http.user_agent contains "Bytespider") or (http.user_agent contains "CCBot") or (http.user_agent contains "ChatGPT-User") or (http.user_agent contains "ClaudeBot") or (http.user_agent contains "Claude-Web") or (http.user_agent contains "cohere-ai") or (http.user_agent contains "DataForSeoBot") or (http.user_agent contains "FacebookBot") or (http.user_agent contains "Google-Extended") or (http.user_agent contains "GoogleOther") or (http.user_agent contains "GPTBot") or (http.user_agent contains "ImagesiftBot") or (http.user_agent contains "magpie-crawler") or (http.user_agent contains "Meltwater") or (http.user_agent contains "omgili") or (http.user_agent contains "omgilibot") or (http.user_agent contains "peer39_crawler") or (http.user_agent contains "peer39_crawler/1.0") or (http.user_agent contains "PerplexityBot") or (http.user_agent contains "Seekr") or (http.user_agent contains "YouBot")
|
||||||
```
|
```
|
||||||
|
|
||||||
![Creating a custom WAF rule in Cloudflare's web UI](cloudflare-waf-rule.png)
|
![Creating a custom WAF rule in Cloudflare's web UI](cloudflare-waf-rule.png)
|
||||||
|
|
Loading…
Reference in a new issue