Skip to content

Commit

Permalink
Tell AI bots they're not welcome
Browse files Browse the repository at this point in the history
  • Loading branch information
henriksommerfeld committed Nov 1, 2024
1 parent 6484808 commit 926f368
Show file tree
Hide file tree
Showing 2 changed files with 60 additions and 1 deletion.
14 changes: 14 additions & 0 deletions scripts/update-ai-robots.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
#!/usr/bin/env bash
#
# Refresh the AI-crawler section of static/robots.txt.
#
# Downloads the robots.txt published by the ai.robots.txt project and uses it
# to replace everything between the "# Begin ai.robots.txt" and
# "# End ai.robots.txt" marker lines in ../static/robots.txt (relative to this
# script). The two marker lines themselves are kept.
#
# Requires: curl, GNU sed (--in-place), and a readlink that supports -f.
# Exit status: 0 on success; non-zero if the markers are missing, the download
# fails or is empty, or sed fails. robots.txt is left untouched on failure.

set -euo pipefail

die() {
  printf '%s\n' "$*" >&2
  exit 1
}

script_dir=$(dirname -- "$(readlink -f -- "${BASH_SOURCE[0]}")")
robots_txt_path="$script_dir/../static/robots.txt"
source_url="https://raw.githubusercontent.com/ai-robots-txt/ai.robots.txt/refs/heads/main/robots.txt"

# Marker lines delimiting the managed section. Dots are escaped so that only
# the literal marker text matches.
begin_marker='^# Begin ai\.robots\.txt$'
end_marker='^# End ai\.robots\.txt$'

# Without both markers sed would silently change nothing (or, with only the
# begin marker, delete to end of file), so fail loudly instead.
[[ -f "$robots_txt_path" ]] || die "robots.txt not found: $robots_txt_path"
grep -q -- "$begin_marker" "$robots_txt_path" \
  || die "begin marker missing in $robots_txt_path"
grep -q -- "$end_marker" "$robots_txt_path" \
  || die "end marker missing in $robots_txt_path"

temp_file=$(mktemp)
# Clean up on every exit path. The signal traps exit explicitly so the script
# never carries on after the temp file has been removed.
trap 'rm -f -- "$temp_file"' EXIT
trap 'exit 1' INT QUIT TERM

# --fail: treat HTTP errors as failures instead of saving the error page.
# --show-error: keep --silent from hiding the reason for a failure.
curl --fail --silent --show-error "$source_url" -o "$temp_file" \
  || die "failed to download $source_url"

# An empty download would wipe the managed section; refuse to apply it.
[[ -s "$temp_file" ]] || die "downloaded block list is empty: $source_url"

# Within the marker range: print the begin marker and queue the downloaded
# file after it, print the end marker, and delete every original line.
# The newline after the file name is required: sed's `r` command takes the
# rest of the line as the file name.
sed --in-place -e "/$begin_marker/,/$end_marker/{ /$begin_marker/{p; r $temp_file
}; /$end_marker/p; d }" "$robots_txt_path"
47 changes: 46 additions & 1 deletion static/robots.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,49 @@

# Allow crawling of all content
User-agent: *
Disallow:
Disallow:

# Begin ai.robots.txt
User-agent: AI2Bot
User-agent: Ai2Bot-Dolma
User-agent: Amazonbot
User-agent: anthropic-ai
User-agent: Applebot
User-agent: Applebot-Extended
User-agent: Bytespider
User-agent: CCBot
User-agent: ChatGPT-User
User-agent: Claude-Web
User-agent: ClaudeBot
User-agent: cohere-ai
User-agent: Diffbot
User-agent: DuckAssistBot
User-agent: FacebookBot
User-agent: facebookexternalhit
User-agent: FriendlyCrawler
User-agent: Google-Extended
User-agent: GoogleOther
User-agent: GoogleOther-Image
User-agent: GoogleOther-Video
User-agent: GPTBot
User-agent: iaskspider/2.0
User-agent: ICC-Crawler
User-agent: ImagesiftBot
User-agent: img2dataset
User-agent: ISSCyberRiskCrawler
User-agent: Kangaroo Bot
User-agent: Meta-ExternalAgent
User-agent: Meta-ExternalFetcher
User-agent: OAI-SearchBot
User-agent: omgili
User-agent: omgilibot
User-agent: PerplexityBot
User-agent: PetalBot
User-agent: Scrapy
User-agent: Sidetrade indexer bot
User-agent: Timpibot
User-agent: VelenPublicWebCrawler
User-agent: Webzio-Extended
User-agent: YouBot
Disallow: /
# End ai.robots.txt

0 comments on commit 926f368

Please sign in to comment.