# Earthquake Archive - robots.txt
# This file only guides well-behaved crawlers; it is NOT an access control.
# Real security is handled by .htaccess.

# ====== Default rules (every crawler not named in a group below) ======
User-agent: *
# Block duplicate content for SEO (NOT security).
# Block search query parameters to prevent index bloat.
# (The second pattern already covers the first; both are kept to make the
# intent explicit.)
Disallow: /search?*
Disallow: /*?*
# Block error pages from indexing.
Disallow: /404
Disallow: /403
Disallow: /500
# Keep the catch-all Allow LAST in this group. Longest-match parsers
# (RFC 9309, Google, Bing) ignore rule order, but first-match parsers stop at
# the first rule that matches, so a leading "Allow: /" would hide every
# Disallow above from them.
Allow: /

# ====== Allow All AI Bots ======
# These bots crawl for AI training and knowledge bases.
# One group with several User-agent lines: the rule below applies to each of
# them. No blank lines inside the group, since some legacy parsers treat a
# blank line as the end of a record.
# NOTE(review): a crawler that matches one of these names follows ONLY this
# group and ignores the "*" group, so the Disallow rules above do not apply
# to these bots - confirm that this is intended.
# OpenAI (ChatGPT)
User-agent: GPTBot
User-agent: ChatGPT-User
# Anthropic (Claude)
User-agent: ClaudeBot
User-agent: anthropic-ai
# Perplexity AI
User-agent: PerplexityBot
# Google Extended (for Bard/Gemini)
User-agent: Google-Extended
# Apple Intelligence
User-agent: Applebot
User-agent: Applebot-Extended
# Meta AI
User-agent: FacebookBot
User-agent: Meta-ExternalAgent
# Cohere AI
User-agent: cohere-ai
# Common Crawl (research dataset)
User-agent: CCBot
# Diffbot
User-agent: Diffbot
# Bytedance (TikTok)
User-agent: Bytespider
Allow: /

# ====== Sitemap Location ======
Sitemap: https://earthquakearchive.com/sitemap.xml