# As a condition of accessing this website, you agree to abide by the following
# content signals:

# (a)  If a Content-Signal = yes, you may collect content for the corresponding
#      use.
# (b)  If a Content-Signal = no, you may not collect content for the
#      corresponding use.
# (c)  If the website operator does not include a Content-Signal for a
#      corresponding use, the website operator neither grants nor restricts
#      permission via Content-Signal with respect to the corresponding use.

# The content signals and their meanings are:

# search:   building a search index and providing search results (e.g., returning
#           hyperlinks and short excerpts from your website's contents). Search does not
#           include providing AI-generated search summaries.
# ai-input: inputting content into one or more AI models (e.g., retrieval
#           augmented generation, grounding, or other real-time taking of content for
#           generative AI search answers).
# ai-train: training or fine-tuning AI models.

# ANY RESTRICTIONS EXPRESSED VIA CONTENT SIGNALS ARE EXPRESS RESERVATIONS OF
# RIGHTS UNDER ARTICLE 4 OF THE EUROPEAN UNION DIRECTIVE 2019/790 ON COPYRIGHT
# AND RELATED RIGHTS IN THE DIGITAL SINGLE MARKET.

# BEGIN Cloudflare Managed content

# NOTE(review): the BEGIN/END markers suggest this section is generated by
# Cloudflare -- confirm before hand-editing, since manual changes may be
# overwritten.
#
# Default group: used by crawlers that match no more specific User-agent group
# in this file. It allows the whole site and declares the content signals
# search=yes and ai-train=no. No ai-input signal is declared, so under clause
# (c) of the header above that use is neither granted nor restricted here.
User-agent: *
Content-Signal: search=yes,ai-train=no
Allow: /

# Each group below names a single crawler and disallows the entire site for it.
User-agent: Amazonbot
Disallow: /

User-agent: Applebot-Extended
Disallow: /

User-agent: Bytespider
Disallow: /

User-agent: CCBot
Disallow: /

User-agent: ClaudeBot
Disallow: /

User-agent: Google-Extended
Disallow: /

User-agent: GPTBot
Disallow: /

User-agent: meta-externalagent
Disallow: /

# END Cloudflare Managed Content

# =============================================================================
# 1. ALLOW AI SEARCH ENGINES (traffic sources) -- full site access
# =============================================================================
# These crawlers are allowed explicitly so that the site's news can appear in
# search results and AI answers. They share one rule, so they are declared as
# a single group:
#
#   OAI-SearchBot    OpenAI Search (SearchGPT) - live answers, not training
#   PerplexityBot    Perplexity AI - citation-based answer engine
#   Googlebot        Google Search (standard)
#   Googlebot-News   Google News
#   Bingbot          Bing Search (powers Microsoft Copilot)
#
# Keep the User-agent lines contiguous: a blank line between them may be read
# as the end of the group by some parsers.
#
# NOTE(review): this group carries no Content-Signal line. Confirm whether the
# search=yes,ai-train=no signal declared on the "*" group is meant to reach
# these crawlers as well.
User-agent: OAI-SearchBot
User-agent: PerplexityBot
User-agent: Googlebot
User-agent: Googlebot-News
User-agent: Bingbot
Allow: /

# =============================================================================
# 2. ALLOW SOCIAL PREVIEWS (viral traffic) -- full site access
# =============================================================================
# Link-preview fetchers need access so that shared links render with proper
# images and headlines. All of them get the same rule, declared once:
#
#   facebookexternalhit      Facebook / Meta (link previews only)
#   Twitterbot               X (Twitter) Cards
#   LinkedInBot              LinkedIn
#   Slackbot-LinkExpanding   Slack (link unfurling)
#   Discordbot               Discord (link previews)
#
# Keep the User-agent lines contiguous: a blank line between them may be read
# as the end of the group by some parsers.
User-agent: facebookexternalhit
User-agent: Twitterbot
User-agent: LinkedInBot
User-agent: Slackbot-LinkExpanding
User-agent: Discordbot
Allow: /

# =============================================================================
# 3. BLOCK AI TRAINING & SCRAPERS (data safety) -- no access
# =============================================================================
# Crawlers that collect content for model training without sending traffic
# back are shut out of the whole site. One shared rule, declared once:
#
#   GPTBot               OpenAI training (ChatGPT model training)
#   Google-Extended      Google Gemini training
#   Applebot-Extended    Apple Intelligence training
#   ClaudeBot            Anthropic (Claude) training
#   CCBot                Common Crawl (used by many unauthorized AI models)
#   Bytespider           ByteDance (TikTok/Doubao) - aggressive scraper
#   Amazonbot            Amazon (Alexa/training)
#   meta-externalagent   Meta/Facebook training crawler
#
# These agents are also disallowed in the Cloudflare-managed section earlier in
# this file; the rules are repeated here so the block does not depend on that
# section being present.
#
# Keep the User-agent lines contiguous: a blank line between them may be read
# as the end of the group by some parsers.
User-agent: GPTBot
User-agent: Google-Extended
User-agent: Applebot-Extended
User-agent: ClaudeBot
User-agent: CCBot
User-agent: Bytespider
User-agent: Amazonbot
User-agent: meta-externalagent
Disallow: /

# -----------------------------------------------------------------------------
# 4. GENERAL FALLBACK
# -----------------------------------------------------------------------------
# Allow all other well-behaved bots.
# Crawlers that match none of the named groups in this file use the "*" rules
# and may fetch the entire site.
# NOTE(review): a "*" group also appears in the Cloudflare-managed section
# earlier in this file with the same Allow rule. Parsers that follow RFC 9309
# combine groups naming the same user agent, so the two should not conflict --
# confirm for any crawler you depend on.
User-agent: *
Allow: /

# Sitemap location. This record stands outside the User-agent groups and is
# not tied to any particular crawler.
Sitemap: https://www.cbtnews.com/sitemap_index.xml