This repository contains a comprehensive regex pattern for detecting AI and tech company domains, based on the domains of their services.
/.*(perplexity\.ai|you\.com|searchgpt\.com|chatgpt\.com|chat\.openai\.com|claude\.ai|anthropic\.com|gemini\.google\.com|bard\.google\.com|bing\.com.*chat|copilot\.microsoft\.com|phind\.com|kagi\.com|deepseek\.com|chat\.deepseek\.com|mistral\.ai|chat\.mistral\.ai|lechat\.mistral\.ai|cohere\.com|meta\.ai|groq\.com|together\.ai|fireworks\.ai|openrouter\.ai|grok\.x\.com|x\.com.*grok|pi\.ai|inflection\.ai|cerebras\.ai|openai\.com|google\.com.*bard|google\.com.*gemini|microsoft\.com.*copilot).*/i
- The regex starts with `.*`, which matches any characters before the domain.
- All patterns will match the specified domain/subdomain plus any path that follows.
- Patterns with `.*` in the middle (like `bing\.com.*chat`) match the domain followed by any characters, then the specified text.
- The `i` flag makes the entire regex case-insensitive.
- The "Effective Pattern" column shows how each individual pattern works within the full `.*(...).*/i` structure.
/**
 * Case-insensitive matcher for URLs that contain a known AI/tech service domain.
 *
 * Capture group 1 holds the matched domain pattern. The leading `.*?` is lazy
 * on purpose: with a greedy `.*` the engine backtracks from the end of the
 * string, so group 1 would hold the LAST alternative that matches anywhere in
 * the URL (e.g. `openai.com` for `https://chat.openai.com/chat`, or a domain
 * that only appears in the query string) instead of the first one.
 *
 * NOTE: matching is substring-based with no host-boundary check, so e.g.
 * `you\.com` also matches inside `thankyou.com`.
 */
const aiDomainsRegex = /.*?(perplexity\.ai|you\.com|searchgpt\.com|chatgpt\.com|chat\.openai\.com|claude\.ai|anthropic\.com|gemini\.google\.com|bard\.google\.com|bing\.com.*chat|copilot\.microsoft\.com|phind\.com|kagi\.com|deepseek\.com|chat\.deepseek\.com|mistral\.ai|chat\.mistral\.ai|lechat\.mistral\.ai|cohere\.com|meta\.ai|groq\.com|together\.ai|fireworks\.ai|openrouter\.ai|grok\.x\.com|x\.com.*grok|pi\.ai|inflection\.ai|cerebras\.ai|openai\.com|google\.com.*bard|google\.com.*gemini|microsoft\.com.*copilot).*/i;

// Test URLs
console.log(aiDomainsRegex.test('https://chat.openai.com/chat')); // true
console.log(aiDomainsRegex.test('https://claude.ai')); // true
console.log(aiDomainsRegex.test('https://bing.com/chat')); // true
console.log(aiDomainsRegex.test('https://example.com')); // false

// Extract matches: group 1 is the first (leftmost) domain pattern found in the URL.
const url = 'https://chat.openai.com/chat';
const match = url.match(aiDomainsRegex);
console.log(match ? match[1] : 'No match'); // chat.openai.com
import re
# Regex source for known AI/tech service domains. Group 1 captures the matched
# domain pattern. The leading `.*?` is lazy on purpose: a greedy `.*` backtracks
# from the end of the string, so group 1 would hold the LAST alternative found
# anywhere in the URL (e.g. `openai.com` for `https://chat.openai.com/chat`)
# rather than the first one.
# NOTE: matching is substring-based with no host-boundary check, so e.g.
# `you\.com` also matches inside `thankyou.com`.
ai_domains_pattern = r'.*?(perplexity\.ai|you\.com|searchgpt\.com|chatgpt\.com|chat\.openai\.com|claude\.ai|anthropic\.com|gemini\.google\.com|bard\.google\.com|bing\.com.*chat|copilot\.microsoft\.com|phind\.com|kagi\.com|deepseek\.com|chat\.deepseek\.com|mistral\.ai|chat\.mistral\.ai|lechat\.mistral\.ai|cohere\.com|meta\.ai|groq\.com|together\.ai|fireworks\.ai|openrouter\.ai|grok\.x\.com|x\.com.*grok|pi\.ai|inflection\.ai|cerebras\.ai|openai\.com|google\.com.*bard|google\.com.*gemini|microsoft\.com.*copilot).*'
ai_domains_regex = re.compile(ai_domains_pattern, re.IGNORECASE)

# Test URLs
test_urls = [
    'https://chat.openai.com/chat',
    'https://claude.ai',
    'https://bing.com/chat',
    'https://example.com'
]

for url in test_urls:
    # re.match anchors at the start of the string; the pattern's own `.*?`
    # prefix is what lets the domain appear after the scheme.
    if ai_domains_regex.match(url):
        print(f"✓ {url} matches AI domain pattern")
    else:
        print(f"✗ {url} does not match")


# Extract AI domain from URL
def extract_ai_domain(url):
    """Return the first AI domain pattern found in ``url``, or ``None``.

    The returned text is capture group 1 of ``ai_domains_regex``, i.e. the
    leftmost alternative that matches in the URL.
    """
    match = ai_domains_regex.match(url)
    return match.group(1) if match else None


print(extract_ai_domain('https://chat.openai.com/chat'))  # chat.openai.com
library(stringr)
# Regex source for known AI/tech service domains. Group 1 captures the matched
# domain pattern. The leading `.*?` is lazy on purpose: a greedy `.*` backtracks
# from the end of the string, so group 1 would hold the LAST alternative found
# anywhere in the URL (e.g. "openai.com" for "https://chat.openai.com/chat")
# rather than the first one.
ai_domains_pattern <- ".*?(perplexity\\.ai|you\\.com|searchgpt\\.com|chatgpt\\.com|chat\\.openai\\.com|claude\\.ai|anthropic\\.com|gemini\\.google\\.com|bard\\.google\\.com|bing\\.com.*chat|copilot\\.microsoft\\.com|phind\\.com|kagi\\.com|deepseek\\.com|chat\\.deepseek\\.com|mistral\\.ai|chat\\.mistral\\.ai|lechat\\.mistral\\.ai|cohere\\.com|meta\\.ai|groq\\.com|together\\.ai|fireworks\\.ai|openrouter\\.ai|grok\\.x\\.com|x\\.com.*grok|pi\\.ai|inflection\\.ai|cerebras\\.ai|openai\\.com|google\\.com.*bard|google\\.com.*gemini|microsoft\\.com.*copilot).*"

# Test URLs
test_urls <- c(
  "https://chat.openai.com/chat",
  "https://claude.ai",
  "https://bing.com/chat",
  "https://example.com"
)

# Check matches (one logical value per URL, named by the URL itself)
results <- str_detect(test_urls, regex(ai_domains_pattern, ignore_case = TRUE))
names(results) <- test_urls
print(results)

# Extract AI domains
# Returns capture group 1 for a single URL, or NA when the URL does not match.
extract_ai_domain <- function(url) {
  match <- str_match(url, regex(ai_domains_pattern, ignore_case = TRUE))
  # For a single input string, str_match yields a 1 x 2 matrix:
  # element 1 is the full match, element 2 is capture group 1.
  return(match[2])
}

sapply(test_urls, extract_ai_domain)
import java.util.regex.Pattern;
import java.util.regex.Matcher;
/**
 * Detects URLs that contain a known AI/tech service domain and extracts the
 * matched domain pattern.
 *
 * <p>Matching is substring-based with no host-boundary check, so for example
 * {@code you\.com} also matches inside {@code thankyou.com}.
 */
public class AIDomainMatcher {

    /**
     * Regex source; group 1 captures the matched domain pattern.
     *
     * <p>The leading {@code .*?} is lazy on purpose: a greedy {@code .*}
     * backtracks from the end of the input, so group 1 would hold the LAST
     * alternative found anywhere in the URL (e.g. {@code openai.com} for
     * {@code https://chat.openai.com/chat}) rather than the first one.
     */
    private static final String AI_DOMAINS_PATTERN =
            ".*?(perplexity\\.ai|you\\.com|searchgpt\\.com|chatgpt\\.com|chat\\.openai\\.com|claude\\.ai|anthropic\\.com|gemini\\.google\\.com|bard\\.google\\.com|bing\\.com.*chat|copilot\\.microsoft\\.com|phind\\.com|kagi\\.com|deepseek\\.com|chat\\.deepseek\\.com|mistral\\.ai|chat\\.mistral\\.ai|lechat\\.mistral\\.ai|cohere\\.com|meta\\.ai|groq\\.com|together\\.ai|fireworks\\.ai|openrouter\\.ai|grok\\.x\\.com|x\\.com.*grok|pi\\.ai|inflection\\.ai|cerebras\\.ai|openai\\.com|google\\.com.*bard|google\\.com.*gemini|microsoft\\.com.*copilot).*";

    /** Compiled once and shared by all calls. */
    private static final Pattern AI_DOMAINS_REGEX =
            Pattern.compile(AI_DOMAINS_PATTERN, Pattern.CASE_INSENSITIVE);

    /**
     * Tells whether the URL contains one of the known AI domain patterns.
     *
     * @param url the URL text to test
     * @return {@code true} if the whole input matches the pattern
     */
    public static boolean isAIDomain(String url) {
        return AI_DOMAINS_REGEX.matcher(url).matches();
    }

    /**
     * Extracts the first AI domain pattern found in the URL.
     *
     * @param url the URL text to inspect
     * @return the text of capture group 1, or {@code null} when the URL does not match
     */
    public static String extractAIDomain(String url) {
        Matcher matcher = AI_DOMAINS_REGEX.matcher(url);
        return matcher.matches() ? matcher.group(1) : null;
    }

    /** Prints the match result and extracted domain for a few sample URLs. */
    public static void main(String[] args) {
        String[] testUrls = {
            "https://chat.openai.com/chat",
            "https://claude.ai",
            "https://bing.com/chat",
            "https://example.com"
        };
        for (String url : testUrls) {
            System.out.println(url + " -> " + isAIDomain(url));
            System.out.println("Extracted: " + extractAIDomain(url));
        }
    }
}
<?php
// Case-insensitive PCRE pattern for known AI/tech service domains. Group 1
// captures the matched domain pattern. The leading `.*?` is lazy on purpose:
// a greedy `.*` backtracks from the end of the subject, so group 1 would hold
// the LAST alternative found anywhere in the URL (e.g. "openai.com" for
// "https://chat.openai.com/chat") rather than the first one.
$ai_domains_pattern = '/.*?(perplexity\.ai|you\.com|searchgpt\.com|chatgpt\.com|chat\.openai\.com|claude\.ai|anthropic\.com|gemini\.google\.com|bard\.google\.com|bing\.com.*chat|copilot\.microsoft\.com|phind\.com|kagi\.com|deepseek\.com|chat\.deepseek\.com|mistral\.ai|chat\.mistral\.ai|lechat\.mistral\.ai|cohere\.com|meta\.ai|groq\.com|together\.ai|fireworks\.ai|openrouter\.ai|grok\.x\.com|x\.com.*grok|pi\.ai|inflection\.ai|cerebras\.ai|openai\.com|google\.com.*bard|google\.com.*gemini|microsoft\.com.*copilot).*/i';

// Sample URLs: three that should match and one that should not.
$test_urls = [
    'https://chat.openai.com/chat',
    'https://claude.ai',
    'https://bing.com/chat',
    'https://example.com'
];

foreach ($test_urls as $url) {
    // $matches[1] is capture group 1, i.e. the matched domain pattern.
    if (preg_match($ai_domains_pattern, $url, $matches)) {
        echo "✓ $url matches AI domain: {$matches[1]}\n";
    } else {
        echo "✗ $url does not match\n";
    }
}

/**
 * Extracts the first AI domain pattern found in a URL.
 *
 * Reads the pattern from the global $ai_domains_pattern.
 *
 * @param string $url URL text to inspect.
 * @return string|null Capture group 1 on a match, otherwise null.
 */
function extractAIDomain($url) {
    global $ai_domains_pattern;
    return preg_match($ai_domains_pattern, $url, $matches) ? $matches[1] : null;
}
?>
package main
import (
"fmt"
"regexp"
)
// main checks a few sample URLs against the AI-domain regex and prints, for
// each one, either the matched domain pattern or a "does not match" line.
func main() {
	// Group 1 captures the matched domain pattern. The leading `.*?` is lazy on
	// purpose: with a greedy `.*` group 1 would hold the LAST alternative found
	// anywhere in the URL (e.g. "openai.com" for "https://chat.openai.com/chat")
	// rather than the first one.
	aiDomainsPattern := `(?i).*?(perplexity\.ai|you\.com|searchgpt\.com|chatgpt\.com|chat\.openai\.com|claude\.ai|anthropic\.com|gemini\.google\.com|bard\.google\.com|bing\.com.*chat|copilot\.microsoft\.com|phind\.com|kagi\.com|deepseek\.com|chat\.deepseek\.com|mistral\.ai|chat\.mistral\.ai|lechat\.mistral\.ai|cohere\.com|meta\.ai|groq\.com|together\.ai|fireworks\.ai|openrouter\.ai|grok\.x\.com|x\.com.*grok|pi\.ai|inflection\.ai|cerebras\.ai|openai\.com|google\.com.*bard|google\.com.*gemini|microsoft\.com.*copilot).*`
	regex := regexp.MustCompile(aiDomainsPattern)

	testUrls := []string{
		"https://chat.openai.com/chat",
		"https://claude.ai",
		"https://bing.com/chat",
		"https://example.com",
	}

	for _, url := range testUrls {
		// FindStringSubmatch returns nil when there is no match, so a single
		// call serves as both the test and the extraction.
		if matches := regex.FindStringSubmatch(url); matches != nil {
			fmt.Printf("✓ %s matches AI domain: %s\n", url, matches[1])
		} else {
			fmt.Printf("✗ %s does not match\n", url)
		}
	}
}
using System;
using System.Text.RegularExpressions;
/// <summary>
/// Console sample that checks a few URLs against the AI-domain regex and
/// prints the matched domain pattern for each URL that matches.
/// </summary>
class Program
{
    // Group 1 captures the matched domain pattern. The leading ".*?" is lazy on
    // purpose: a greedy ".*" backtracks from the end of the input, so group 1
    // would hold the LAST alternative found anywhere in the URL (e.g.
    // "openai.com" for "https://chat.openai.com/chat") rather than the first one.
    private const string AiDomainsPattern =
        @".*?(perplexity\.ai|you\.com|searchgpt\.com|chatgpt\.com|chat\.openai\.com|claude\.ai|anthropic\.com|gemini\.google\.com|bard\.google\.com|bing\.com.*chat|copilot\.microsoft\.com|phind\.com|kagi\.com|deepseek\.com|chat\.deepseek\.com|mistral\.ai|chat\.mistral\.ai|lechat\.mistral\.ai|cohere\.com|meta\.ai|groq\.com|together\.ai|fireworks\.ai|openrouter\.ai|grok\.x\.com|x\.com.*grok|pi\.ai|inflection\.ai|cerebras\.ai|openai\.com|google\.com.*bard|google\.com.*gemini|microsoft\.com.*copilot).*";

    // CultureInvariant keeps the case-insensitive comparison independent of the
    // current culture (domain names are not linguistic text).
    private static readonly Regex AiDomainsRegex =
        new Regex(AiDomainsPattern, RegexOptions.IgnoreCase | RegexOptions.CultureInvariant);

    /// <summary>
    /// Runs the sample URLs through the regex and writes one result line per URL.
    /// </summary>
    static void Main()
    {
        string[] testUrls =
        {
            "https://chat.openai.com/chat",
            "https://claude.ai",
            "https://bing.com/chat",
            "https://example.com"
        };

        foreach (string url in testUrls)
        {
            Match match = AiDomainsRegex.Match(url);
            if (match.Success)
            {
                Console.WriteLine($"✓ {url} matches AI domain: {match.Groups[1].Value}");
            }
            else
            {
                Console.WriteLine($"✗ {url} does not match");
            }
        }
    }
}
- AI Assistants: 12 patterns
- AI Companies: 8 patterns
- Search Engines: 3 patterns
- AI Platforms: 4 patterns
- AI Hardware: 1 pattern
- AI Inference: 1 pattern
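Total: 33 unique patterns in the regex alternation, covering major AI and tech domains (the category counts above sum to 29, so they do not account for every pattern)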