Skip to content

Commit

Permalink
[submodule:helpers] Add new method isAIBot(): detect AI bots
Browse files Browse the repository at this point in the history
  • Loading branch information
faisalman committed Nov 16, 2024
1 parent 5b375b9 commit 70b3003
Show file tree
Hide file tree
Showing 5 changed files with 102 additions and 6 deletions.
10 changes: 9 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,15 @@ see what's new & breaking.
<td>✅</td>
</tr>
<tr>
<td>Extras (Apps, Libs, Emails, Media Players, etc)</td>
<td>AI Bot detection</td>
<td>❌</td>
<td>✅</td>
<td>✅</td>
<td>✅</td>
<td>✅</td>
</tr>
<tr>
<td>Extras (Apps, Libs, Emails, Media Players, etc.) detection</td>
<td>❌</td>
<td>✅</td>
<td>✅</td>
Expand Down
8 changes: 4 additions & 4 deletions src/extensions/ua-parser-extensions.js
Original file line number Diff line number Diff line change
Expand Up @@ -90,8 +90,8 @@ const Crawlers = Object.freeze({
// Yeti (Naver)
/(yeti)\/([\w\.]+)/i,

// aiHitBot / Cohere-AI / Diffbot / Magpie-Crawler / Omgilibot / Webzio-Extended / Screaming Frog SEO Spider / Timpibot / VelenPublicWebCrawler / YisouSpider / YouBot
/((?:aihit|diff|timpi|you)bot|cohere-ai|omgili(?:bot)?|(?:magpie-|velenpublicweb)crawler|webzio-extended|(?:screaming frog seo |yisou)spider)\/?([\w\.]*)/i
// aiHitBot / Diffbot / Magpie-Crawler / Omgilibot / Webzio-Extended / Screaming Frog SEO Spider / Timpibot / VelenPublicWebCrawler / YisouSpider / YouBot
/((?:aihit|diff|timpi|you)bot|omgili(?:bot)?|(?:magpie-|velenpublicweb)crawler|webzio-extended|(?:screaming frog seo |yisou)spider)\/?([\w\.]*)/i
],

[NAME, VERSION, [TYPE, CRAWLER]],
Expand Down Expand Up @@ -241,8 +241,8 @@ const Fetchers = Object.freeze({
],
[NAME, VERSION, [TYPE, FETCHER]],

// Google Bots / Snapchat / Vercelbot
[/(vercelbot|feedfetcher-google|google(?:-read-aloud|producer)|(?=bot; )snapchat)/i],
// Google Bots / Cohere / Snapchat / Vercelbot
[/(cohere-ai|vercelbot|feedfetcher-google|google(?:-read-aloud|producer)|(?=bot; )snapchat)/i],
[NAME, [TYPE, FETCHER]],
]
});
Expand Down
2 changes: 2 additions & 0 deletions src/helpers/ua-parser-helpers.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import { IResult } from "../main/ua-parser";

declare function getDeviceVendor(model: string): string | undefined;
declare function isAppleSilicon(resultOrUA: IResult | string): boolean;
/**
 * Checks whether a user agent is recognized as an AI bot.
 *
 * @param resultOrUA - Either an already-parsed result or a raw user-agent string.
 * @returns `true` when the detected browser name is one of the known AI bot names, `false` otherwise.
 */
declare function isAIBot(resultOrUA: IResult | string): boolean;
declare function isBot(resultOrUA: IResult | string): boolean;
declare function isChromeFamily(resultOrUA: IResult | string): boolean;
declare function isElectron(): boolean;
Expand All @@ -16,6 +17,7 @@ declare function isStandalonePWA(): boolean;
export {
getDeviceVendor,
isAppleSilicon,
isAIBot,
isBot,
isChromeFamily,
isElectron,
Expand Down
72 changes: 72 additions & 0 deletions src/helpers/ua-parser-helpers.js
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,77 @@ const isAppleSilicon = (resultOrUA) => {
return false;
}

/**
 * Tells whether a user agent belongs to a known AI bot.
 *
 * The input is resolved through `toResult()` together with the `Bots`
 * extension; the resulting browser name is stringified, lowercased and
 * looked up in the list of known AI bot names below.
 *
 * @param {IResult|string} resultOrUA value forwarded as-is to `toResult()`
 * @returns {boolean} `true` if the lowercased browser name is in the list
 */
const isAIBot = (resultOrUA) => {

    // Lowercase names of known AI bots, one row per operator
    const knownAIBotNames = [
        'ai2bot',                                                               // AI2
        'amazonbot',                                                            // Amazon
        'anthropic-ai', 'claude-web', 'claudebot',                              // Anthropic
        'applebot', 'applebot-extended',                                        // Apple
        'bytespider',                                                           // ByteDance
        'ccbot',                                                                // Common Crawl
        'dataforseobot',                                                        // DataForSeo
        'diffbot',                                                              // Diffbot
        'googleother', 'googleother-image', 'googleother-video', 'google-extended', // Google
        'imagesiftbot',                                                         // Hive AI
        'petalbot',                                                             // Huawei
        'facebookbot', 'meta-externalagent',                                    // Meta
        'gptbot', 'oai-searchbot',                                              // OpenAI
        'perplexitybot',                                                        // Perplexity
        'timpibot',                                                             // Timpi
        'velenpublicwebcrawler',                                                // Velen.io
        'omgili', 'omgilibot', 'webzio-extended',                               // Webz.io
        'youbot',                                                               // You.com
        'scrapy'                                                                // Zyte
    ];

    const parsed = toResult(resultOrUA, Bots);

    // String() keeps a missing name from throwing: it becomes the text
    // 'undefined', which is not in the list
    const detectedName = String(parsed.browser.name).toLowerCase();

    return knownAIBotNames.includes(detectedName);
};

const isBot = (resultOrUA) => [
'cli',
'crawler',
Expand All @@ -56,6 +127,7 @@ const isElectron = () => !!(process?.versions?.hasOwnProperty('electron') ||
module.exports = {
getDeviceVendor,
isAppleSilicon,
isAIBot,
isBot,
isChromeFamily,
isElectron,
Expand Down
16 changes: 15 additions & 1 deletion test/mocha-test-helpers.js
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
const assert = require('assert');
const { UAParser } = require('../src/main/ua-parser');
const { getDeviceVendor, isAppleSilicon, isBot, isChromeFamily } = require('../src/helpers/ua-parser-helpers');
const { getDeviceVendor, isAppleSilicon, isAIBot, isBot, isChromeFamily } = require('../src/helpers/ua-parser-helpers');
const { Bots, Emails } = require('../src/extensions/ua-parser-extensions');

describe('getDeviceVendor', () => {
Expand Down Expand Up @@ -34,6 +34,20 @@ describe('isAppleSilicon', () => {
});
});

// isAIBot() must accept both a pre-parsed result and a raw user-agent string
describe('isAIBot', () => {
    it('Can detect AI Bots', () => {

        // Anthropic's crawler, including its "+claudebot@anthropic.com" contact token
        const claudeBot = 'Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; ClaudeBot/1.0; +claudebot@anthropic.com)';
        // A regular desktop browser: must never be reported as an AI bot
        const firefox = 'Mozilla/5.0 (X11; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/111.0';
        // OpenAI's search crawler
        const searchGPT = 'Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko); compatible; OAI-SearchBot/1.0; +https://openai.com/searchbot';

        // Parsed result as input
        assert.equal(isAIBot(UAParser(claudeBot, Bots)), true);

        // Raw user-agent strings as input
        assert.equal(isAIBot(claudeBot), true);
        assert.equal(isAIBot(firefox), false);
        assert.equal(isAIBot(searchGPT), true);
    });
});

describe('isBot', () => {
it('Can detect Bots', () => {

Expand Down

0 comments on commit 70b3003

Please sign in to comment.