diff --git a/src/module-elasticsuite-tracker/Helper/BotDetector.php b/src/module-elasticsuite-tracker/Helper/BotDetector.php
new file mode 100644
index 000000000..5dde35e6a
--- /dev/null
+++ b/src/module-elasticsuite-tracker/Helper/BotDetector.php
@@ -0,0 +1,95 @@
+
+ * @copyright 2025 Smile
+ * @license Open Software License ("OSL") v. 3.0
+ */
+
+namespace Smile\ElasticsuiteTracker\Helper;
+
+use Magento\Framework\App\Helper\AbstractHelper;
+use Magento\Framework\App\Helper\Context;
+use Magento\Framework\App\Request\Http;
+
+/**
+ * BotDetector helper.
+ *
+ * Tells whether the current request comes from a known bot, by looking for each configured
+ * bot pattern (case-insensitive substring) in the request User-Agent header.
+ *
+ * @category Smile
+ * @package  Smile\ElasticsuiteTracker
+ */
+class BotDetector extends AbstractHelper
+{
+    /**
+     * @var Http
+     */
+    protected $request;
+
+    /**
+     * @var array
+     */
+    protected $botUserAgents = [];
+
+    /**
+     * Constructor.
+     *
+     * @param Context $context       Context.
+     * @param Http    $request       HTTP request.
+     * @param array   $botUserAgents Bot user agents list (substrings searched in the User-Agent header).
+     */
+    public function __construct(Context $context, Http $request, $botUserAgents = [])
+    {
+        parent::__construct($context);
+        $this->request = $request;
+        $this->botUserAgents = $botUserAgents;
+    }
+
+    /**
+     * Get the current user agent from the request.
+     *
+     * @return string User-Agent header value, or an empty string when the header yields a falsy value.
+     */
+    public function getUserAgent()
+    {
+        return $this->request->getHeader('User-Agent') ?: '';
+    }
+
+    /**
+     * Check if the current user agent belongs to a bot.
+     *
+     * The match is a case-insensitive substring search. Entries of the bot list that are not strings,
+     * or that are blank, are skipped: on PHP 8+ an empty search pattern matches any string, so a single
+     * null or empty entry in the list would otherwise flag every request as a bot.
+     *
+     * @return bool
+     */
+    public function isBot()
+    {
+        $userAgent = strtolower($this->getUserAgent());
+        if ($userAgent === '') {
+            // Nothing to inspect: no pattern can be found in an empty user agent.
+            return false;
+        }
+
+        foreach ($this->botUserAgents as $bot) {
+            if (!is_string($bot) || trim($bot) === '') {
+                continue;
+            }
+
+            // The user agent has been lowercased, so the pattern has to be lowercased as well.
+            if (strpos($userAgent, strtolower($bot)) !== false) {
+                return true;
+            }
+        }
+
+        return false;
+    }
+}
diff --git a/src/module-elasticsuite-tracker/Helper/Data.php b/src/module-elasticsuite-tracker/Helper/Data.php
index bd2012b89..80b976668 100644
--- a/src/module-elasticsuite-tracker/Helper/Data.php
+++ b/src/module-elasticsuite-tracker/Helper/Data.php
@@ -72,6 +72,12 @@ class Data extends \Magento\Framework\App\Helper\AbstractHelper
      */
     const CONFIG_IS_HEADLESS_MODE_XPATH
= 'smile_elasticsuite_tracker/general/is_headless_mode';
 
+    /**
+     * Configuration path of the flag enabling the filtering of tracker hits coming from known bots
+     * @var string
+     */
+    const CONFIG_IS_FILTERING_BOT_HITS_XPATH = 'smile_elasticsuite_tracker/general/filter_bot_hits';
+
     /**
      * Anonymization status configuration path
      * @var string
@@ -249,6 +255,18 @@ public function getEventsQueueCleanupDelay()
         return (int) $this->scopeConfig->getValue(self::CONFIG_QUEUE_CLEANUP_DELAY_XPATH);
     }
 
+    /**
+     * Tell whether the tracker hits generated by known bots have to be ignored upfront.
+     *
+     * @return bool
+     */
+    public function isFilteringBotHits()
+    {
+        $configPath = self::CONFIG_IS_FILTERING_BOT_HITS_XPATH;
+
+        return $this->scopeConfig->isSetFlag($configPath);
+    }
+
     /**
      * Return the current tracker visitor id
      *
diff --git a/src/module-elasticsuite-tracker/Model/Customer/TrackingService.php b/src/module-elasticsuite-tracker/Model/Customer/TrackingService.php
index 087c625c6..05802d20d 100644
--- a/src/module-elasticsuite-tracker/Model/Customer/TrackingService.php
+++ b/src/module-elasticsuite-tracker/Model/Customer/TrackingService.php
@@ -42,6 +42,11 @@ class TrackingService implements \Smile\ElasticsuiteTracker\Api\CustomerTracking
      */
     private $eventQueue;
 
+    /**
+     * @var \Smile\ElasticsuiteTracker\Helper\BotDetector
+     */
+    private $botDetector;
+
     /**
      * Constructor.
      *
@@ -49,17 +54,20 @@ class TrackingService implements \Smile\ElasticsuiteTracker\Api\CustomerTracking
      * @param \Smile\ElasticsuiteTracker\Helper\Data $helper Tracking Helper.
      * @param \Smile\ElasticsuiteTracker\Api\EventQueueInterface $eventQueue Event Queue.
      * @param \Magento\Customer\Model\Session $customerSession Customer Session.
+     * @param \Smile\ElasticsuiteTracker\Helper\BotDetector $botDetector Bot detector.
*/ public function __construct( \Smile\ElasticsuiteTracker\Model\ResourceModel\CustomerLink $customerLinkResource, \Smile\ElasticsuiteTracker\Helper\Data $helper, \Smile\ElasticsuiteTracker\Api\EventQueueInterface $eventQueue, - \Magento\Customer\Model\Session $customerSession + \Magento\Customer\Model\Session $customerSession, + \Smile\ElasticsuiteTracker\Helper\BotDetector $botDetector ) { $this->customerLinkResource = $customerLinkResource; $this->helper = $helper; $this->customerSession = $customerSession; $this->eventQueue = $eventQueue; + $this->botDetector = $botDetector; } /** @@ -76,6 +84,9 @@ public function hit($eventData): void public function addEvent($eventData) { if ($this->helper->isEnabled()) { + if ($this->helper->isFilteringBotHits() && $this->botDetector->isBot()) { + return; + } $this->addCustomerLink($eventData); $this->eventQueue->addEvent($eventData); } diff --git a/src/module-elasticsuite-tracker/etc/adminhtml/system.xml b/src/module-elasticsuite-tracker/etc/adminhtml/system.xml index 4fc72dd28..aebb3db47 100644 --- a/src/module-elasticsuite-tracker/etc/adminhtml/system.xml +++ b/src/module-elasticsuite-tracker/etc/adminhtml/system.xml @@ -50,6 +50,11 @@ 1 + + + Magento\Config\Model\Config\Source\Yesno + + diff --git a/src/module-elasticsuite-tracker/etc/config.xml b/src/module-elasticsuite-tracker/etc/config.xml index f8265f677..584030a1a 100644 --- a/src/module-elasticsuite-tracker/etc/config.xml +++ b/src/module-elasticsuite-tracker/etc/config.xml @@ -23,6 +23,7 @@ 12 0 0 + 1 STUID diff --git a/src/module-elasticsuite-tracker/etc/di.xml b/src/module-elasticsuite-tracker/etc/di.xml index ef6066da5..3ea5732da 100644 --- a/src/module-elasticsuite-tracker/etc/di.xml +++ b/src/module-elasticsuite-tracker/etc/di.xml @@ -130,4 +130,31 @@ + + + + + googlebot + adsbot-google + feedfetcher-google + mediapartners-google + mediapartners (googlebot) + apis-google + google-inspectiontool + storebot-google + googleother + bingbot + slurp + linkedinbot 
+ yandex + baiduspider + openai + duckduckbot + facebot + twitterbot + ia_archiver + + + + diff --git a/src/module-elasticsuite-tracker/i18n/de_DE.csv b/src/module-elasticsuite-tracker/i18n/de_DE.csv index 82b7e2271..24e787fba 100644 --- a/src/module-elasticsuite-tracker/i18n/de_DE.csv +++ b/src/module-elasticsuite-tracker/i18n/de_DE.csv @@ -33,5 +33,6 @@ "Delay","Verzögerung" "In days. (Default is 7 days.)
Invalid tracker events older than this delay are periodically removed from the events indexing queue.","In Tagen. (Standard ist 7 Tage.)
Ungültige Tracker-Ereignisse, die älter als diese Verzögerung sind, werden periodisch aus der Warteschlange für Ereignisse entfernt." "You can click this button to immediately remove all the invalid tracker events from the indexing queue, regardless of the Delay setting above.","Sie können diesen Button anklicken, um alle ungültigen Tracker-Ereignisse unabhängig von der obigen Verzögerungseinstellung sofort aus der Indexierungsphase zu entfernen." -"","" -"","" +"Bot Handling","Handhabung von Robotern" +"Filter bot hits","Filtern Sie Robotertreffer" +"If enabled, it will prevent known IA or search engines crawler bots generated tracker hits from being recorded to prevent your behavioral data from being polluted by sometimes totally incoherent pagination or results count data.","Wenn diese Option aktiviert ist, wird verhindert, dass von bekannten KI- oder Suchmaschinen-Crawler-Bots generierte Tracker-Treffer aufgezeichnet werden, um zu verhindern, dass Ihre Verhaltensdaten durch manchmal völlig inkohärente Paginierungs- oder Ergebniszählungsdaten verunreinigt werden." diff --git a/src/module-elasticsuite-tracker/i18n/en_US.csv b/src/module-elasticsuite-tracker/i18n/en_US.csv index 5ed831dcd..63ac348ff 100644 --- a/src/module-elasticsuite-tracker/i18n/en_US.csv +++ b/src/module-elasticsuite-tracker/i18n/en_US.csv @@ -33,5 +33,6 @@ Enabled,Enabled "Delay","Delay" "In days. (Default is 7 days.)
Invalid tracker events older than this delay are periodically removed from the events indexing queue.","In days. (Default is 7 days.)
Invalid tracker events older than this delay are periodically removed from the events indexing queue." "You can click this button to immediately remove all the invalid tracker events from the indexing queue, regardless of the Delay setting above.","You can click this button to immediately remove all the invalid tracker events from the indexing queue, regardless of the Delay setting above." - - +"Bot Handling","Bot Handling" +"Filter bot hits","Filter bot hits" +"If enabled, it will prevent known IA or search engines crawler bots generated tracker hits from being recorded to prevent your behavioral data from being polluted by sometimes totally incoherent pagination or results count data.","If enabled, tracker hits generated by known AI or search engine crawler bots will not be recorded, which prevents your behavioral data from being polluted by their sometimes totally incoherent pagination or results count data." diff --git a/src/module-elasticsuite-tracker/i18n/fr_FR.csv b/src/module-elasticsuite-tracker/i18n/fr_FR.csv index ca1be9fe6..73027beef 100644 --- a/src/module-elasticsuite-tracker/i18n/fr_FR.csv +++ b/src/module-elasticsuite-tracker/i18n/fr_FR.csv @@ -33,3 +33,6 @@ Tracking,Tracking "Delay","Délai" "In days. (Default is 7 days.)
Invalid tracker events older than this delay are periodically removed from the events indexing queue.","En jours. (Par défaut 7 jours.)
Les évènements invalides du tracker plus anciens que ce délai seront périodiquement supprimés de la file d'attente d'indexation des évènements." "You can click this button to immediately remove all the invalid tracker events from the indexing queue, regardless of the Delay setting above.","Vous pouvez cliquer sur ce bouton pour supprimer immédiatement tous les évènements invalides du tracker de la file d'attente d'indexation. Le paramètre ""Délai"" ci-dessus est ignoré." +"Bot Handling","Gestion des Robots" +"Filter bot hits","Filtrer les hits de robots" +"If enabled, it will prevent known IA or search engines crawler bots generated tracker hits from being recorded to prevent your behavioral data from being polluted by sometimes totally incoherent pagination or results count data.","Si activé, les hits du tracker générés par des robots de moteurs de recherche ou d'IA ne seront pas enregistrés afin d'empêcher la pollution de vos données comportementales par des données de nombre de résultats ou de pagination parfois totalement incohérents." diff --git a/src/module-elasticsuite-tracker/i18n/nl_NL.csv b/src/module-elasticsuite-tracker/i18n/nl_NL.csv index 96cc67297..742d08108 100644 --- a/src/module-elasticsuite-tracker/i18n/nl_NL.csv +++ b/src/module-elasticsuite-tracker/i18n/nl_NL.csv @@ -33,5 +33,6 @@ "Delay","Vertraging" "In days. (Default is 7 days.)
Invalid tracker events older than this delay are periodically removed from the events indexing queue.","In dagen. (Standaard is 7 dagen.)
Ongeldige tracker events ouder dan deze vertraging worden periodiek verwijderd uit de wachtrij voor indexering van evenementen." "You can click this button to immediately remove all the invalid tracker events from the indexing queue, regardless of the Delay setting above.","U kunt op deze knop klikken om onmiddellijk alle ongeldige trackergebeurtenissen uit de indexeringswachtrij te verwijderen, ongeacht de hierboven ingestelde vertraging." -"","" -"","" +"Bot Handling","Robotbediening" +"Filter bot hits","Filter robottreffers" +"If enabled, it will prevent known IA or search engines crawler bots generated tracker hits from being recorded to prevent your behavioral data from being polluted by sometimes totally incoherent pagination or results count data.","Indien ingeschakeld, voorkomt het dat door bekende AI- of zoekmachinecrawlers gegenereerde tracker-hits worden geregistreerd om te voorkomen dat uw gedragsgegevens worden vervuild door soms totaal onsamenhangende paginering of gegevens over het aantal resultaten."