-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add middleware to recognise organic search and social traffic (#7)
* Add middleware to recognise organic search and social traffic
- Loading branch information
1 parent
17c6cce
commit de36a82
Showing
10 changed files
with
334 additions
and
8 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
{ | ||
"AOL": ["aol.com"], | ||
"Baidu": ["baidu.com"], | ||
"Bing": ["bing.com"], | ||
"DuckDuckGo": ["duckduckgo.com"], | ||
"Google": ["google.*", "google.co.*", "google.com.*"], | ||
"Yahoo": ["*.yahoo.*"], | ||
"Yandex": ["yandex.com"] | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
{ | ||
"Facebook": ["facebook.com", "fb.me", "m.facebook.com", "l.facebook.com"], | ||
"Hacker News": ["news.ycombinator.com"], | ||
"Instagram": ["instagram.com", "l.instagram.com"], | ||
"LinkedIn": ["linkedin.com", "lnkd.in"], | ||
"Pinterest": ["pinterest.*", "pinterest.co.*", "pinterest.com.*"], | ||
"reddit": ["reddit.com", "*.reddit.com"], | ||
"Snapchat": ["snapchat.com"], | ||
"TikTok": ["tiktok.com"], | ||
"tumblr": ["tumblr.com", "t.umblr.com"], | ||
"Twitter": ["twitter.com", "t.co", "x.com"], | ||
"YouTube": ["youtube.com", "youtu.be"], | ||
"Vimeo": ["vimeo.com"], | ||
"Weibo": ["weibo.com"] | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,49 @@ | ||
import { InteractionMiddleware, Interaction } from '../types'; | ||
import {domainToService} from "../domainToService"; | ||
import {ServiceDatabase} from "../domainToService"; | ||
|
||
/**
 * Factory for interaction middlewares that re-label referral interactions
 * coming from well-known services (search engines, social networks).
 *
 * Note: this requires the automatic referral detection to be enabled,
 * because the middlewares only act on interactions whose medium is 'referral'.
 */
export default class ReferralMapper {
    /**
     * Builds a middleware that turns referrals from known search engine
     * domains into organic search traffic (medium 'organic').
     *
     * @param searchEngines Service database to match against; the bundled
     * basic search engine list is loaded when omitted.
     */
    public static newSearchEngineMiddleware(searchEngines?: ServiceDatabase): InteractionMiddleware {
        const database = searchEngines ?? (require('../../data/search-engines-basic.json') as ServiceDatabase);

        return ReferralMapper.newReferralMapper('organic', database);
    }

    /**
     * Builds a middleware that turns referrals from known social network
     * domains into social traffic (medium 'social').
     *
     * @param socialNetworks Service database to match against; the bundled
     * basic social network list is loaded when omitted.
     */
    public static newSocialNetworkMiddleware(socialNetworks?: ServiceDatabase): InteractionMiddleware {
        const database = socialNetworks ?? (require('../../data/social-networks-basic.json') as ServiceDatabase);

        return ReferralMapper.newReferralMapper('social', database);
    }

    /**
     * Shared implementation: returns a middleware that rewrites a referral
     * whose source matches one of the given services.
     *
     * @param mappedMedium Medium assigned to a matched interaction.
     * @param services Service database used to resolve the source domain.
     */
    private static newReferralMapper(mappedMedium: string, services: ServiceDatabase): InteractionMiddleware {
        return (interaction: Interaction): Interaction => {
            // Only referrals that actually carry a source are candidates for remapping
            const isReferral = interaction.medium === 'referral';
            if (!isReferral || !interaction.source) {
                return interaction;
            }

            // The source is looked up as a (lowercased) domain in the service database
            const matchedService = domainToService(interaction.source.toLowerCase(), services);
            if (!matchedService) {
                return interaction;
            }

            // Return a copy so the incoming interaction object is left untouched
            return {
                ...interaction,
                source: matchedService,
                medium: mappedMedium,
            };
        };
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
/**
 * Maps a service name (e.g. "Google") to the list of domain patterns that identify it.
 * A pattern may contain * as a wildcard for a single domain label.
 */
export type ServiceDatabase = Record<string, Array<string>>;

/**
 * Compiles a single domain pattern from a ServiceDatabase into an anchored,
 * case-insensitive regular expression.
 */
function domainPatternToRegExp(serviceDomain: string): RegExp {
    const source = serviceDomain
        // Strip any leading www subdomain because it is also removed from the lookup domain
        .replace(/^www\./i, '')
        // Escape every regex metacharacter except *, so that e.g. "." only matches a literal dot
        // and a malformed pattern cannot produce an invalid regular expression
        .replace(/[.+?^${}()|[\]\\]/g, '\\$&')
        // "*." (now "*\.") becomes an optional single subdomain, so it also matches a missing subdomain.
        // The dot in the replacement must stay escaped, otherwise "*.reddit.com" would match "notreddit.com"
        .replace(/\*\\\./g, '([\\w\\d-]+\\.)?')
        // Any remaining * matches one domain label (a naive pattern that never crosses a ".")
        .replace(/\*/g, '[\\w\\d-]+');

    return new RegExp(`^${source}$`, 'i');
}

/**
 * Attempts to match a domain to a service in the ServiceDatabase.
 * Supports wildcard lookup and will ignore www subdomains.
 *
 * @param domain The domain to look up, e.g. "www.google.co.uk".
 * @param services The service database to search; entries whose value is not an
 * array of strings are skipped.
 * @returns The name of the first service with a matching pattern, or null when nothing matches.
 */
export function domainToService(domain: string, services: ServiceDatabase): string | null {
    // Strip the www subdomain if it's at the start, so we can ignore it
    const lookupDomain = domain.replace(/^www\./i, '');

    for (const [serviceName, serviceDomains] of Object.entries(services)) {
        // Databases may come from untyped JSON; skip malformed entries instead of iterating a string
        if (!Array.isArray(serviceDomains)) {
            continue;
        }

        for (const serviceDomain of serviceDomains) {
            if (typeof serviceDomain !== 'string') {
                continue;
            }

            if (domainPatternToRegExp(serviceDomain).test(lookupDomain)) {
                return serviceName;
            }
        }
    }

    return null;
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,99 @@ | ||
import {expect, test} from '@jest/globals'; | ||
import ReferralMapper from '../../src/InteractionMiddlewares/ReferralMapper'; | ||
|
||
// Interactions with any medium other than 'referral' must pass through unchanged,
// even when their source is a known search engine domain.
test('it ignores non-referral interactions', async () => {
    const middleware = ReferralMapper.newSearchEngineMiddleware({google: ['www.google.com']});
    const interaction = {medium: 'foo', source: 'www.google.com'};

    expect(middleware(interaction)).toEqual(interaction);
});

// A referral from a domain that is not in the database stays a referral.
test('it returns initial interaction on no match', async () => {
    const middleware = ReferralMapper.newSearchEngineMiddleware({
        google: ['www.google.com'],
        bing: ['www.bing.com'],
    });
    const interaction = {medium: 'referral', source: 'www.reddit.com'};

    expect(middleware(interaction)).toEqual(interaction);
});

// A matching referral gets the service name as source and 'organic' as medium.
test('it attributes search engine referrals to organic search', async () => {
    const middleware = ReferralMapper.newSearchEngineMiddleware({google: ['www.google.com']});

    const result = middleware({medium: 'referral', source: 'www.google.com'});

    expect(result).toEqual({medium: 'organic', source: 'google'});
});

// A matching referral gets the service name as source and 'social' as medium.
test('it attributes social network referrals to social', async () => {
    const middleware = ReferralMapper.newSocialNetworkMiddleware({facebook: ['www.facebook.com']});

    const result = middleware({medium: 'referral', source: 'www.facebook.com'});

    expect(result).toEqual({medium: 'social', source: 'facebook'});
});

// [referrer domain, expected service name] pairs covered by the bundled search engine list
const defaultSearchEngineCases = [
    ['www.aol.com', 'AOL'],
    ['www.baidu.com', 'Baidu'],
    ['www.bing.com', 'Bing'],
    ['duckduckgo.com', 'DuckDuckGo'],
    ['google.com', 'Google'],
    ['www.google.com', 'Google'],
    ['www.google.nl', 'Google'],
    ['www.google.co.uk', 'Google'],
    ['www.google.com.au', 'Google'],
    ['www.yahoo.com', 'Yahoo'],
    ['yandex.com', 'Yandex'],
];

test.each(defaultSearchEngineCases)('it uses the basic list of search engines by default', async (domain, expectedService) => {
    // No database passed: the middleware falls back to the bundled list
    const middleware = ReferralMapper.newSearchEngineMiddleware();

    const result = middleware({medium: 'referral', source: domain});

    expect(result).toEqual({medium: 'organic', source: expectedService});
});

// [referrer domain, expected service name] pairs covered by the bundled social network list
const defaultSocialNetworkCases = [
    ['www.facebook.com', 'Facebook'],
    ['fb.me', 'Facebook'],
    ['m.facebook.com', 'Facebook'],
    ['l.facebook.com', 'Facebook'],
    ['news.ycombinator.com', 'Hacker News'],
    ['www.instagram.com', 'Instagram'],
    ['l.instagram.com', 'Instagram'],
    ['www.linkedin.com', 'LinkedIn'],
    ['lnkd.in', 'LinkedIn'],
    ['www.pinterest.com', 'Pinterest'],
    ['www.pinterest.nl', 'Pinterest'],
    ['www.pinterest.co.uk', 'Pinterest'],
    ['www.pinterest.com.au', 'Pinterest'],
    ['www.reddit.com', 'reddit'],
    ['old.reddit.com', 'reddit'],
    ['np.reddit.com', 'reddit'],
    ['www.snapchat.com', 'Snapchat'],
    ['www.tiktok.com', 'TikTok'],
    ['www.tumblr.com', 'tumblr'],
    ['t.umblr.com', 'tumblr'],
    ['twitter.com', 'Twitter'],
    ['t.co', 'Twitter'],
    ['x.com', 'Twitter'],
    ['www.youtube.com', 'YouTube'],
    ['youtu.be', 'YouTube'],
    ['vimeo.com', 'Vimeo'],
    ['weibo.com', 'Weibo'],
];

test.each(defaultSocialNetworkCases)('it uses the basic list of social networks by default', async (domain, expectedService) => {
    // No database passed: the middleware falls back to the bundled list
    const middleware = ReferralMapper.newSocialNetworkMiddleware();

    const result = middleware({medium: 'referral', source: domain});

    expect(result).toEqual({medium: 'social', source: expectedService});
});
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,86 @@ | ||
import { expect, test } from '@jest/globals'; | ||
import {domainToService} from "../src/domainToService"; | ||
|
||
// An exact pattern matches the identical domain.
test('it matches service', () => {
    const services = {google: ['www.google.com']};

    expect(domainToService('www.google.com', services)).toEqual('google');
});

// A leading www. on the looked-up domain is ignored.
test('it ignores www subdomain', () => {
    const services = {google: ['google.com']};

    expect(domainToService('www.google.com', services)).toEqual('google');
});

// Patterns without www still match a www domain when combined with TLD wildcards.
test('it allows www subdomain', () => {
    const services = {google: ['google.*', 'google.co.*', 'google.com.*']};

    expect(domainToService('www.google.com', services)).toEqual('google');
});

// Matching is case-insensitive on both the pattern and the domain.
test('it ignores capitals', () => {
    const services = {google: ['www.GOOGLE.com']};

    expect(domainToService('www.Google.com', services)).toEqual('google');
});

// A service whose value is not an array must not match; later valid entries still do.
test('it skips incorrect domain lists', () => {
    const services = {google: 'www.google.com', test: ['www.google.com']};

    // @ts-expect-error deliberately passing a malformed database (string instead of string[])
    expect(domainToService('www.google.com', services)).toEqual('test');
});

test.each([
    [
        // Wildcard TLD
        {google: ['www.google.*']},
        'www.google.nl',
        'google',
    ],
    [
        // Wildcard subdomain is optional: www is stripped, so this is a lookup without subdomain
        {google: ['*.google.com']},
        'www.google.com',
        'google',
    ],
    [
        // Wildcard subdomain matches a real (non-www) subdomain
        {google: ['*.google.com']},
        'mail.google.com',
        'google',
    ],
    [
        {google: ['*.google.*']},
        'www.google.de',
        'google',
    ],
    [
        // We need special cases to handle the more common two part eTLDs
        {google: ['www.google.*', '*.google.com', '*.google.*', '*.google.co.*', '*.google.com.*']},
        'www.google.co.uk',
        'google',
    ],
    [
        // We need special cases to handle the more common two part eTLDs
        {google: ['www.google.*', '*.google.com', '*.google.*', '*.google.co.*', '*.google.com.*']},
        'www.google.com.au',
        'google',
    ],
])('it supports wildcards in the domains', (services, domain, expectedServiceName) => {
    expect(domainToService(domain, services)).toEqual(expectedServiceName);
});

test('it returns null on no match', () => {
    const services = {google: ['www.google.com']};

    expect(domainToService('www.example.com', services)).toBeNull();
});

/**
 * . is a wildcard in regex, and we use regex to compare domains,
 * so a literal dot in a pattern must not match an arbitrary character.
 */
test('it does not use the . in domain names as wildcards', () => {
    const services = {google: ['www.google.com']};

    expect(domainToService('wwwxgoogle.com', services)).toBeNull();
});