Skip to content

Commit

Permalink
fix: Improve granularity of high organic traffic audit (#372)
Browse files Browse the repository at this point in the history
- This improvement adds the top 5 traffic sources (vendors), derived from referrer data in
RUM, and provides the CTR for each of them. The overall page views and CTR are available under `"vendor": "*"`
  • Loading branch information
rpapani committed Sep 20, 2024
1 parent 2ca921b commit a2d3770
Show file tree
Hide file tree
Showing 13 changed files with 381 additions and 106 deletions.
33 changes: 27 additions & 6 deletions packages/spacecat-shared-rum-api-client/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -203,6 +203,7 @@ Calculates the amount of inorganic traffic and the bounce rate for each page. Id
"screenshot": "",
"trackedPageKPIName": "Bounce Rate",
"trackedPageKPIValue": 0.6507592190889371,
"trackedKPISiteAverage": "",
"pageViews": 46100,
"samples": 46100,
"metrics": [
Expand All @@ -223,6 +224,7 @@ Calculates the amount of inorganic traffic and the bounce rate for each page. Id
"screenshot": "",
"trackedPageKPIName": "Bounce Rate",
"trackedPageKPIValue": 0.8723897911832946,
"trackedKPISiteAverage": "",
"pageViews": 43100,
"samples": 43100,
"metrics": [
Expand All @@ -242,7 +244,7 @@ Calculates the amount of inorganic traffic and the bounce rate for each page. Id

### high-organic-low-ctr (Experimentation Opportunity)

Calculates the amount of non-inorganic (earned and owned) traffic and the click-through rate for each page. Identifies pages with high non-inorganic traffic and low click-through rates, which can be targeted for future experimentation opportunities. An example payload is provided below:
Calculates the amount of non-inorganic (earned and owned) traffic and the click-through rate for each page and vendor. Identifies pages with high non-inorganic traffic and low click-through rates, which can be targeted for future experimentation opportunities. An example payload is provided below:

```json
[
Expand All @@ -251,24 +253,43 @@ Calculates the amount of non-inorganic (earned and owned) traffic and the click-
"page": "https://www.spacecat.com/about-us",
"screenshot": "",
"trackedPageKPIName": "Click Through Rate",
"trackedPageKPIValue": 0.14099783080260303,
"trackedPageKPIValue": 0.14316702819956617,
"trackedKPISiteAverage": 0.40828402366863903,
"pageViews": 46100,
"samples": 46100,
"metrics": [
{
"type": "traffic",
"vendor": "*",
"value": {
"total": 46100,
"paid": 0,
"owned": 46100,
"paid": 300,
"owned": 45800,
"earned": 0
}
},
{
"type": "ctr",
"vendor": "*",
"value": {
"page": 0.14099783080260303,
"siteAverage": 0.4077909270216962
"page": 0.14316702819956617
}
},
{
"type": "traffic",
"vendor": "tiktok",
"value": {
"total": 300,
"owned": 0,
"earned": 0,
"paid": 300
}
},
{
"type": "ctr",
"vendor": "tiktok",
"value": {
"page": 0.3333333333333333
}
}
]
Expand Down
56 changes: 56 additions & 0 deletions packages/spacecat-shared-rum-api-client/src/common/aggregateFns.js
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@
* governing permissions and limitations under the License.
*/

import { extractTrafficHints, classifyVendor, getSecondLevelDomain } from './traffic.js';

/**
* Calculates the total page views by URL from an array of bundles.
* @param {Array<Object>} bundles - An array of RUM bundles (NOT Flat bundles).
Expand Down Expand Up @@ -52,6 +54,59 @@ function getCTRByUrl(bundles) {
}, {});
}

/**
 * Computes the Click-Through Rate (CTR) for every URL, overall and per vendor.
 * A CTR is the summed weight of bundles containing at least one `click` event
 * divided by the summed weight of all bundles in the same group.
 * Bundles for which no vendor can be classified only count towards the overall URL value.
 *
 * @param {Array<Object>} bundles - An array of RUM bundles (NOT Flat bundles).
 * @returns {Object} - An object keyed by URL; each entry has the shape
 * `{ value: <overall CTR>, vendors: { <vendor>: <CTR> } }`.
 */
function getCTRByUrlAndVendor(bundles) {
  const newTally = () => ({ sessionsWithClick: 0, totalPageviews: 0 });
  // Returns a fresh tally with the bundle's weight folded in.
  const addBundle = (tally, weight, clicked) => ({
    sessionsWithClick: clicked ? tally.sessionsWithClick + weight : tally.sessionsWithClick,
    totalPageviews: tally.totalPageviews + weight,
  });
  const toRate = ({ sessionsWithClick, totalPageviews }) => sessionsWithClick / totalPageviews;

  // First pass: accumulate weighted click / pageview tallies per URL and per vendor.
  const tallies = {};
  bundles.forEach((bundle) => {
    const { referrer, utmSource, utmMedium } = extractTrafficHints(bundle);
    const vendor = classifyVendor(getSecondLevelDomain(referrer), utmSource, utmMedium);
    const clicked = bundle.events.some(({ checkpoint }) => checkpoint === 'click');

    const page = tallies[bundle.url] || { overall: newTally(), vendors: {} };
    page.overall = addBundle(page.overall, bundle.weight, clicked);
    if (vendor) {
      page.vendors[vendor] = addBundle(page.vendors[vendor] || newTally(), bundle.weight, clicked);
    }
    tallies[bundle.url] = page;
  });

  // Second pass: turn every tally into a ratio.
  const ctrByUrl = {};
  Object.entries(tallies).forEach(([url, { overall, vendors }]) => {
    const vendorRates = {};
    Object.entries(vendors).forEach(([vendor, tally]) => {
      vendorRates[vendor] = toRate(tally);
    });
    ctrByUrl[url] = { value: toRate(overall), vendors: vendorRates };
  });
  return ctrByUrl;
}

/**
* Calculates the Click-Through Rate (CTR) average for the entire site.
* CTR is defined as the total number of sessions with at least one click event
Expand All @@ -78,5 +133,6 @@ function getSiteAvgCTR(bundles) {
export {
getSiteAvgCTR,
getCTRByUrl,
getCTRByUrlAndVendor,
pageviewsByUrl,
};
78 changes: 75 additions & 3 deletions packages/spacecat-shared-rum-api-client/src/common/traffic.js
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ import URI from 'urijs';
* @returns {string} The second-level domain of the given URL, or the original
* URL if it does not contain any text.
*/
function getSecondLevelDomain(url) {
export function getSecondLevelDomain(url) {
if (!hasText(url)) return url;
const uri = new URI(url);
const domain = uri.domain();
Expand All @@ -39,7 +39,7 @@ function getSecondLevelDomain(url) {
// Referrer related
const referrers = {
search: /google|yahoo|bing|yandex|baidu|duckduckgo|brave|ecosia|aol|startpage|ask/,
social: /^\b(x)\b|(.*(facebook|tiktok|snapchat|x|twitter|pinterest|reddit|linkedin|threads|quora|discord|tumblr|mastodon|bluesky|instagram).*)$/,
social: /^\b(x)\b|(.*(facebook|tiktok|snapchat|twitter|pinterest|reddit|linkedin|threads|quora|discord|tumblr|mastodon|bluesky|instagram).*)$/,
ad: /googlesyndication|2mdn|doubleclick|syndicatedsearch/,
video: /youtube|vimeo|twitch|dailymotion|wistia/,
};
Expand All @@ -63,11 +63,48 @@ const sources = {
social: /^\b(ig|fb|x|soc)\b|(.*(meta|tiktok|facebook|snapchat|twitter|igshopping|instagram|linkedin|reddit).*)$/,
search: /^\b(goo)\b|(.*(sea|google|yahoo|bing|yandex|baidu|duckduckgo|brave|ecosia|aol|startpage|ask).*)$/,
video: /youtube|vimeo|twitch|dailymotion|wistia/,
display: /optumib2b|jun|googleads|dv36|dv360|microsoft|flipboard|programmatic|yext|gdn|banner|newsshowcase/,
display: /optumib2b|jun|googleads|dv360|dv36|microsoft|flipboard|programmatic|yext|gdn|banner|newsshowcase/,
affiliate: /brandreward|yieldkit|fashionistatop|partner|linkbux|stylesblog|linkinbio|affiliate/,
email: /sfmc|email/,
};

/**
 * Vendor classification rules from https://github.com/adobe/helix-website/blob/main/tools/oversight/acquisition.js#L12
 * Added dailymotion, twitch to the list
 * Using full word match for social media shorts like ig, fb, x
 *
 * Rules are evaluated in order and the first matching rule wins.
 * The short tokens `yt` and `line` only match when they are not embedded in a longer
 * alphanumeric word, so that e.g. "nytimes" is not classified as youtube and "online"
 * is not classified as line. Underscores and hyphens still count as separators
 * ("yt_ads", "line-ads" keep matching).
 */
const vendorClassifications = [
  { regex: /google|googleads|google-ads|google_search|google_deman|adwords|dv360|gdn|doubleclick|dbm|gmb/i, result: 'google' },
  { regex: /instagram|\b(ig)\b/i, result: 'instagram' },
  { regex: /facebook|\b(fb)\b|meta/i, result: 'facebook' },
  { regex: /bing/i, result: 'bing' },
  { regex: /tiktok/i, result: 'tiktok' },
  { regex: /youtube|(?<![a-z0-9])yt(?![a-z0-9])/i, result: 'youtube' },
  { regex: /linkedin/i, result: 'linkedin' },
  { regex: /twitter|^\b(x)\b/i, result: 'x' },
  { regex: /snapchat/i, result: 'snapchat' },
  { regex: /microsoft/i, result: 'microsoft' },
  { regex: /pinterest/i, result: 'pinterest' },
  { regex: /reddit/i, result: 'reddit' },
  { regex: /spotify/i, result: 'spotify' },
  { regex: /criteo/i, result: 'criteo' },
  { regex: /taboola/i, result: 'taboola' },
  { regex: /outbrain/i, result: 'outbrain' },
  { regex: /yahoo/i, result: 'yahoo' },
  { regex: /marketo/i, result: 'marketo' },
  { regex: /eloqua/i, result: 'eloqua' },
  { regex: /substack/i, result: 'substack' },
  { regex: /(?<![a-z0-9])line(?![a-z0-9])/i, result: 'line' },
  { regex: /yext/i, result: 'yext' },
  { regex: /teads/i, result: 'teads' },
  { regex: /yandex/i, result: 'yandex' },
  { regex: /baidu/i, result: 'baidu' },
  { regex: /amazon|ctv/i, result: 'amazon' },
  { regex: /dailymotion/i, result: 'dailymotion' },
  { regex: /twitch/i, result: 'twitch' },
  { regex: /direct/i, result: 'direct' },
];

// Tracking params - based on the checkpoints we have in rum-enhancer now
// const organicTrackingParams = ['srsltid']; WE DO NOT HAVE THIS AS OF NOW
const paidTrackingParams = ['paid'];
Expand Down Expand Up @@ -160,6 +197,39 @@ const RULES = (domain) => ([
{ type: 'owned', category: 'uncategorized', referrer: any, utmSource: any, utmMedium: any, tracking: any },
]);

/**
 * Collects the traffic-related hints recorded in a RUM bundle.
 *
 * @param {Object} bundle - A RUM bundle with `url`, `weight` and an `events` array.
 * @returns {Object} `{ url, weight, referrer, utmSource, utmMedium, tracking }`, where
 * every hint falls back to an empty string when the bundle has no matching event.
 */
export function extractTrafficHints(bundle) {
  // First event with the given checkpoint (and source, when one is requested), or {}.
  const firstEvent = (checkpoint, source = '') => {
    const match = bundle.events.find((evt) => (
      evt.checkpoint === checkpoint && (!source || evt.source === source)
    ));
    return match || {};
  };
  const utmValue = (param) => firstEvent('utm', param).target || '';

  return {
    url: bundle.url,
    weight: bundle.weight,
    referrer: firstEvent('enter').source || '',
    utmSource: utmValue('utm_source'),
    utmMedium: utmValue('utm_medium'),
    // A `paid` checkpoint takes precedence over an `email` checkpoint.
    tracking: firstEvent('paid').checkpoint || firstEvent('email').checkpoint || '',
  };
}

/**
 * Returns the name of the vendor behind a visit, e.g. `facebook` instead of www.facebook.com.
 * The vendor rules are tried in their declared order; for each rule the referrer is tested
 * first, then utmSource, then utmMedium. The first rule matching any of the three wins.
 *
 * @param {string} referrer - Referrer hint (callers in this file pass the second-level domain).
 * @param {string} utmSource - Value of the utm_source hint.
 * @param {string} utmMedium - Value of the utm_medium hint.
 * @returns {string} The matched vendor name, or an empty string when no rule matches.
 */
export function classifyVendor(referrer, utmSource, utmMedium) {
  const hints = [referrer, utmSource, utmMedium];
  const match = vendorClassifications.find(({ regex }) => hints.some((hint) => regex.test(hint)));
  if (!match) {
    return '';
  }
  return match.result;
}

export function classifyTrafficSource(url, referrer, utmSource, utmMedium, trackingParams) {
const secondLevelDomain = getSecondLevelDomain(url);
const rules = RULES(secondLevelDomain);
Expand All @@ -174,9 +244,11 @@ export function classifyTrafficSource(url, referrer, utmSource, utmMedium, track
&& rule.utmMedium(sanitize(utmMedium))
&& rule.tracking(trackingParams)
));
const vendor = classifyVendor(referrerDomain, utmSource, utmMedium);

return {
type,
category,
vendor,
};
}
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ function convertToOpportunity(traffic) {
screenshot: '',
trackedPageKPIName: 'Bounce Rate',
trackedPageKPIValue: bounceRate,
trackedKPISiteAverage: '',
pageViews: total,
samples: total, // todo: get the actual number of samples
metrics: [{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,27 +11,49 @@
*/

import trafficAcquisition from '../traffic-acquisition.js';
import { getCTRByUrl, getSiteAvgCTR } from '../../common/aggregateFns.js';
import { getCTRByUrlAndVendor, getSiteAvgCTR } from '../../common/aggregateFns.js';

// NOTE(review): usage is outside this excerpt — presumably the per-day earned pageviews a
// page needs to count as "high organic"; confirm against hasHighOrganicTraffic.
const DAILY_EARNED_THRESHOLD = 5000;
// NOTE(review): usage is outside this excerpt — presumably the fraction of the site average
// CTR below which a page's CTR counts as low; confirm against hasLowerCTR.
const CTR_THRESHOLD_RATIO = 0.95;
// Per-day pageview floor: the handler keeps a page only when its total views exceed
// interval * DAILY_PAGEVIEW_THRESHOLD.
const DAILY_PAGEVIEW_THRESHOLD = 1000;
// Number of vendors (ranked by total views, descending) that get their own metrics entries.
const VENDORS_TO_CONSIDER = 5;

// Traffic types tallied per vendor; sources whose type prefix is not listed here are ignored.
const MAIN_TYPES = ['paid', 'earned', 'owned'];

function convertToOpportunity(traffic) {
const {
url, total, ctr, paid, owned, earned, siteAvgCTR,
url, total, ctr, paid, owned, earned, sources, siteAvgCTR, ctrByUrlAndVendor,
} = traffic;

return {
const vendors = sources.reduce((acc, { type, views }) => {
const [trafficType, , vendor] = type.split(':');
if (!vendor) {
return acc;
}
if (MAIN_TYPES.includes(trafficType)) {
acc[vendor] = acc[vendor] || {
total: 0, owned: 0, earned: 0, paid: 0,
};
acc[vendor].total += views;
acc[vendor][trafficType] += views;
}
return acc;
}, {});

const topVendors = Object.entries(vendors)
.sort((a, b) => b[1].total - a[1].total).slice(0, VENDORS_TO_CONSIDER);
const opportunity = {
type: 'high-organic-low-ctr',
page: url,
screenshot: '',
trackedPageKPIName: 'Click Through Rate',
trackedPageKPIValue: ctr,
trackedKPISiteAverage: siteAvgCTR,
pageViews: total,
samples: total, // todo: get the actual number of samples
metrics: [{
type: 'traffic',
vendor: '*',
value: {
total,
paid,
Expand All @@ -40,12 +62,35 @@ function convertToOpportunity(traffic) {
},
}, {
type: 'ctr',
vendor: '*',
value: {
page: ctr,
siteAverage: siteAvgCTR,
},
}],
};
opportunity.metrics.push(...topVendors.flatMap(([vendor, {
total: _total, owned: _owned, earned: _earned, paid: _paid,
}]) => {
const trafficMetrics = {
type: 'traffic',
vendor,
value: {
total: _total,
owned: _owned,
earned: _earned,
paid: _paid,
},
};
const ctrMetrics = {
type: 'ctr',
vendor,
value: {
page: ctrByUrlAndVendor[vendor],
},
};
return [trafficMetrics, ctrMetrics];
}));
return opportunity;
}

function hasHighOrganicTraffic(interval, traffic) {
Expand All @@ -61,13 +106,18 @@ function handler(bundles, opts = {}) {
const { interval = 7 } = opts;

const trafficByUrl = trafficAcquisition.handler(bundles);
const ctrByUrl = getCTRByUrl(bundles);
const ctrByUrlAndVendor = getCTRByUrlAndVendor(bundles);
const siteAvgCTR = getSiteAvgCTR(bundles);

return trafficByUrl.filter((traffic) => traffic.total > interval * DAILY_PAGEVIEW_THRESHOLD)
.filter(hasHighOrganicTraffic.bind(null, interval))
.filter((traffic) => hasLowerCTR(ctrByUrl[traffic.url], siteAvgCTR))
.map((traffic) => ({ ...traffic, ctr: ctrByUrl[traffic.url], siteAvgCTR }))
.filter((traffic) => hasLowerCTR(ctrByUrlAndVendor[traffic.url].value, siteAvgCTR))
.map((traffic) => ({
...traffic,
ctr: ctrByUrlAndVendor[traffic.url].value,
siteAvgCTR,
ctrByUrlAndVendor: ctrByUrlAndVendor[traffic.url].vendors,
}))
.map(convertToOpportunity);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,7 @@ function getRageClickOpportunities(rageClickInstances) {
screenshot: '',
trackedPageKPIName: OPPORTUNITY_DESCRIPTION,
trackedPageKPIValue: '',
trackedKPISiteAverage: '',
pageViews: rageClickInstances[url].pageViews,
samples: rageClickInstances[url].samples,
metrics: [],
Expand Down
Loading

0 comments on commit a2d3770

Please sign in to comment.