Filtering Touchpoints from MTA

Updated by Omar Elmaghrabi

This page provides sample Snowflake queries that enable you to filter certain touch points out of your log level MTA datasets in order to produce performance reporting.

Use Case: Applying a Custom Attribution Window to Log Level MTA

This query filters out certain touchpoints from log level MTA and produces the log level MTA dataset.

This query operates in 2 stages:

  • filtered_mta cte = filters touchpoints out of log level MTA dataset that occur >30D before the conversion and re-allocate credit touchpoints for first touch, last touch, and even weight attribution model
  • log_level_final cte = apply a right outer join to re-insert any conversions as a "direct attributed" conversion for any conversions that are excluded from filtered_mta because all of the touchpoints were filtered out
The below query syntax is compatible with Snowflake SQL. Slight adjustments are needed for compatibility with Redshift and BigQuery.

This query will return NULL for event_id if all marketing events are filtered out for a given conversion.
SET start_date = '2024-03-30';
SET end_date = '2024-04-02';

WITH filtered_mta AS (
--CTE to filter events out of log level MTA
SELECT
m.action,
m.base_id,
m.uid,
m.hash_ip_events,
m.user_agent_events,
m.new_to_file,
m.currency_code,
m.fx_rate_to_usd,
m.conversion_hash_id,
m.conversion_key,
m.date,
m.timestamp_conv,
m.timestamp_events,
m.onsite_count,
m.marketing_type,
m.event_id,
m.matches,
m.original_url,
m.url_parameters,
m.utm_parameters,
m.request_referrer,
m.tier_1,
m.tier_2,
m.tier_3,
m.tier_4,
m.tier_5,
m.spend_key,
m.platform,
CASE WHEN min(m.sequence_number) over (partition by m.date, m.conversion_hash_id) = m.sequence_number THEN 1 ELSE 0 END AS filtered_first_touch,
CASE WHEN min(m.sequence_number) over (partition by m.date, m.conversion_hash_id) = m.sequence_number THEN m.total_events * m.revenue_even ELSE 0 END AS filtered_first_touch_revenue,
CASE WHEN max(m.sequence_number) over (partition by m.date, m.conversion_hash_id) = m.sequence_number THEN 1 ELSE 0 END AS filtered_last_touch,
CASE WHEN max(m.sequence_number) over (partition by m.date, m.conversion_hash_id) = m.sequence_number THEN m.total_events * m.revenue_even ELSE 0 END AS filtered_last_touch_revenue,
div0(1, count(*) over (partition BY m.date, m.conversion_hash_id)) AS filtered_even,
div0(1,count(*) over (partition BY m.date, m.conversion_hash_id)) * m.total_events * m.revenue_even AS filtered_even_revenue,
div0(m.normalized, sum(m.normalized) over (partition BY m.date, m.conversion_hash_id)) as filtered_normalized,
div0(m.normalized, sum(m.normalized) over (partition BY m.date, m.conversion_hash_id)) * (m.total_events * m.revenue_even) as filtered_normalized_revenue,
rank() over (partition BY m.date, m.conversion_hash_id ORDER BY sequence_number ASC) filtered_sequence_number,
count(*) over (partition BY m.date, m.conversion_hash_id) AS filtered_total_events,
m.rb_sync_id,
m.updated_at
FROM log_level_mta m --replace with mta table name
WHERE
m.date >= $start_date
and m.date <= $end_date
--filter to apply 30D attribution window across all marketing touchpoints
and timestamp_events >= dateadd(day, -30, timestamp_conv)

), log_level_final as (

--CTE to join the filtered log level MTA back against all converions to address edge case where all marketing touches are removed for a conversion
SELECT
coalesce(f.action, m2.action) AS action,
coalesce(f.base_id, m2.base_id) AS base_id,
coalesce(f.uid, m2.uid) AS uid,
coalesce(f.hash_ip_events, m2.hash_ip_events) AS hash_ip_events,
coalesce(f.user_agent_events, m2.user_agent_events) AS user_agent_events,
coalesce(f.new_to_file, m2.new_to_file) AS new_to_file,
coalesce(f.currency_code, m2.currency_code) AS currency_code,
coalesce(f.fx_rate_to_usd, m2.fx_rate_to_usd) AS fx_rate_to_usd,
coalesce(f.conversion_hash_id, m2.conversion_hash_id) AS conversion_hash_id,
coalesce(f.conversion_key, m2.conversion_key) AS conversion_key,
coalesce(f.date, m2.date) AS date,
coalesce(f.timestamp_conv, m2.timestamp_conv) AS timestamp_conv,
coalesce(f.timestamp_events, m2.timestamp_conv) AS timestamp_events,
coalesce(f.onsite_count, m2.onsite_count) AS onsite_count,
coalesce(f.marketing_type, 'conv_only') AS marketing_type,
f.event_id,
coalesce(f.matches, m2.matches) AS matches,
coalesce(f.original_url, m2.original_url) AS original_url,
coalesce(f.url_parameters, m2.url_parameters) AS url_parameters,
coalesce(f.utm_parameters, m2.utm_parameters) AS utm_parameters,
coalesce(f.request_referrer, m2.request_referrer) AS request_referrer,
coalesce(f.tier_1, 'Direct') AS tier_1,
f.tier_2,
f.tier_3,
f.tier_4,
f.tier_5,
f.spend_key,
coalesce(f.filtered_first_touch, 1) AS filtered_first_touch,
coalesce(f.filtered_first_touch_revenue, m2.revenue_first_touch) AS filtered_first_touch_revenue,
coalesce(f.filtered_last_touch, 1) AS filtered_last_touch,
coalesce(f.filtered_last_touch_revenue, m2.revenue_first_touch) AS filtered_last_touch_revenue,
coalesce(f.filtered_even, 1) AS filtered_even,
coalesce(f.filtered_even_revenue, m2.revenue_first_touch) AS filtered_even_revenue,
coalesce(f.filtered_normalized, 1) AS filtered_normalized,
coalesce(f.filtered_normalized_revenue, m2.revenue_first_touch) AS filtered_normalized_revenue,
coalesce(f.filtered_sequence_number, 1) AS filtered_sequence_number,
coalesce(f.filtered_total_events, 1) AS filtered_total_events,
coalesce(f.rb_sync_id, m2.rb_sync_id) AS rb_sync_id,
coalesce(f.updated_at, m2.updated_at) AS updated_at
FROM filtered_mta f
RIGHT OUTER JOIN (
--filter MTA to only return 1 entry per conversion before joining back onto filtered_mta
select *
from log_level_mta m --replace with mta table name; this is the same table as in the above CTE
where
first_touch = 1
AND date >= $start_date
AND date <= $end_date
) AS m2
ON m2.date = f.date
AND m2.conversion_key = f.conversion_key
)

SELECT
*
FROM log_level_final;

Use Case: Filtering log level MTA to only click-based touchpoints

Leverage the marketing_type column in the log level MTA dataset to only include click-based touchpoints.

You can modify the where clause in the filtered_mta cte in the above query to only include certain event types - see event types documentation here :

WHERE marketing_type in ('onsite')

Use Case: Applying a Custom Touchpoint Filtering to Buckets Breakdown (MTA Tiers)

This query filters out certain touchpoints from log level MTA and produces the buckets breakdown (MTA Tiers) dataset.

The query operates in 4 stages:

  • filtered_mta cte = filters touchpoints out of log level MTA dataset (in this example, touchpoints that occur >30D before the conversion) and re-allocate credit touchpoints for first touch, last touch, and even weight attribution model
  • log_level_final cte = apply a right outer join to re-insert any conversions as a "direct attributed" conversion for any conversions that are excluded from filtered_mta because all of the touchpoints were filtered out
  • log_level_pivot cte = prepare the log level MTA data to be joined against the spend dataset by grouping the attributed conversions and revenue at the same level of granularity that spend is ingested into Rockerbox on a per-platform basis
  • mta_tiers cte = prepare the buckets breakdown data to be joined against log_level_pivot by removing any records where spend = 0
  • perform a full outer join between log_level_pivot and mta_tiers to match up all attributed conversions with spend
--Query to reproduce buckets breakdown (MTA Tiers) dataset after filtering out touchpoints from log level MTA

SET start_date = '2024-03-20';
SET end_date = '2024-04-02';

WITH filtered_mta AS (
--CTE to filter events out of log level MTA
SELECT
m.new_to_file,
m.currency_code,
m.fx_rate_to_usd,
m.conversion_hash_id,
m.conversion_key,
m.date,
m.tier_1,
m.tier_2,
m.tier_3,
m.tier_4,
m.tier_5,
m.spend_key,
m.platform,
CASE WHEN min(m.sequence_number) over (partition by m.date, m.conversion_hash_id) = m.sequence_number THEN 1 ELSE 0 END AS filtered_first_touch,
CASE WHEN min(m.sequence_number) over (partition by m.date, m.conversion_hash_id) = m.sequence_number THEN m.total_events * m.revenue_even ELSE 0 END AS filtered_first_touch_revenue,
CASE WHEN max(m.sequence_number) over (partition by m.date, m.conversion_hash_id) = m.sequence_number THEN 1 ELSE 0 END AS filtered_last_touch,
CASE WHEN max(m.sequence_number) over (partition by m.date, m.conversion_hash_id) = m.sequence_number THEN m.total_events * m.revenue_even ELSE 0 END AS filtered_last_touch_revenue,
div0(1, count(*) over (partition BY m.date, m.conversion_hash_id)) AS filtered_even,
div0(1,count(*) over (partition BY m.date, m.conversion_hash_id)) * m.total_events * m.revenue_even AS filtered_even_revenue,
div0(m.normalized, sum(m.normalized) over (partition BY m.date, m.conversion_hash_id)) as filtered_normalized,
div0(m.normalized, sum(m.normalized) over (partition BY m.date, m.conversion_hash_id)) * (m.total_events * m.revenue_even) as filtered_normalized_revenue
FROM purchase_mta m--log_level_mta m --replace with mta table name
WHERE
m.date >= $start_date
and m.date <= $end_date
--filter to apply 30D attribution window across all marketing touchpoints
and timestamp_events >= dateadd(day, -30, timestamp_conv)

), log_level_final as (

--CTE to join the filtered log level MTA back against all converions to address edge case where all marketing touches are removed for a conversion
SELECT
coalesce(f.new_to_file, m2.new_to_file) AS new_to_file,
coalesce(f.currency_code, m2.currency_code) AS currency_code,
coalesce(f.fx_rate_to_usd, m2.fx_rate_to_usd) AS fx_rate_to_usd,
coalesce(f.conversion_hash_id, m2.conversion_hash_id) AS conversion_hash_id,
coalesce(f.conversion_key, m2.conversion_key) AS conversion_key,
coalesce(f.date, m2.date) AS date,
coalesce(f.tier_1, 'Direct') AS tier_1,
f.tier_2,
f.tier_3,
f.tier_4,
f.tier_5,
f.spend_key,
coalesce(f.filtered_first_touch, 1) AS filtered_first_touch,
coalesce(f.filtered_first_touch_revenue, m2.revenue_first_touch) AS filtered_first_touch_revenue,
coalesce(f.filtered_last_touch, 1) AS filtered_last_touch,
coalesce(f.filtered_last_touch_revenue, m2.revenue_first_touch) AS filtered_last_touch_revenue,
coalesce(f.filtered_even, 1) AS filtered_even,
coalesce(f.filtered_even_revenue, m2.revenue_first_touch) AS filtered_even_revenue,
coalesce(f.filtered_normalized, 1) AS filtered_normalized,
coalesce(f.filtered_normalized_revenue, m2.revenue_first_touch) AS filtered_normalized_revenue
FROM filtered_mta f
RIGHT OUTER JOIN (
--filter MTA to only return 1 entry per conversion before joining back onto filtered_mta
select *
from purchase_mta --replace with MTA table name
where
first_touch = 1
AND date >= $start_date
AND date <= $end_date
) AS m2
ON m2.date = f.date
AND m2.conversion_key = f.conversion_key

), log_level_pivot as (

select
date,
tier_1,
tier_2,
tier_3,
tier_4,
tier_5,
spend_key,

sum(case when new_to_file = 1 THEN filtered_first_touch ELSE 0 END) as ntf_first_touch,
sum(filtered_first_touch) as first_touch,
sum(case when new_to_file = 1 THEN filtered_first_touch_revenue ELSE 0 END) as ntf_revenue_first_touch,
sum(filtered_first_touch_revenue) as revenue_first_touch,

sum(case when new_to_file = 1 THEN filtered_last_touch ELSE 0 END) as ntf_last_touch,
sum(filtered_last_touch) as last_touch,
sum(case when new_to_file = 1 THEN filtered_last_touch_revenue ELSE 0 END) as ntf_revenue_last_touch,
sum(filtered_last_touch_revenue) as revenue_last_touch,

sum(case when new_to_file = 1 THEN filtered_even ELSE 0 END) as ntf_even,
sum(filtered_even) as even,
sum(case when new_to_file = 1 THEN filtered_even_revenue ELSE 0 END) as ntf_revenue_even,
sum(filtered_even_revenue) as revenue_even,

sum(case when new_to_file = 1 THEN filtered_normalized ELSE 0 END) as ntf_normalized,
sum(filtered_normalized) as normalized,
sum(case when new_to_file = 1 THEN filtered_normalized_revenue ELSE 0 END) as ntf_revenue_normalized,
sum(filtered_normalized_revenue) as revenue_normalized

FROM log_level_final
GROUP BY 1,2,3,4,5,6,7

), mta_tiers as (

select
t.date,
t.tier_1,
t.tier_2,
t.tier_3,
t.tier_4,
t.tier_5,
t.platform,
t.platform_join_key,
t.currency_code,
t.fx_rate_to_usd,
spend
from mta_tiers_table t --replace with your buckets breakdown/mta tiers table
where
t.date >= $start_date
and t.date <= $end_date
and t.spend > 0

)

select
coalesce(l.date, t.date) as date,
coalesce(l.tier_1, t.tier_1) as tier_1,
coalesce(l.tier_2, t.tier_2) as tier_2,
coalesce(l.tier_3, t.tier_3) as tier_3,
coalesce(l.tier_4, t.tier_4) as tier_4,
coalesce(l.tier_5, t.tier_5) as tier_5,
coalesce(l.spend_key, t.platform_join_key) as spend_key,
t.platform,
t.currency_code,
t.fx_rate_to_usd,
t.spend,

l.ntf_first_touch,
l.first_touch,
l.ntf_revenue_first_touch,
l.revenue_first_touch,

l.ntf_last_touch,
l.last_touch,
l.ntf_revenue_last_touch,
l.revenue_last_touch,

l.ntf_even,
l.even,
l.ntf_revenue_even,
l.revenue_even,

l.ntf_normalized,
l.normalized,
l.ntf_revenue_normalized,
l.revenue_normalized

from log_level_pivot l
full outer join mta_tiers t
on t.platform_join_key = l.spend_key
and t.date = l.date;


How did we do?