Customer and Marketing Analysis in R

Clean the Environment

# Clear environment of variables and functions
rm(list = ls(all = TRUE)) 
# Clear environmet of packages
if(is.null(sessionInfo()$otherPkgs) == FALSE)lapply(paste("package:", names(sessionInfo()$otherPkgs), sep=""), detach, character.only = TRUE, unload = TRUE)

Load Libraries

library(tidyverse) 
library(dplyr) # joins
library(RODBC) # connect to SQL Server
library(janitor) # pretty cross-tabs
library(kableExtra) # pretty html tables
library(formattable)
library(gridExtra)
library(scales)
library(pastecs)
library(GGally)
library(ChannelAttribution)
library(markovchain) 

Load Data

# load from csv file
df<- read.csv('../files/SampleData.csv')

# load from SQL Server
conn <- odbcConnect("localhost")
Emp <- sqlQuery(conn,"select * from capstone.dbo.Emp")
odbcClose(conn)

Data Overview

head(df,20)
summary(df)
str(df)

# distinct nominal/ordinal data 
df %>%
  select(column) %>%
  distinct()

Data Union

df_1<-  all_acquired %>% 
  select(touchNum, REAgentID, TalkTimeMinutes, touchTime, LeadChannel_SalesActivity) %>%
  mutate(acquired = "1")
df_0<- not_acquired %>%
  mutate(acquired = "0")
df_all<- union(df_1, df_0)

Bar and Histogram Plot

#Count bargraph 
count_bargraph <- function(x) {
  x + geom_bar(position = "dodge") +
    theme_bw() + 
    theme(panel.border = element_blank(), 
                       panel.grid.major = element_blank(),
                       panel.grid.minor = element_blank()) + 
    labs(y ="Count") +
    coord_flip()
}

#Count histogram
count_hist<- function(x){
  x + geom_histogram(bins = 30)+
    theme_bw() + 
    theme(panel.border = element_blank(), 
                       panel.grid.major = element_blank(),
                       panel.grid.minor = element_blank()) + 
    labs(y ="Count") +
    coord_flip()
}
count_bargraph(ggplot(agents, aes(x = LeadChannel_SalesActivity)))
count_bargraph(ggplot(agents, aes(x = acquired)))
count_hist(ggplot(agents, aes(x = touchNum)))
count_hist(ggplot(agents, aes(x = TalkTimeMinutes)))

agents %>%
  ggplot(aes(x = touchNum, fill =acquired, color = acquired)) +
  geom_histogram(position = "dodge")

Density Plot

agents %>%
  ggplot(aes(x = TalkTimeMinutes, fill =acquired, color = acquired)) +
  geom_density(alpha = .3)+
  xlim(0, 10)

Calculate Conversion Rate

agents %>%
  select(LeadChannel_SalesActivity, acquired, REAgentID) %>%
  group_by(LeadChannel_SalesActivity, acquired) %>%
  summarise(counts = n_distinct(REAgentID)) %>%
  spread(acquired, counts) %>%
  summarise(PercentageAcquired = `1` / (`1` + `0`)) %>%
  arrange(desc(PercentageAcquired)) %>%
  kable(align = c("c", "c")) %>% 
  kable_styling(bootstrap_options = c("striped", "condensed", "bordered")) %>% 
  column_spec(1:2, color = "#000000") 

Markov and Heuristics

# aggregating channels to the paths for each customer
acquired_attr <- all_acquired_agent %>%
  group_by(REAgentID) %>%
  summarise(path = paste(LeadChannel_SalesActivity, collapse = '>'),
            conv = 1,
            conv_null = 0) %>%
  ungroup()

# calculating the models (Markov and heuristics)
mod1 <- markov_model(acquired_attr,
                     var_path = 'path',
                     var_conv = 'conv',
                     var_null = 'conv_null',
                     out_more = TRUE)

h_mod2 <- heuristic_models(acquired_attr, var_path = 'path', var_conv = 'conv')

h_mod2 %>% 
  kable(align = c("c", "c", "c", "c")) %>% 
  kable_styling(bootstrap_options = c("striped", "condensed", "bordered")) %>% 
  column_spec(1:4, color = "#000000") 

Combine Tables

# Get total cost for all the channels
all_acquired <- all_acquired %>%
  left_join(channel_lead_cost_df, by = c("LeadChannel_SalesActivity" = "channel_name"))

# Join table with npv to get revenue
all_acquired <- all_acquired %>%
  left_join(npv_channel_df, by = c("LeadChannel_SalesActivity" = "channel_name")) 

# Since we have not earned any revenue yet, we need only the cost from this dataset to calculate ROI
not_acquired <- not_acquired %>%
  left_join(channel_lead_cost_df, by = c("LeadChannel_SalesActivity" = "channel_name"))

not_acquired_channels_cost <- not_acquired %>%
  select(Channel = LeadChannel_SalesActivity, channel_cost) %>%
  group_by(Channel) %>%
  summarise(revenue = 0, cost = sum(channel_cost))  

First Touch Model

all_acquired_ft <- all_acquired %>%
  mutate(percent_weight = case_when(touchNum == 1 ~ 100,
                            TRUE ~ 0))
# Get revenue and cost by channel
acquired_channels_revenue_cost_ft <- all_acquired_ft %>%
  mutate(lead_revenue = percent_weight*npv/10000) %>%
  select(Channel = LeadChannel_SalesActivity, lead_revenue, channel_cost) %>%
  group_by(Channel) %>%
  summarise(revenue = sum(lead_revenue), cost = sum(channel_cost))

final_ft <- getTotalCostRevenue(acquired_channels_revenue_cost_ft, not_acquired_channels_cost) 

final_ft %>% 
    kable(align = c("c", "c", "c")) %>% 
    kable_styling(bootstrap_options = c("striped", "condensed", "bordered")) %>% 
    column_spec(1:2, color = "#000000") %>%
    add_header_above(c( "First Touch - ROI" = 2))

#Graphs 
basic_theme <- 
  theme(legend.position = "none") +
  theme(plot.title = element_text(size=16, face="bold", hjust=0, color="#000000")) +
  theme(axis.title = element_text(size=10, face="bold", color="#000000")) +
  theme(axis.title.y = element_text(angle=90)) +
  theme(axis.title.x = element_text(hjust = .5)) +
  theme_minimal() +
  theme(axis.line = element_line(color = "#000000"),
        axis.ticks = element_line(color = "#000000"),
        axis.text = element_text(color = "#000000")) +
  theme(plot.title = element_text(hjust = 0.5))

#First Touch graph
FirstTouchROIGraph <- 
  final_ft %>%
  filter(Channel != "Unknown") %>%
  ggplot(aes(x = reorder(Channel, ROI), y = ROI)) +
  geom_bar(position = "dodge", stat = "identity", fill = "#F25F09")+
  coord_flip() +
  basic_theme +
  xlab("")+
  ylab("") +
  ggtitle("First Touch ROI") + 
  scale_y_continuous(labels = function(x) paste0(x, "%")) +
  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank())
  
FirstTouchROIGraph

Last Touch Model

all_acquired_lt_touch <- all_acquired %>%
  select(REAgentID, touchNum) %>%
  group_by(REAgentID) %>%
  top_n(1) %>%
  mutate(percent_weight=100)

all_acquired_lt <- all_acquired %>%
  left_join(all_acquired_lt_touch, by=c("REAgentID", "touchNum")) %>%
  mutate(percent_weight = if_else(is.na(percent_weight), 0, 100))

# Get revenue and cost by channel
acquired_channels_revenue_cost_lt <- all_acquired_lt %>%
  mutate(lead_revenue = percent_weight*npv/10000) %>%
  select(Channel = LeadChannel_SalesActivity, lead_revenue, channel_cost) %>%
  group_by(Channel) %>%
  summarise(revenue = sum(lead_revenue), cost = sum(channel_cost))

final_lt <- getTotalCostRevenue(acquired_channels_revenue_cost_lt, not_acquired_channels_cost) 

final_lt %>% 
    kable(align = c("c", "c", "c")) %>% 
    kable_styling(bootstrap_options = c("striped", "condensed", "bordered")) %>% 
    column_spec(1:2, color = "#000000") %>%
    add_header_above(c( "Last Touch - ROI" = 2))

#Last Touch graph
LastTouchROIGraph <- 
  final_lt %>%
  filter(Channel != "Unknown") %>%
  ggplot(aes(x = reorder(Channel, ROI), y = ROI)) +
  geom_bar(position = "dodge", stat = "identity", fill = "#F25F09")+
  coord_flip() +
  basic_theme +
  xlab("")+
  ylab("") +
  ggtitle("Last Touch ROI") + 
  scale_y_continuous(labels = function(x) paste0(x, "%")) +
  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank())
  
LastTouchROIGraph

Linear Touch Model

all_acquired_linear_touch <- all_acquired %>%
  select(REAgentID, touchNum) %>%
  group_by(REAgentID) %>%
  summarize(touchpoints = max(touchNum)) %>%
  mutate(percent_weight = 100/touchpoints)

all_acquired_linear <- all_acquired %>%
  left_join(all_acquired_linear_touch, by=c("REAgentID"))

# Get revenue and cost by channel
acquired_channels_revenue_cost_linear <- all_acquired_linear %>%
  mutate(lead_revenue = percent_weight*npv/10000) %>%
  select(Channel = LeadChannel_SalesActivity, lead_revenue, channel_cost) %>%
  group_by(Channel) %>%
  summarise(revenue = sum(lead_revenue), cost = sum(channel_cost))

final_linear <- getTotalCostRevenue(acquired_channels_revenue_cost_linear, not_acquired_channels_cost)


final_linear %>% 
    kable(align = c("c", "c", "c")) %>% 
    kable_styling(bootstrap_options = c("striped", "condensed", "bordered")) %>% 
    column_spec(1:2, color = "#000000") %>% 
    add_header_above(c( "Linear Touch - ROI" = 2))

 #Linear Touch graph
LinearTouchROIGraph <- 
  final_linear %>%
  filter(Channel != "Unknown") %>%
  ggplot(aes(x = reorder(Channel, ROI), y = ROI)) +
  geom_bar(position = "dodge", stat = "identity", fill = "#F25F09")+
  coord_flip() +
  basic_theme +
  xlab("")+
  ylab("") +
  ggtitle("Linear Touch ROI") + 
  scale_y_continuous(labels = function(x) paste0(x, "%")) +
  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank())
  
LinearTouchROIGraph

Time Decay Model

# compare Acquisition Date and Touch Date to get days from acquisition
# convert touchTime from factor to date variable
all_acquired_td <- all_acquired %>%
  mutate(AcquisitionDate = as.Date(AcquisitionDate)) %>%
  mutate(Days_to_acquisition = as.numeric(AcquisitionDate  - touchTime))


# By default, the time decay attribution model has a half life of 7 days. It means that the interaction which happened 7 days prior to conversion gets half the credit of interaction that occurred on the day of conversion.
# Value formula -> N(t) = N(0)(1/2)^(t/t1/2) = N(0)* 2^-(t/t1/2)
# Here N(0) = 100%
# t1/2 = 7
# t = Days_to_acquisition
half_life <- 7

all_acquired_td <- all_acquired_td %>%
  mutate(decay_weight = 100 * (2 ^ (-1*Days_to_acquisition/half_life)))

# Get total decay weight to get a ratio of decay_weight/total_weight as percent_weight
all_acquired_total_weight_td <- all_acquired_td %>%
  select(c(REAgentID, decay_weight)) %>%
  group_by(REAgentID) %>%
  summarise(Total_weight = sum(decay_weight))

# Joining the two dataframes
all_acquired_td <-  all_acquired_td %>%
  left_join(all_acquired_total_weight_td, by="REAgentID")

all_acquired_td <- all_acquired_td %>%
  mutate(percent_weight = decay_weight*100/Total_weight)

# Get revenue and cost by channel
acquired_channels_revenue_cost_td <- all_acquired_td %>%
  mutate(lead_revenue = percent_weight*npv/10000) %>%
  select(Channel = LeadChannel_SalesActivity, lead_revenue, channel_cost) %>%
  group_by(Channel) %>%
  summarise(revenue = sum(lead_revenue), cost = sum(channel_cost))

final_td <- getTotalCostRevenue(acquired_channels_revenue_cost_td, not_acquired_channels_cost) 


final_td %>% 
    kable(align = c("c", "c", "c")) %>% 
    kable_styling(bootstrap_options = c("striped", "condensed", "bordered")) %>% 
    column_spec(1:2, color = "#000000") %>% 
    add_header_above(c( "Time Decay - ROI" = 2))

#Time Decay graph
TimeDecayROIGraph <- 
  final_td %>%
  filter(Channel != "Unknown") %>%
  ggplot(aes(x = reorder(Channel, ROI), y = ROI)) +
  geom_bar(position = "dodge", stat = "identity", fill = "#F25F09")+
  coord_flip() +
  basic_theme +
  xlab("")+
  ylab("") +
  ggtitle("Time Decay ROI") + 
  scale_y_continuous(labels = function(x) paste0(x, "%")) +
  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank())
  
TimeDecayROIGraph

Leave a Reply

Fill in your details below or click an icon to log in:

WordPress.com Logo

You are commenting using your WordPress.com account. Log Out /  Change )

Google photo

You are commenting using your Google account. Log Out /  Change )

Twitter picture

You are commenting using your Twitter account. Log Out /  Change )

Facebook photo

You are commenting using your Facebook account. Log Out /  Change )

Connecting to %s