# RFM分析 (RFM analysis)
# Generate synthetic sales data for RFM analysis ----

# Number of days between each transaction date and a reference date.
#
# x:    Date vector of transaction dates.
# date: Date the recency is measured against.
# Returns a numeric vector (same length as `x`) of `date - x`, rounded to
# whole days; rounding matters here because the simulated dates below carry
# fractional days.
calculate.recency <- function(x, date) {
  round(as.numeric(date - x))
}

# Reference date against which recency is measured.
date <- as.Date("2016-1-1")

# 10000 simulated transactions. The random draws are made in the same order
# as before (customer, value, date, category) so seeded runs are unchanged.
sales <- data.frame(
  sample(1000:1999, replace = TRUE, size = 10000),
  abs(round(rnorm(10000, 28, 13))),
  as.Date("2014/1/1") + 700 * sort(stats::runif(10000)),
  sample(1:4, replace = TRUE, size = 10000)
)
names(sales) <- c("CustomerID", "Sales Value", "Date", "Category")

# Recency must be computed after `calculate.recency` and `date` exist and
# before `data.clean` is derived, so the filtered copy carries the column.
recency <- calculate.recency(sales$Date, date)
sales$Recency <- recency

# Keep only the transactions in categories 1 and 2.
data.clean <- sales[sales$Category <= 2, ]
# Per-customer RFM summaries, computed from the filtered transactions ----

# Recency: smallest recency value recorded for each customer.
RFM.Recency <- setNames(
  abs(aggregate(data.clean[["Recency"]],
                by = list(data.clean[["CustomerID"]]),
                FUN = min)),
  c("CustomerID", "Recency")
)

# Frequency: number of transactions recorded for each customer.
RFM.Frequency <- setNames(
  aggregate(data.clean[["Sales Value"]],
            by = list(data.clean[["CustomerID"]]),
            FUN = length),
  c("CustomerID", "Frequency")
)

# Money: mean sales value per transaction for each customer.
RFM.Money <- setNames(
  aggregate(data.clean[["Sales Value"]],
            by = list(data.clean[["CustomerID"]]),
            FUN = mean),
  c("CustomerID", "Money")
)

# Join the three summaries on CustomerID into a single RFM data frame.
RFM <- Reduce(
  function(left, right) merge(left, right, by = "CustomerID"),
  list(RFM.Recency, RFM.Frequency, RFM.Money)
)
#create RFM levels
# Map a numeric vector onto `interval` equal-width levels.
#
# The span between min(RFMdata) and max(RFMdata) is divided into `interval`
# bins of equal width, and each value is expressed as its position in that
# span.
#
# RFMdata:  numeric vector to be levelled.
# interval: single positive number, the count of levels to produce.
# original: if FALSE (default) return whole-number levels in 1..interval;
#           if TRUE return the unrounded position (distance from the minimum
#           divided by the bin width).
# Returns a numeric vector the same length as `RFMdata`. Elements equal to
# the minimum (including the case where all values are equal) map to 1.
getRFMinterval <- function(RFMdata, interval, original = FALSE) {
  stopifnot(is.numeric(interval), length(interval) == 1L, interval > 0)

  minData <- min(RFMdata)
  factorInterval <- (max(RFMdata) - minData) / interval
  difference <- RFMdata - minData
  position <- difference / factorInterval

  if (!original) {
    # Floating-point rounding can push max / width marginally above
    # `interval` (e.g. 1 / (1 / 49) is slightly greater than 49), so
    # ceiling() alone would create a spurious extra top level; clamp it.
    position <- pmin(ceiling(position), ceiling(interval))
  }

  # The minimum has zero distance and would otherwise land on level 0 (or
  # NaN when every value is identical), so it is pinned to the first level.
  ifelse(difference > 0, position, 1)
}
# Score each RFM dimension on three levels.
RFM[["RankR"]] <- getRFMinterval(RFM[["Recency"]], 3, FALSE)
RFM[["RankF"]] <- getRFMinterval(RFM[["Frequency"]], 3, FALSE)
RFM[["RankM"]] <- getRFMinterval(RFM[["Money"]], 3, FALSE)
# Combine the three ranks into one three-digit weight (hundreds = R,
# tens = F, units = M).
RFM[["Weight"]] <- with(RFM, RankR * 100 + RankF * 10 + RankM)
# 原文網址 (original article): http://ljy.logdown.com/posts/2014/12/27/rfm-analysis-using-r