RFM分析 (RFM analysis: Recency, Frequency, Monetary value)

#generate sales date for RFM analysis
sales <- data.frame(
  sample(1000:1999,replace=T,size=10000),
  abs(round(rnorm(10000,28,13))),
  as.Date("2014/1/1") + 700*sort(stats::runif(10000)),
  data.frame(sample(1:4,replace=T,size=10000))
)
names(sales) <- c("CustomerID", "Sales Value", "Date", "Category")

sales$Recency=recency
data.clean <- sales[sales$Category<=2,]


calculate.recency=function(x,date){
  days=round(as.numeric(date-x))
  return(days)
}
date=as.Date("2016-1-1")
recency=calculate.recency(sales$Date,date)


#get RFM data
RFM.Recency <- abs(aggregate(data.clean$Recency, list(data.clean$CustomerID), min))
names(RFM.Recency) <- c("CustomerID", "Recency")
RFM.Frequency <- aggregate(data.clean$"Sales Value", list(data.clean$CustomerID), length)
names(RFM.Frequency) <- c("CustomerID", "Frequency")
RFM.Money <- aggregate(data.clean$"Sales Value", list(data.clean$CustomerID), mean)
names(RFM.Money) <- c("CustomerID", "Money")

#merge to RFM data frame
RFM <- merge(RFM.Recency, RFM.Frequency, "CustomerID")
RFM <- merge(RFM, RFM.Money, "CustomerID")

#create RFM levels
getRFMinterval <- function(RFMdata, interval, original){
  #get min value
  maxData <- max(RFMdata)
  minData <- min(RFMdata)
  factorInterval <- (maxData-minData)/interval
  difference <- RFMdata - minData

  return(
    if(!original){
      ifelse(difference>0, ceiling(difference/factorInterval), 1)  
    }
    else {
      ifelse(difference>0, difference/factorInterval, 1)  
    }
  )
}

RFM$RankR <- getRFMinterval(RFM$Recency, 3, F)
RFM$RankF <- getRFMinterval(RFM$Frequency, 3, F)
RFM$RankM <- getRFMinterval(RFM$Money,3, F)

#create weight
RFM$Weight <- RFM$RankR*100+RFM$RankF*10+RFM$RankM

原文網址:http://ljy.logdown.com/posts/2014/12/27/rfm-analysis-using-r

results matching ""

    No results matching ""