# Titanic example (decision tree)

# Install rpart.plot only when it is not already available, so that re-running
# the script does not reinstall the package every time.
if (!requireNamespace("rpart.plot", quietly = TRUE)) {
  install.packages("rpart.plot")
}

# Print the current working directory, then switch to the data folder.
# NOTE(review): the hard-coded path ties the script to one machine. It is kept
# because the relative file names below are resolved against it; consider
# replacing it with a project-relative path.
getwd()
setwd("Q:\\R")

# Load the two CSV files from the working directory.
train <- read.csv("train.csv")
test <- read.csv("test.csv")

# Preview the first rows of each data set.
head(train)
head(test)
# Stack train and test so the engineered columns below are built the same way
# for both. rbind() on data frames needs the same columns on both sides, so a
# placeholder Survived column is added to test when it does not have one.
# NOTE(review): presumably test.csv has no Survived column -- confirm.
if (!"Survived" %in% names(test)) {
  test$Survived <- NA
}
combi <- rbind(train, test)

# Title: the second piece of Name after splitting on "," and ".".
# The piece keeps its leading space, which is why every literal below starts
# with a space. vapply() guarantees one character value per name.
combi$Name <- as.character(combi$Name)
combi$Title <- vapply(
  combi$Name,
  function(x) strsplit(x, split = "[,.]")[[1]][2],
  character(1),
  USE.NAMES = FALSE
)

# Merge several titles into three shared labels.
combi$Title[combi$Title %in% c(" Mlle", " Mme")] <- " Mlle"
combi$Title[combi$Title %in% c(" Capt", " Don", " Sir")] <- " Sir"
combi$Title[
  combi$Title %in% c(" Jonkheer", " Dona", " the Countess", " Lady")
] <- " Lady"
combi$Title <- as.factor(combi$Title)

# FamilySize: SibSp + Parch + 1, bucketed into
#   Small  (<= 2), Middle (3 to 6), Large (> 6).
# The bucketing is done on the numeric value with cut(); assigning the labels
# into the numeric vector first would turn it into character and make the
# later comparisons string comparisons. Going through as.character() before
# as.factor() gives alphabetically ordered levels.
combi$FamilySize <- as.factor(as.character(cut(
  combi$SibSp + combi$Parch + 1,
  breaks = c(-Inf, 2, 6, Inf),
  labels = c("Small", "Middle", "Large")
)))
# Split the combined data back into its two parts. The first nrow(train) rows
# of combi came from train and the rest from test, so the boundary is taken
# from the data instead of being hard-coded to one particular file size.
n_train <- nrow(train)
is_train <- seq_len(nrow(combi)) <= n_train
train <- combi[is_train, ]
test <- combi[!is_train, ]

library(rpart)
library(rpart.plot)

# Fit a classification tree for Survived on the original and engineered
# columns, then predict a class label for every row of test.
titanic_tree <- rpart(
  Survived ~ Pclass + Sex + Age + SibSp + Parch + Fare + Embarked + Title +
    FamilySize,
  data = train,
  method = "class"
)
Prediction <- predict(titanic_tree, test, type = "class")
