Árbol de decisión en R

Se construyen árboles de decisión utilizando los paquetes party, evtree, tree, rpart, rpart.plot y randomForest.


library(rpart)
library(rpart.plot)
library(tree)
library(party)
library(randomForest)
library(evtree)

# Load the built-in iris data set and take a first look at it.
data(iris)
View(iris)
str(iris)
# Make the iris columns visible by bare name on the search path.
attach(iris)
nrow(iris)

## tree package

# Classification tree for Species using every other column as a predictor.
arb1 <- tree(formula = Species ~ ., data = iris)
print(arb1)

node), split, n, deviance, yval, (yprob)
      * denotes terminal node

 1) root 150 329.600 setosa ( 0.33333 0.33333 0.33333 )  
   2) Petal.Length < 2.45 50   0.000 setosa ( 1.00000 0.00000 0.00000 ) *
   3) Petal.Length > 2.45 100 138.600 versicolor ( 0.00000 0.50000 0.50000 )  
     6) Petal.Width < 1.75 54  33.320 versicolor ( 0.00000 0.90741 0.09259 )  
      12) Petal.Length < 4.95 48   9.721 versicolor ( 0.00000 0.97917 0.02083 )  
        24) Sepal.Length < 5.15 5   5.004 versicolor ( 0.00000 0.80000 0.20000 ) *
        25) Sepal.Length > 5.15 43   0.000 versicolor ( 0.00000 1.00000 0.00000 ) *
      13) Petal.Length > 4.95 6   7.638 virginica ( 0.00000 0.33333 0.66667 ) *
     7) Petal.Width > 1.75 46   9.635 virginica ( 0.00000 0.02174 0.97826 )  
      14) Petal.Length < 4.95 6   5.407 virginica ( 0.00000 0.16667 0.83333 ) *
      15) Petal.Length > 4.95 40   0.000 virginica ( 0.00000 0.00000 1.00000 ) *

# Summary of the fitted tree arb1.
summary(arb1)

# Draw the tree and label its splits and leaves.
plot(arb1)
text(arb1)

# Misclassification counts reported per node.
misclass.tree(arb1, detail = TRUE)

# Default prediction for a classification tree is the matrix of class
# probabilities; type = "class" returns the predicted label instead.
predict(arb1)
predict(arb1, type = "class")

# Count the predicted classes. The labels must be requested explicitly:
# tabulating the default probability matrix would count distinct probability
# values rather than species.
table(predict(arb1, type = "class"))

# Snip the tree at nodes 12 and 7, then overlay the resulting partition on a
# petal length vs petal width scatter plot labelled by species initial.
arb11 <- snip.tree(arb1, nodes = c(12, 7))
plot(
  iris[, 3], iris[, 4],
  type = "n",
  xlab = "petal length", ylab = "petal width"
)
text(iris[, 3], iris[, 4], c("s", "c", "v")[iris[, 5]])
partition.tree(arb11, add = TRUE, cex = 1.5)



###
## party package

# Conditional inference tree on the full iris data. The data frame is passed
# explicitly so the call does not depend on attach(iris) having been run.
arb2 <- ctree(
  Species ~ Sepal.Length + Sepal.Width + Petal.Length + Petal.Width,
  data = iris
)
print(arb2)

Model formula:
Species ~ Sepal.Length + Sepal.Width + Petal.Length + Petal.Width

Fitted party:
[1] root
|   [2] Petal.Length <= 1.9: setosa (n = 50, err = 0.0%)
|   [3] Petal.Length > 1.9
|   |   [4] Petal.Width <= 1.7
|   |   |   [5] Petal.Length <= 4.8: versicolor (n = 46, err = 2.2%)
|   |   |   [6] Petal.Length > 4.8: versicolor (n = 8, err = 50.0%)
|   |   [7] Petal.Width > 1.7: virginica (n = 46, err = 2.2%)

Number of inner nodes:    3
Number of terminal nodes: 4

# Default plot of the conditional inference tree.
plot(arb2)

# Same tree drawn with the "simple" plot type.
plot(arb2, type = "simple")

# Predicted response for the fitted data, followed by the count of each
# predicted class.
print(predict(arb2, type = "response"))
print(table(predict(arb2)))

## rpart package

# CART tree on the full iris data, matching the tree and party sections that
# also fit on all rows. The original call used `data = entrena`, an object
# that is never created in this script.
arb3 <- rpart(
  Species ~ Sepal.Length + Sepal.Width + Petal.Length + Petal.Width,
  data = iris
)
print(arb3)

n= 102 

node), split, n, loss, yval, (yprob)
      * denotes terminal node

1) root 102 65 virginica (0.33333333 0.30392157 0.36274510)  
  2) Petal.Length< 2.45 34  0 setosa (1.00000000 0.00000000 0.00000000) *
  3) Petal.Length>=2.45 68 31 virginica (0.00000000 0.45588235 0.54411765)  
    6) Petal.Length< 4.75 27  0 versicolor (0.00000000 1.00000000 0.00000000) *
    7) Petal.Length>=4.75 41  4 virginica (0.00000000 0.09756098 0.90243902)

# Base-graphics rendering of the rpart tree; use.n adds the per-class
# observation counts to each node label.
plot(arb3)
text(arb3, use.n = TRUE)  # TRUE instead of T, which can be reassigned

# Default prediction for a classification rpart: class-probability matrix.
predict(arb3)

# Nicer rendering from the rpart.plot package.
rpart.plot(arb3)

# Keep the probability matrix for inspection.
pre20 <- predict(arb3)
pre20

# Count the predicted classes. Tabulating pre20 directly would count distinct
# probability values, so the class labels are requested explicitly.
table(predict(arb3, type = "class"))

## party package with training and test sets

# Label every row 1 (probability 0.7) or 2 (probability 0.3); rows labelled 1
# form the training set and rows labelled 2 the test set.
muestra1 <- sample(2, nrow(iris), replace = TRUE, prob = c(0.7, 0.3))
# Subset with muestra1/entrena1: the original referred to `muestra` and
# `entrena`, which are never defined in this script.
entrena1 <- iris[muestra1 == 1, ]
prueba1 <- iris[muestra1 == 2, ]

# Fit on the training rows and cross-tabulate fitted vs observed species.
arb4 <- ctree(
  Species ~ Sepal.Length + Sepal.Width + Petal.Length + Petal.Width,
  data = entrena1
)
table(predict(arb4), entrena1$Species)
print(arb4)

Model formula:
Species ~ Sepal.Length + Sepal.Width + Petal.Length + Petal.Width

Fitted party:
[1] root
|   [2] Petal.Length <= 1.9: setosa (n = 34, err = 0.0%)
|   [3] Petal.Length > 1.9
|   |   [4] Petal.Width <= 1.6
|   |   |   [5] Petal.Length <= 4.6: versicolor (n = 23, err = 0.0%)
|   |   |   [6] Petal.Length > 4.6: versicolor (n = 10, err = 30.0%)
|   |   [7] Petal.Width > 1.6: virginica (n = 35, err = 2.9%)

Number of inner nodes:    3
Number of terminal nodes: 4

# Default and "simple" plots of the tree fitted on the training rows.
plot(arb4)

plot(arb4, type = "simple")

# Predicted response on the data the tree was fitted to.
print(predict(arb4, type = "response"))

# Predict the held-out rows and cross-tabulate against the observed species.
reprueba1 <- predict(arb4, newdata = prueba1)
print(table(reprueba1, prueba1$Species))
###
## rpart package with training and test sets

# Label every row 1 (probability 0.7) or 2 (probability 0.3); rows labelled 1
# form the training set and rows labelled 2 the test set.
muestra2 <- sample(2, nrow(iris), replace = TRUE, prob = c(0.7, 0.3))
# Subset with muestra2/entrena2: the original referred to `muestra` and
# `entrena`, which are never defined in this script.
entrena2 <- iris[muestra2 == 1, ]
prueba2 <- iris[muestra2 == 2, ]

# Fit the CART tree on the training rows only.
arb5 <- rpart(
  Species ~ Sepal.Length + Sepal.Width + Petal.Length + Petal.Width,
  data = entrena2
)
print(arb5)

n= 102 
node), split, n, loss, yval, (yprob)
      * denotes terminal node

1) root 102 65 virginica (0.33333333 0.30392157 0.36274510)  
  2) Petal.Length< 2.45 34  0 setosa (1.00000000 0.00000000 0.00000000) *
  3) Petal.Length>=2.45 68 31 virginica (0.00000000 0.45588235 0.54411765)  
    6) Petal.Length< 4.75 27  0 versicolor (0.00000000 1.00000000 0.00000000) *
    7) Petal.Length>=4.75 41  4 virginica (0.00000000 0.09756098 0.90243902) *

# Base-graphics rendering of the rpart tree; use.n adds the per-class
# observation counts to each node label.
plot(arb5)
text(arb5, use.n = TRUE)  # TRUE instead of T, which can be reassigned

# Class-probability matrix for the fitted data (printed once; the original
# repeated this call further down).
predict(arb5)

# Nicer rendering from the rpart.plot package.
rpart.plot(arb5)

# Count the predicted classes. The default predict() output is a probability
# matrix, so the class labels are requested explicitly before tabulating.
table(predict(arb5, type = "class"))
##
## random forest

# Label every row 1 (probability 0.7) or 2 (probability 0.3); rows labelled 1
# form the training set and rows labelled 2 the test set.
muestra3 <- sample(2, nrow(iris), replace = TRUE, prob = c(0.7, 0.3))
# Subset with muestra3: the original indexed with `ind`, which is never
# defined in this script.
entrena3 <- iris[muestra3 == 1, ]
prueba3 <- iris[muestra3 == 2, ]

# Forest of 100 trees on the training rows, keeping the proximity matrix.
arb6 <- randomForest(Species ~ ., data = entrena3, ntree = 100, proximity = TRUE)

# Cross-tabulate the forest's predictions for the training rows against the
# observed species, then print the model summary.
table(predict(arb6), entrena3$Species)
print(arb6)

Call:
 randomForest(formula = Species ~ ., data = entrena3, ntree = 100,      proximity = TRUE) 
               Type of random forest: classification
                     Number of trees: 100
No. of variables tried at each split: 2

        OOB estimate of  error rate: 5.83%
Confusion matrix:
           setosa versicolor virginica class.error
setosa         34          0         0  0.00000000
versicolor      0         33         3  0.08333333
virginica       0          3        30  0.09090909

# Inspect the fitted forest: its attributes, its default plot, and the
# variable-importance matrix.
print(attributes(arb6))
plot(arb6)
print(importance(arb6))

             MeanDecreaseGini
Sepal.Length         6.377817
Sepal.Width          2.194198
Petal.Length        30.708210
Petal.Width         28.476507

# Dot chart of variable importance.
varImpPlot(arb6)

# Predict the held-out rows and compare with the observed species.
predi <- predict(arb6, newdata = prueba3)
print(predi)
print(table(predi, prueba3$Species))

# Margin plot for the forest.
# NOTE(review): confirm whether the prueba3$Species argument has any effect in
# the installed randomForest version - the margin may be computed from the
# votes and response stored in arb6 rather than from the test labels.
plot(margin(arb6, prueba3$Species))














## evtree package

# Evolutionary tree on the full iris data. The data frame is passed
# explicitly so the call does not depend on attach(iris) having been run.
arb7 <- evtree(
  Species ~ Sepal.Length + Sepal.Width + Petal.Length + Petal.Width,
  data = iris
)
arb7

Model formula:
Species ~ Sepal.Length + Sepal.Width + Petal.Length + Petal.Width

Fitted party:
[1] root
|   [2] Petal.Width < 1: setosa (n = 50, err = 0.0%)
|   [3] Petal.Width >= 1
|   |   [4] Petal.Length < 5
|   |   |   [5] Petal.Width < 1.7: versicolor (n = 47, err = 0.0%)
|   |   |   [6] Petal.Width >= 1.7: virginica (n = 7, err = 14.3%)
|   |   [7] Petal.Length >= 5: virginica (n = 46, err = 4.3%)

Number of inner nodes:    3
Number of terminal nodes: 4

# Summary and the two plot styles for the evtree model.
summary(arb7)
plot(arb7)

plot(arb7, type = "simple")

# Predictions for the data arb7 was fitted to. The original referred to
# `pre10`, `ta` and `árbol4`, none of which exist in this script; they are
# replaced by the prediction vector, `tabla`, and arb7 respectively.
pre10 <- predict(arb7)
pre10

# Observed and predicted species side by side. cbind() stores factors as
# their integer level codes, so both columns are numeric.
tabla <- cbind(Species = iris$Species, pre10 = pre10)
tabla

# Count of each predicted class.
table(pre10)

No hay comentarios:

Publicar un comentario