Aplicaciones Estadísticas en R: Árbol de decisión en R

Se construyen árboles de decisión utilizando los paquetes party, evtree, tree, rpart, rpart.plot y randomForest.

library(rpart)
library(rpart.plot)
library(tree)
library(party)
library(randomForest)
library(evtree)

# Load the built-in iris data set and take a first look at it.
data(iris)
# View() opens a spreadsheet-style viewer; only do so in an interactive
# session so the script can also be run in batch mode.
if (interactive()) {
  View(iris)
}
str(iris)
# NOTE(review): attach() is discouraged; it is kept here only because code
# further down may refer to the iris columns by bare name.
attach(iris)
nrow(iris)
## paquete tree

# Fit a classification tree for Species using every other column of iris
# as a candidate predictor, then show the fitted splits.
arb1 <- tree(formula = Species ~ ., data = iris)
arb1

node), split, n, deviance, yval, (yprob)
* denotes terminal node

1) root 150 329.600 setosa ( 0.33333 0.33333 0.33333 )
2) Petal.Length < 2.45 50 0.000 setosa ( 1.00000 0.00000 0.00000 ) *
3) Petal.Length > 2.45 100 138.600 versicolor ( 0.00000 0.50000 0.50000 )
6) Petal.Width < 1.75 54 33.320 versicolor ( 0.00000 0.90741 0.09259 )
12) Petal.Length < 4.95 48 9.721 versicolor ( 0.00000 0.97917 0.02083 )
24) Sepal.Length < 5.15 5 5.004 versicolor ( 0.00000 0.80000 0.20000 ) *
25) Sepal.Length > 5.15 43 0.000 versicolor ( 0.00000 1.00000 0.00000 ) *
13) Petal.Length > 4.95 6 7.638 virginica ( 0.00000 0.33333 0.66667 ) *
7) Petal.Width > 1.75 46 9.635 virginica ( 0.00000 0.02174 0.97826 )
14) Petal.Length < 4.95 6 5.407 virginica ( 0.00000 0.16667 0.83333 ) *
15) Petal.Length > 4.95 40 0.000 virginica ( 0.00000 0.00000 1.00000 ) *

# Summary of the fitted tree.
summary(arb1)

# Draw the tree and label its splits.
plot(arb1)
text(arb1)

# Misclassification counts, reported per node.
misclass.tree(arb1, detail = TRUE)
# With no `type`, predict() on this tree gives per-class values rather than
# labels; `type = "class"` gives the predicted species.
predict(arb1)
predict(arb1, type = "class")
# Tabulate predicted classes (tabulating the default output would count
# individual probability values, which is not meaningful).
table(predict(arb1, type = "class"))

# Prune the tree at nodes 12 and 7 and overlay the resulting partition on a
# petal length / petal width scatter of the observations.
arb11 <- snip.tree(arb1, nodes = c(12, 7))
plot(iris[, 3], iris[, 4], type = "n",
     xlab = "petal length", ylab = "petal width")
text(iris[, 3], iris[, 4], c("s", "c", "v")[iris[, 5]])
partition.tree(arb11, add = TRUE, cex = 1.5)

###

##paquete party

# Conditional inference tree on the full iris data. The data set is passed
# explicitly so the call does not depend on iris being attached.
arb2 <- ctree(
  Species ~ Sepal.Length + Sepal.Width + Petal.Length + Petal.Width,
  data = iris
)

print(arb2)

Model formula:

Species ~ Sepal.Length + Sepal.Width + Petal.Length + Petal.Width

Fitted party:

[1] root

| [2] Petal.Length <= 1.9: setosa (n = 50, err = 0.0%)

| [3] Petal.Length > 1.9

| | [4] Petal.Width <= 1.7

| | | [5] Petal.Length <= 4.8: versicolor (n = 46, err = 2.2%)

| | | [6] Petal.Length > 4.8: versicolor (n = 8, err = 50.0%)

| | [7] Petal.Width > 1.7: virginica (n = 46, err = 2.2%)

Number of inner nodes: 3

Number of terminal nodes: 4

# Two renderings of the conditional inference tree: default and simplified.
plot(arb2)
plot(arb2, type = "simple")

# Predicted species for the fitting data, followed by their frequencies.
predict(arb2, type = "response")
table(predict(arb2))

##paquete rpart

# rpart tree on the full iris data. The original call used `data = entrena`,
# an object that is never created in this file; the train/test variant of
# this model is built separately further down.
# NOTE(review): the console output pasted after this call reports n = 102,
# so it came from a training subset and will differ from a full-data fit.
arb3 <- rpart(
  Species ~ Sepal.Length + Sepal.Width + Petal.Length + Petal.Width,
  data = iris
)

print(arb3)

n= 102

node), split, n, loss, yval, (yprob)

* denotes terminal node

1) root 102 65 virginica (0.33333333 0.30392157 0.36274510)

2) Petal.Length< 2.45 34 0 setosa (1.00000000 0.00000000 0.00000000) *

3) Petal.Length>=2.45 68 31 virginica (0.00000000 0.45588235 0.54411765)

6) Petal.Length< 4.75 27 0 versicolor (0.00000000 1.00000000 0.00000000) *

7) Petal.Length>=4.75 41 4 virginica (0.00000000 0.09756098 0.90243902) *

# Base-graphics drawing of the tree, with observation counts in the labels.
plot(arb3)
text(arb3, use.n = TRUE)

# Default predict() output for a classification rpart: per-class probabilities.
predict(arb3)

# Nicer rendering from rpart.plot.
rpart.plot(arb3)

# Predicted class labels; these (not the probability matrix) are what
# should be tabulated.
pre20 <- predict(arb3, type = "class")
pre20
table(pre20)

##paquete party con entrenamiento y prueba

# Randomly label each row 1 (about 70%, training) or 2 (about 30%, test).
# No seed is set in this file, so the split changes on every run.
muestra1 <- sample(2, nrow(iris), replace = TRUE, prob = c(0.7, 0.3))

# Split using the labels just drawn (the original indexed with `muestra`
# and fitted on `entrena`, neither of which exists).
entrena1 <- iris[muestra1 == 1, ]
prueba1 <- iris[muestra1 == 2, ]

# Conditional inference tree fitted on the training rows only.
arb4 <- ctree(
  Species ~ Sepal.Length + Sepal.Width + Petal.Length + Petal.Width,
  data = entrena1
)

# Confusion matrix on the training data: predicted (rows) vs observed (cols).
table(predict(arb4), entrena1$Species)

print(arb4)

Model formula:

Species ~ Sepal.Length + Sepal.Width + Petal.Length + Petal.Width

Fitted party:

[1] root

| [2] Petal.Length <= 1.9: setosa (n = 34, err = 0.0%)

| [3] Petal.Length > 1.9

| | [4] Petal.Width <= 1.6

| | | [5] Petal.Length <= 4.6: versicolor (n = 23, err = 0.0%)

| | | [6] Petal.Length > 4.6: versicolor (n = 10, err = 30.0%)

| | [7] Petal.Width > 1.6: virginica (n = 35, err = 2.9%)

Number of inner nodes: 3

Number of terminal nodes: 4

# Default and simplified plots of the tree fitted on the training rows.
plot(arb4)
plot(arb4, type = "simple")

# Predicted classes for the data the tree was fitted on.
predict(arb4, type = "response")

# Predictions for the held-out rows, cross-tabulated against the observed
# species (predicted in rows, observed in columns).
reprueba1 <- predict(arb4, newdata = prueba1)
table(reprueba1, prueba1$Species)

###

##paquete rpart con entrenamiento y prueba

# Randomly label each row 1 (about 70%, training) or 2 (about 30%, test).
# No seed is set in this file, so the split changes on every run.
muestra2 <- sample(2, nrow(iris), replace = TRUE, prob = c(0.7, 0.3))

# Split using the labels just drawn (the original indexed with `muestra`
# and fitted on `entrena`, neither of which exists).
entrena2 <- iris[muestra2 == 1, ]
prueba2 <- iris[muestra2 == 2, ]

# rpart classification tree fitted on the training rows only.
arb5 <- rpart(
  Species ~ Sepal.Length + Sepal.Width + Petal.Length + Petal.Width,
  data = entrena2
)

print(arb5)

n= 102

node), split, n, loss, yval, (yprob)

* denotes terminal node

1) root 102 65 virginica (0.33333333 0.30392157 0.36274510)

2) Petal.Length< 2.45 34 0 setosa (1.00000000 0.00000000 0.00000000) *

3) Petal.Length>=2.45 68 31 virginica (0.00000000 0.45588235 0.54411765)

6) Petal.Length< 4.75 27 0 versicolor (0.00000000 1.00000000 0.00000000) *

7) Petal.Length>=4.75 41 4 virginica (0.00000000 0.09756098 0.90243902) *

# Base-graphics drawing of the tree, with observation counts in the labels.
plot(arb5)
text(arb5, use.n = TRUE)

# Default predict() output for a classification rpart: per-class probabilities.
predict(arb5)

# Nicer rendering from rpart.plot.
rpart.plot(arb5)

# Frequencies of the predicted class labels. Tabulating the default
# probability matrix would count probability values instead of classes.
table(predict(arb5, type = "class"))

## con random forest

# Randomly label each row 1 (about 70%, training) or 2 (about 30%, test).
# No seed is set in this file, so the split changes on every run.
muestra3 <- sample(2, nrow(iris), replace = TRUE, prob = c(0.7, 0.3))

# Split using the labels just drawn (the original indexed with `ind`,
# which is never defined).
entrena3 <- iris[muestra3 == 1, ]
prueba3 <- iris[muestra3 == 2, ]

# Random forest of 100 trees on the training rows; proximity = TRUE also
# requests the proximity matrix.
arb6 <- randomForest(Species ~ ., data = entrena3, ntree = 100, proximity = TRUE)

# Predictions for the training rows vs observed species.
# NOTE(review): without newdata these are presumably out-of-bag
# predictions - confirm against the randomForest documentation.
table(predict(arb6), entrena3$Species)

print(arb6)

Call:

randomForest(formula = Species ~ ., data = entrena3, ntree = 100, proximity = TRUE)

Type of random forest: classification

Number of trees: 100

No. of variables tried at each split: 2

OOB estimate of error rate: 5.83%

Confusion matrix:

setosa versicolor virginica class.error

setosa 34 0 0 0.00000000

versicolor 0 33 3 0.08333333

virginica 0 3 30 0.09090909

# Inspect the fitted forest: its components, its error curve as trees are
# added, and the importance score of each predictor.
attributes(arb6)
plot(arb6)
importance(arb6)

MeanDecreaseGini

Sepal.Length 6.377817

Sepal.Width 2.194198

Petal.Length 30.708210

Petal.Width 28.476507

# Dot chart of variable importance.
varImpPlot(arb6)

# Predict the held-out rows and compare with the observed species
# (predicted in rows, observed in columns).
predi <- predict(arb6, newdata = prueba3)
predi
table(predi, prueba3$Species)

# Margins of the fitted forest. The original also passed prueba3$Species,
# but for a randomForest object margin() works from the votes and labels
# stored in the fit, so that argument had no effect and wrongly suggested
# test-set margins.
plot(margin(arb6))

##paquete evtree

# Evolutionary tree on the full iris data. The data set is passed
# explicitly so the call does not depend on iris being attached.
arb7 <- evtree(
  Species ~ Sepal.Length + Sepal.Width + Petal.Length + Petal.Width,
  data = iris
)

arb7

Model formula:

Species ~ Sepal.Length + Sepal.Width + Petal.Length + Petal.Width

Fitted party:

[1] root

| [2] Petal.Width < 1: setosa (n = 50, err = 0.0%)

| [3] Petal.Width >= 1

| | [4] Petal.Length < 5

| | | [5] Petal.Width < 1.7: versicolor (n = 47, err = 0.0%)

| | | [6] Petal.Width >= 1.7: virginica (n = 7, err = 14.3%)

| | [7] Petal.Length >= 5: virginica (n = 46, err = 4.3%)

Number of inner nodes: 3

Number of terminal nodes: 4

summary(arb7)

# Default and simplified plots of the evolutionary tree.
plot(arb7)
plot(arb7, type = "simple")

# Predicted species for the data the tree was fitted on.
pre7 <- predict(arb7)
pre7

# Observed vs predicted species side by side. The original referenced the
# undefined objects `pre10`, `ta` and `árbol4`. cbind() stores the two
# factors as their integer level codes.
tabla <- cbind(Species = iris$Species, pre7 = pre7)
tabla

# Frequencies of the predicted classes.
table(pre7)

Aplicaciones Estadísticas en R

Árbol de decisión en R

No hay comentarios.:

Publicar un comentario

Archivo del Blog