-
Notifications
You must be signed in to change notification settings - Fork 2.1k
googlegroupsformatter
bhive01 edited this page Feb 15, 2011
·
14 revisions
require(mclust)
require(ggplot2)
require(ellipse)
#if function, input is original dataset
orig.data<-iris[1:4]
data.mclust<-Mclust(orig.data)
BIC.data<-as.data.frame(data.mclust$BIC)
BIC.data$NumComp<-rownames(BIC.data)
melted.BIC<-melt(BIC.data, var.ids= "NumComp")
ggplot(melted.BIC, aes(x=as.numeric(NumComp), y=value, colour=variable, group=variable))+
scale_x_continuous("Number of Components")+
scale_y_continuous("BIC")+
scale_colour_hue("")+
geom_point()+
geom_line()+
theme_bw()
#plotting correlation matrix of original data colored by model classification
#stripped from plotmatrix()
mapping=aes(colour=as.factor(data.mclust$classification), shape=as.factor(data.mclust$classification))
grid <- expand.grid(x = 1:ncol(orig.data), y = 1:ncol(orig.data))
grid <- subset(grid, x != y)
all <- do.call("rbind", lapply(1:nrow(grid), function(i) {
xcol <- grid[i, "x"]
ycol <- grid[i, "y"]
data.frame(xvar = names(orig.data)[ycol], yvar = names(orig.data)[xcol],
x = orig.data[, xcol], y = orig.data[, ycol], orig.data)
}))
all$xvar <- factor(all$xvar, levels = names(orig.data))
all$yvar <- factor(all$yvar, levels = names(orig.data))
densities <- do.call("rbind", lapply(1:ncol(orig.data), function(i) {
data.frame(xvar = names(orig.data)[i], yvar = names(orig.data)[i],
x = orig.data[, i])
}))
mapping <- defaults(mapping, aes_string(x = "x", y = "y"))
class(mapping) <- "uneval"
ggplot(all, mapping) +
facet_grid(xvar ~ yvar, scales = "free") +
geom_point(na.rm = TRUE) +
stat_density(aes(x = x, y = ..scaled.. * diff(range(x)) + min(x)), data = densities, position = "identity", colour = "grey20", geom = "line") +
opts(legend.postion= "none")
#cant get rid of the damn legend
#it would be a bunch faster if it only plotted 1 v 2 and not also 2 v 1... cest la vie
#scatter plot of width by length
#color and shape by classification
#center of cluster by data.mclust$parameters$mean
get.ellipses <- function(coords, mclust.fit){
centers <- mclust.fit$parameters$mean[coords, ]
vars <- mclust.fit$parameters$variance$sigma[coords, coords, ]
ldply(1:ncol(centers), function(cluster){
data.frame(ellipse(vars[,,cluster], centre = centers[, cluster],
level = 0.5), classification = cluster)
})
}
iris.el <- get.ellipses(c("Sepal.Length", "Sepal.Width"), data.mclust)
orig.data$classification <- data.mclust$classification
ggplot(orig.data, aes(Sepal.Length, Sepal.Width, colour = factor(classification))) +
geom_point(aes(shape = classification))+
geom_path(data = iris.el,
aes(group = classification, linetype = classification))