Skip to content

googlegroupsformatter

bhive01 edited this page Feb 15, 2011 · 14 revisions
require(mclust)
require(ggplot2)
require(ellipse)

#if function, input is original dataset
orig.data<-iris[1:4]

data.mclust<-Mclust(orig.data)

BIC.data<-as.data.frame(data.mclust$BIC)
BIC.data$NumComp<-rownames(BIC.data)
melted.BIC<-melt(BIC.data, var.ids= "NumComp")

ggplot(melted.BIC, aes(x=as.numeric(NumComp), y=value, colour=variable, group=variable))+
scale_x_continuous("Number of Components")+
scale_y_continuous("BIC")+
scale_colour_hue("")+
geom_point()+
geom_line()+
theme_bw()


#plotting correlation matrix of original data colored by model classification
#stripped from plotmatrix()
mapping=aes(colour=as.factor(data.mclust$classification), shape=as.factor(data.mclust$classification))

grid <- expand.grid(x = 1:ncol(orig.data), y = 1:ncol(orig.data))
grid <- subset(grid, x != y)
all <- do.call("rbind", lapply(1:nrow(grid), function(i) {
							   xcol <- grid[i, "x"]
							   ycol <- grid[i, "y"]
							   data.frame(xvar = names(orig.data)[ycol], yvar = names(orig.data)[xcol], 
										  x = orig.data[, xcol], y = orig.data[, ycol], orig.data)
							   }))
all$xvar <- factor(all$xvar, levels = names(orig.data))
all$yvar <- factor(all$yvar, levels = names(orig.data))
densities <- do.call("rbind", lapply(1:ncol(orig.data), function(i) {
									 data.frame(xvar = names(orig.data)[i], yvar = names(orig.data)[i], 
												x = orig.data[, i])
									 }))
mapping <- defaults(mapping, aes_string(x = "x", y = "y"))
class(mapping) <- "uneval"
ggplot(all, mapping) + 
facet_grid(xvar ~ yvar, scales = "free") + 
geom_point(na.rm = TRUE) + 
stat_density(aes(x = x, y = ..scaled.. * diff(range(x)) + min(x)), data = densities, position = "identity", colour = "grey20", geom = "line") +
opts(legend.postion= "none")

#cant get rid of the damn legend
#it would be a bunch faster if it only plotted 1 v 2 and not also 2 v 1... cest la vie


#scatter plot of width by length
#color and shape by classification


#center of cluster by data.mclust$parameters$mean

get.ellipses <- function(coords, mclust.fit){
	centers <- mclust.fit$parameters$mean[coords, ]
	vars <- mclust.fit$parameters$variance$sigma[coords, coords, ]
	ldply(1:ncol(centers), function(cluster){
		  data.frame(ellipse(vars[,,cluster], centre = centers[, cluster],
							 level = 0.5), classification = cluster)
		  })
}

iris.el <- get.ellipses(c("Sepal.Length", "Sepal.Width"), data.mclust)
orig.data$classification <- data.mclust$classification

ggplot(orig.data, aes(Sepal.Length, Sepal.Width, colour = factor(classification))) +
geom_point(aes(shape = classification))+
geom_path(data = iris.el,
aes(group = classification, linetype = classification))

Note: The ggplot2 wiki is no longer maintained, please use the ggplot2 website instead!

Clone this wiki locally