Label and color leaf dendrogram

Here is a solution for this question using a new package called "dendextend", built exactly for this sort of thing.

You can see many examples in the presentations and vignettes of the package, in the "usage" section in the following URL: https://github.com/talgalili/dendextend

Here is the solution for this question: (notice the importance of how to re-order the colors to first fit the data, and then to fit the new order of the dendrogram)

####################
## Getting the data:

sample = data.frame(matrix(floor(abs(rnorm(20000)*100)),ncol=200))
groupCodes <- c(rep("Cont",25), rep("Tre1",25), rep("Tre2",25), rep("Tre3",25))
rownames(sample) <- make.unique(groupCodes)

colorCodes <- c(Cont="red", Tre1="green", Tre2="blue", Tre3="yellow")

distSamples <- dist(sample)
hc <- hclust(distSamples)
dend <- as.dendrogram(hc)

####################
## installing dendextend for the first time:

install.packages('dendextend')

####################
## Solving the question:

# loading the package
library(dendextend)
# Assigning the labels of dendrogram object with new colors:
labels_colors(dend) <- colorCodes[groupCodes][order.dendrogram(dend)]
# Plotting the new dendrogram
plot(dend)


####################
## A sub tree - so we can see better what we got:
par(cex = 1)
plot(dend[[1]], horiz = TRUE)

enter image description here


You could convert you hclust object into a dendrogram and use ?dendrapply to modify the properties (attributes like color, label, ...) of each node, e.g.:

## stupid toy example
samples <- matrix(c(1, 1, 1,
                    2, 2, 2,
                    5, 5, 5,
                    6, 6, 6), byrow=TRUE, nrow=4)

## set sample IDs to A-D
rownames(samples) <- LETTERS[1:4]

## perform clustering
distSamples <- dist(samples)
hc <- hclust(distSamples)

## function to set label color
labelCol <- function(x) {
  if (is.leaf(x)) {
    ## fetch label
    label <- attr(x, "label") 
    ## set label color to red for A and B, to blue otherwise
    attr(x, "nodePar") <- list(lab.col=ifelse(label %in% c("A", "B"), "red", "blue"))
  }
  return(x)
}

## apply labelCol on all nodes of the dendrogram
d <- dendrapply(as.dendrogram(hc), labelCol)

plot(d)

enter image description here

EDIT: Add code for your minimal example:

    sample = data.frame(matrix(floor(abs(rnorm(20000)*100)),ncol=200))
groupCodes <- c(rep("A",25), rep("B",25), rep("C",25), rep("D",25))

## make unique rownames (equal rownames are not allowed)
rownames(sample) <- make.unique(groupCodes)

colorCodes <- c(A="red", B="green", C="blue", D="yellow")


## perform clustering
distSamples <- dist(sample)
hc <- hclust(distSamples)

## function to set label color
labelCol <- function(x) {
  if (is.leaf(x)) {
    ## fetch label
    label <- attr(x, "label")
    code <- substr(label, 1, 1)
    ## use the following line to reset the label to one letter code
    # attr(x, "label") <- code
    attr(x, "nodePar") <- list(lab.col=colorCodes[code])
  }
  return(x)
}

## apply labelCol on all nodes of the dendrogram
d <- dendrapply(as.dendrogram(hc), labelCol)

plot(d)

enter image description here