convert object of class "dist" into data frame in r

library(maps)
data(us.cities)

d <- dist(head(us.cities[c("lat", "long")]))

##           1         2         3         4         5
## 2 20.160489                                        
## 3 23.139853 40.874243                              
## 4 15.584303  9.865374 38.579820                    
## 5 27.880674  7.882037 48.707100 15.189882          
## 6 26.331187 41.720457  6.900101 41.036931 49.328558

library(reshape2)

df <- melt(as.matrix(d), varnames = c("row", "col"))

df[df$row > df$col,]
##    row col     value
## 2    2   1 20.160489
## 3    3   1 23.139853
## 4    4   1 15.584303
## 5    5   1 27.880674
## 6    6   1 26.331187
## 9    3   2 40.874243
## 10   4   2  9.865374
## 11   5   2  7.882037
## 12   6   2 41.720457
## 16   4   3 38.579820
## 17   5   3 48.707100
## 18   6   3  6.900101
## 23   5   4 15.189882
## 24   6   4 41.036931
## 30   6   5 49.328558

I would actually write a function something like this:

myFun <- function(inDist) {
  if (class(inDist) != "dist") stop("wrong input type")
  A <- attr(inDist, "Size")
  B <- if (is.null(attr(inDist, "Labels"))) sequence(A) else attr(inDist, "Labels")
  if (isTRUE(attr(inDist, "Diag"))) attr(inDist, "Diag") <- FALSE
  if (isTRUE(attr(inDist, "Upper"))) attr(inDist, "Upper") <- FALSE
  data.frame(
    row = B[unlist(lapply(sequence(A)[-1], function(x) x:A))],
    col = rep(B[-length(B)], (length(B)-1):1),
    value = as.vector(inDist))
}

Now, imagine we are starting with (note the non-numeric row and column names):

dd <- as.dist((1 - cor(USJudgeRatings)[1:5, 1:5])/2)
#            CONT       INTG       DMNR       DILG
# INTG 0.56659545                                 
# DMNR 0.57684427 0.01769236                      
# DILG 0.49380400 0.06424445 0.08157452           
# CFMG 0.43154385 0.09295712 0.09332092 0.02060062

We can change it with a simple:

myFun(dd)
#     row  col      value
# 1  INTG CONT 0.56659545
# 2  DMNR CONT 0.57684427
# 3  DILG CONT 0.49380400
# 4  CFMG CONT 0.43154385
# 5  DMNR INTG 0.01769236
# 6  DILG INTG 0.06424445
# 7  CFMG INTG 0.09295712
# 8  DILG DMNR 0.08157452
# 9  CFMG DMNR 0.09332092
# 10 CFMG DILG 0.02060062

A quick performance comparison:

set.seed(1)
x <- matrix(rnorm(1000*1000), nrow = 1000)
dd <- dist(x)

## Jake's function
fun2 <- function(inDist) {
  df <- melt(as.matrix(inDist), varnames = c("row", "col"))
  df[as.numeric(df$row) > as.numeric(df$col), ]
}

all(fun2(dd) == myFun(dd))
# [1] TRUE
system.time(fun2(dd))
#    user  system elapsed 
#   0.346   0.002   0.349 
system.time(myFun(dd))
#    user  system elapsed 
#   0.012   0.000   0.015