Which language is shortest?
R, 821 - 44 = 777 bytes
Updated results: please see the edit history to make sense of all the comments below.
language num_answers avg_count median_count
1 RUBY 49 49.97959 30.0
2 CJAM 48 32.64583 22.0
3 PYTH 48 21.02083 14.0
4 PYTHON 2 46 86.78261 77.0
5 JULIA 43 58.90698 45.0
6 HASKELL 41 74.65854 56.0
7 PHP 40 73.52500 48.0
8 PERL 36 53.30556 34.0
9 PYTHON 3 34 90.91176 90.5
10 POWERSHELL 33 60.24242 44.0
11 C 32 221.84375 79.5
12 R 32 77.40625 62.5
13 JAVA 29 170.68966 158.0
14 JAVASCRIPT (ES6) 29 90.79310 83.0
15 JAVASCRIPT 28 68.39286 61.0
16 C# 25 193.92000 130.0
17 MATHEMATICA 23 56.04348 47.0
18 MATLAB 22 67.45455 55.0
19 TI-BASIC 19 47.05263 37.0
20 APL 18 16.55556 15.0
The code, which I could shorten a bit more:
# Golfed scraper: collects language/byte-count headers from answers to
# code-golf questions and summarises the 20 most frequent languages.
# Comment lines are annotations only; the code lines are unchanged.
# Alias library() and use it to attach the XML and plyr packages.
W=library;W(XML);W(plyr)
# Short aliases for the functions that are called repeatedly below.
X=xpathSApply;Y=xmlValue;D=data.frame;H=htmlParse;S=sprintf
# Site base URL, reused for the listing pages and the question links.
Z="http://codegolf.stackexchange.com/"
# R: call FUN over the supplied arguments with Map() and rbind() the results.
R=function(FUN,...)do.call(rbind,Map(FUN,...))
# G: parse one listing page; returns a data frame with columns u and a.
G=function(url){d=H(url)
# a: from the text of each div whose class starts with 'status', the digits
# immediately followed by "a", as a number (presumably the answer count).
a=as.double(sub(".*?(\\d+)a.*","\\1",X(d,"//div[starts-with(@class,'status')]",Y)))
# u: Z followed by the href of every element whose class contains
# 'question-hyperlink'.
u=paste0(Z,X(d,"//*[contains(@class,'question-hyperlink')]",xmlGetAttr,"href"))
D(u,a)}
# URLs of listing pages 1-50 of the code-golf tag.
u=S("%s/questions/tagged/code-golf?page=%i",Z,1:50)
# One row per question link across all listing pages.
q=R(G,u)
# Keep the first 50 links whose a value exceeds 20.
u=with(q,head(u[a>20],50))
# A: for one question URL, parse its pages 1-10 and stack the results.
A=function(url){u=S("%s?page=%i",url,1:10)
# f: parse a single page into a data frame with columns l and c.
f=function(u){d=H(u)
# Text of every h1 found inside a div with class 'post-text'.
h=X(d, "//div[@class='post-text']//h1",Y)
# Header pattern: text before the first comma, then a trailing number that
# may be followed by " bytes" or " Bytes".
p="^(.*?),.*? (\\d+)( [Bb]ytes)?$"
# Keep only the headers that match the pattern.
k=grep(p,h,v=T)
# l: upper-cased first capture group (the text before the first comma).
l=toupper(sub(p,"\\1",k))
# c: second capture group (the trailing number) as a double.
c=as.double(sub(p,"\\2",k))
D(l,c)}
R(f,u)}
# All parsed headers for the selected questions.
a=R(A,u)
# Names of the 20 most frequent values of l.
L=names(tail(sort(table(a$l)),20))
# Rows belonging to those 20 values.
x=subset(a,l%in%L)
# Per value of l: row count (n), mean of c (a) and 0.5 quantile of c (m),
# sorted by n in descending order.
arrange(ddply(x, "l",summarise,n=length(c),a=mean(c),m=quantile(c,0.5)),-n)
De-golfed:
library(XML)
library(plyr)
# Apply FUN element-wise over the arguments in `...` (as Map() does) and
# stack the per-call results into a single object with rbind().
LoopBind <- function(FUN, ...) {
  pieces <- Map(FUN, ...)
  do.call(rbind, pieces)
}
# Scrape one question-listing page.
#
# Args:
#   url: address of a listing page, passed to htmlParse().
#   base_url: prefix prepended to each question's href. Defaults to the
#     site root that was previously hard-coded in the body.
#
# Returns a data frame with one row per question link found:
#   u - base_url followed by the link's href,
#   a - the digits immediately followed by "a" in the text of the
#       corresponding "status" div, as a number (presumably the answer
#       count); NA when the text does not contain such digits.
GetQuestions <- function(url, base_url = "http://codegolf.stackexchange.com/") {
  d <- htmlParse(url)
  status_text <- xpathSApply(d, "//div[starts-with(@class, 'status')]", xmlValue)
  hrefs <- xpathSApply(d, "//*[contains(@class, 'question-hyperlink')]",
                       xmlGetAttr, "href")

  # With no question links, paste0() would still return the bare base URL
  # (length 1) while the count vector has length 0, and data.frame() would
  # stop with a row-count mismatch. Return an empty frame instead so the
  # caller's rbind() simply skips this page.
  if (length(hrefs) == 0L) {
    return(data.frame(u = character(0), a = numeric(0)))
  }

  a <- as.double(sub(".*?(\\d+)a.*", "\\1", status_text))
  u <- paste0(base_url, hrefs)
  data.frame(u, a)
}
# URLs of listing pages 1-50 of the code-golf tag.
u <- sprintf("http://codegolf.stackexchange.com/questions/tagged/code-golf?page=%i", 1:50)
# Scrape every listing page and stack the per-page data frames, using the
# LoopBind() helper defined above instead of repeating its body inline.
q <- LoopBind(GetQuestions, u)
# Keep the first 50 question URLs whose `a` value exceeds 20. which() drops
# rows where `a` is NA; plain logical indexing would turn them into NA URLs.
u <- with(q, head(u[which(a > 20)], 50))
# Collect language/byte-count headers from the pages of one question.
#
# Args:
#   url: question URL; "?page=<n>" is appended for every n in `pages`.
#   pages: page numbers to fetch. Defaults to 1:10, the range that was
#     previously hard-coded.
#
# Returns a data frame with one row per matching header:
#   language - upper-cased text before the first comma of the header,
#   c        - the trailing number of the header (optionally followed by
#              " bytes" / " Bytes"), as a double.
# Headers that do not match the pattern are dropped.
GetAnswers <- function(url, pages = 1:10) {
  page_urls <- sprintf("%s?page=%i", url, pages)
  header_pattern <- "^(.*?),.*? (\\d+)( [Bb]ytes)?$"

  # Parse a single page into a data frame of (language, c) rows.
  parse_page <- function(page_url) {
    d <- htmlParse(page_url)
    # Text of every h1 inside a div with class 'post-text'.
    headers <- xpathSApply(d, "//div[@class='post-text']//h1", xmlValue)
    # Spell out `value = TRUE` rather than relying on partial argument
    # matching (`v=`) and the reassignable shorthand `T`.
    hits <- grep(header_pattern, headers, value = TRUE)
    data.frame(
      language = toupper(sub(header_pattern, "\\1", hits)),
      c = as.double(sub(header_pattern, "\\2", hits))
    )
  }

  LoopBind(parse_page, page_urls)
}
# Collect every parsed answer header for the selected questions.
a <- LoopBind(GetAnswers, u)
# Names of the 20 most frequent languages. The column is called `language`;
# refer to it by its full name instead of relying on `$` partial matching
# (`a$l`), which silently breaks if another column starting with "l" appears.
L <- names(tail(sort(table(a$language)), 20))
# Rows belonging to those 20 languages.
x <- subset(a, language %in% L)
# Per language: number of answers, mean and median byte count, sorted by
# number of answers in descending order. The result is auto-printed.
arrange(
  ddply(x, "language", summarise,
        num_answers = length(c),
        avg_count = mean(c),
        median_count = quantile(c, 0.5)),
  -num_answers
)
Python 2, 934 - 44 (url stuff) = 890 bytes
Using the API:
from urllib2 import urlopen as u
from gzip import GzipFile as f
from StringIO import StringIO as s;x="https://api.stackexchange.com/2.2%s&site=codegolf"
import re;j=u(x%'/search/advanced?pagesize=50&order=desc&sort=creation&answers=20&tagged=code-golf');q=s(j.read());g=f(fileobj=q);true=1;false=0;l=';'.join(str(a['question_id'])for a in eval(g.read())['items']);w=[]
def r(p):
j=u(x%('/questions/%s/answers?page=%s&filter=!9YdnSMlgz&pagesize=100'%(l,p)));g.seek(0);q.truncate();q.write(j.read());q.seek(0);k=eval(g.read());w.extend(a['body_markdown']for a in k['items'])
if k['has_more']:r(p+1)
r(1);x={};s=sorted
for m in w:
try:
l,n=re.match("(.*?),.*?([0-9]+)[^0-9]*$",m.splitlines()[0]).groups();l=re.subn("# ?","",l,1)[0].upper()
if l not in x:x[l]=[]
x[l]+=[(l,int(n))]
except:pass
for l in s(x,cmp,lambda a:len(x[a]),1)[:20]:
v=s(x[l])
print l,len(v),sum(map(lambda a:a[1],v))/len(v),v[len(v)/2][1]
Note that this code does not respect the API's throttling (rate limits).
Output:
RUBY 60 430 32
PYTH 57 426 16
CJAM 56 35 23
C 52 170 76
PYTHON 2 51 88 79
JULIA 42 63 48
HASKELL 42 81 63
JAVASCRIPT (ES6) 41 96 83
PERL 40 44 27
PYTHON 3 37 91 89
PHP 36 98 59
JAVASCRIPT 36 743 65
POWERSHELL 35 86 44
JAVA 32 188 171
R 30 73 48
MATLAB 25 73 51
MATHEMATICA 24 57 47
APL 22 14 13
SCALA 21 204 59
TI-BASIC 21 42 24