sql - How to melt by all combination pairs of a column? -
i have following table, , want sum number of visits per path each pair of channels, - sum of visits paths in pair of channels appear together.
input:
channel pathnum visits 1 c1 1 5 2 c2 1 5 3 c3 1 5 4 c1 2 3 5 c2 2 3 6 c1 3 1 7 c4 4 4 8 c5 5 13 9 c6 5 13 10 c6 6 7 11 c6 6 7 output:
channel1 channel2 visits 1 c1 c1 9 2 c1 c2 8 3 c1 c3 5 4 c1 c4 0 5 c1 c5 0 6 c1 c6 0 7 c2 c2 8 8 c2 c3 5 9 c2 c4 0 10 c2 c5 0 11 c2 c6 0 12 c3 c3 5 13 c3 c4 0 14 c3 c5 0 15 c3 c6 0 16 c4 c4 4 17 c4 c5 0 18 c4 c6 0 19 c5 c5 13 20 c5 c6 13 21 c6 c6 20 here r code using loop above example:
df1 = data.frame(channel=c("c1","c2","c3","c1","c2","c1","c4","c5","c6","c6","c6"), pathnum = c(1,1,1,2,2,3,4,5,5,6,6), visits=c(5,5,5,3,3,1,4,13,13,7,7), stringsasfactors=false) df2 =dcast(data = df1, pathnum ~ channel, fun.aggregate=function(x){sum(x)/length(x)}) channel1=null channel2=null vis=null (i in 1:length(unique(df1$channel))){ (j in i:length(unique(df1$channel))){ channel1=c(channel1, unique(df1$channel)[i]) channel2=c(channel2, unique(df1$channel)[j]) vis=c(vis,sum(df2[!is.na(df2[,unique(df1$channel)[i]]) & !is.na(df2[,unique(df1$channel)[j]]) & df2[,unique(df1$channel)[j]]>0 , unique(df1$channel)[i]])) } } outframe = data.frame(channel1=channel1, channel2=channel2, visits=vis) this works small example, wondering if there way of doing dplyr or sqlite.
here's 1 way:
require(data.table) require(gtools) dt <- data.table(df1) # combos uc <- unique(dt$channel) cn <- combinations(length(uc),2,repeats.allowed=true) cn[] <- uc[cn] # combos conditional on path sharedpaths <- unique(dt)[,{ cn <- combinations(.n,2,repeats.allowed=true) list(v=visits[1],c1=channel[cn[,1]],c2=channel[cn[,2]]) },by=pathnum] # merge , sum setkey(sharedpaths,c1,c2) sharedpaths[j(as.data.table(cn)),sum(v,na.rm=true),by=.eachi]
Comments
Post a Comment