Thursday, May 15, 2014

Drop rows with repeated values of a variable, keeping the first row for each value



## function
## Drop rows whose `repvar` value is repeated, keeping one row per value.
##
## For every value of `repvar` that occurs more than once in `df`, only the
## row that comes first after sorting by `sortvar` (increasing order) is kept.
## Ties in `sortvar` go to the row that appears first in `df`, because
## order() leaves ties in their original order.
##
## Arguments:
##   df      - data frame to de-duplicate.
##   repvar  - name (or index) of the column whose repeated values are dropped.
##   sortvar - name (or index) of the column that decides which row of a
##             repeated group counts as the "first" one.
##
## Value:
##   A data frame with the columns of `df`. Rows whose `repvar` value occurs
##   exactly once come first, in their original order; they are followed by
##   one row per repeated value, in the order table() reports the values.
##   Rows whose `repvar` is NA are not part of the result (table() does not
##   count them), and they no longer disturb the rows that are returned.
droprep <- function(df, repvar, sortvar)
{
    key <- df[, repvar]
    counts <- table(key)
    values <- names(counts)
    once <- values[counts == 1]
    repeated <- values[counts > 1]

    # Rows whose key occurs exactly once pass through untouched.
    # drop = FALSE keeps a one-column data frame from collapsing to a vector.
    is_once <- key %in% once
    singles <- df[is_once, , drop = FALSE]

    # Candidate rows for the repeated keys. NA keys are excluded explicitly:
    # comparing them with `==` would yield NA and inject an all-NA row.
    dups <- df[!is_once & !is.na(key), , drop = FALSE]
    dups <- dups[order(dups[, sortvar]), , drop = FALSE]

    # match() returns the first position of each repeated key in the sorted
    # candidates, i.e. the row to keep. One lookup replaces growing the result
    # with rbind() inside a loop.
    first_pos <- match(repeated, dups[, repvar])
    rbind(singles, dups[first_pos, , drop = FALSE])
}

Contributors

google