import os
sdir = '/hapmap/'
basebedname = 'hapmap3'
ddir = '/erlotinib/'
temponame = 'tempo'
phenoname = 'erlotinib'
os.system('plink --bfile ' + sdir + basebedname + ' --recode12 --out ' + ddir + temponame )
## join tempo.ped with phenodata.txt
## and generate pheno.ped file
shortfilename = ddir + "phenodata.txt" ## FID,IID,dad,mom,sex,pheno1,...
longfilename = ddir + temponame + ".ped" ## FID,IID,dad,mom,sex,pheno,snp1,...
outfilename = ddir + phenoname + ".ped"
index = {}
shortfile= open( shortfilename, "r" )
for row in shortfile:
part = row.split(None,2)
iid = part[1] ## IID is the second column in phenofile
index[iid] = row.rstrip()
## rstrip gets rid of \n and other white spaces
shortfile.close()
outfile = open( outfilename, "w")
longfile= open( longfilename, "r" )
for row in longfile:
part = row.split(None,6)
iid = part[1]
restofline = part[6]
if iid in index:
pheno = index[iid]
outfile.write( pheno + ' ' + restofline)
longfile.close()
outfile.close()
## generate pheno.dat file
infilename = ddir + temponame + '.map'
outfilename = ddir + phenoname + '.dat'
infile = open(infilename,'r')
outfile = open(outfilename,'w')
## write covariates and phenotype names
outfile.write('C igrowth\n')
outfile.write('T logAUC\n')
## write one row per snp
for linea in infile:
part = linea.split(None,2)
newlinea = 'M ' + part[1] + '\n'
outfile.write(newlinea)
infile.close()
outfile.close()
Friday, April 20, 2012
python script to convert plink bed/bim to qtdt format
Subscribe to:
Post Comments (Atom)
No comments:
Post a Comment