import os sdir = '/hapmap/' basebedname = 'hapmap3' ddir = '/erlotinib/' temponame = 'tempo' phenoname = 'erlotinib' os.system('plink --bfile ' + sdir + basebedname + ' --recode12 --out ' + ddir + temponame ) ## join tempo.ped with phenodata.txt ## and generate pheno.ped file shortfilename = ddir + "phenodata.txt" ## FID,IID,dad,mom,sex,pheno1,... longfilename = ddir + temponame + ".ped" ## FID,IID,dad,mom,sex,pheno,snp1,... outfilename = ddir + phenoname + ".ped" index = {} shortfile= open( shortfilename, "r" ) for row in shortfile: part = row.split(None,2) iid = part[1] ## IID is the second column in phenofile index[iid] = row.rstrip() ## rstrip gets rid of \n and other white spaces shortfile.close() outfile = open( outfilename, "w") longfile= open( longfilename, "r" ) for row in longfile: part = row.split(None,6) iid = part[1] restofline = part[6] if iid in index: pheno = index[iid] outfile.write( pheno + ' ' + restofline) longfile.close() outfile.close() ## generate pheno.dat file infilename = ddir + temponame + '.map' outfilename = ddir + phenoname + '.dat' infile = open(infilename,'r') outfile = open(outfilename,'w') ## write covariates and phenotype names outfile.write('C igrowth\n') outfile.write('T logAUC\n') ## write one row per snp for linea in infile: part = linea.split(None,2) newlinea = 'M ' + part[1] + '\n' outfile.write(newlinea) infile.close() outfile.close()
Friday, April 20, 2012
python script to convert plink bed/bim to qtdt format
Subscribe to:
Post Comments (Atom)
No comments:
Post a Comment