> prompt > mget * .
Tuesday, July 24, 2012
Friday, April 20, 2012
python script to convert plink bed/bim to qtdt format
import os sdir = '/hapmap/' basebedname = 'hapmap3' ddir = '/erlotinib/' temponame = 'tempo' phenoname = 'erlotinib' os.system('plink --bfile ' + sdir + basebedname + ' --recode12 --out ' + ddir + temponame ) ## join tempo.ped with phenodata.txt ## and generate pheno.ped file shortfilename = ddir + "phenodata.txt" ## FID,IID,dad,mom,sex,pheno1,... longfilename = ddir + temponame + ".ped" ## FID,IID,dad,mom,sex,pheno,snp1,... outfilename = ddir + phenoname + ".ped" index = {} shortfile= open( shortfilename, "r" ) for row in shortfile: part = row.split(None,2) iid = part[1] ## IID is the second column in phenofile index[iid] = row.rstrip() ## rstrip gets rid of \n and other white spaces shortfile.close() outfile = open( outfilename, "w") longfile= open( longfilename, "r" ) for row in longfile: part = row.split(None,6) iid = part[1] restofline = part[6] if iid in index: pheno = index[iid] outfile.write( pheno + ' ' + restofline) longfile.close() outfile.close() ## generate pheno.dat file infilename = ddir + temponame + '.map' outfilename = ddir + phenoname + '.dat' infile = open(infilename,'r') outfile = open(outfilename,'w') ## write covariates and phenotype names outfile.write('C igrowth\n') outfile.write('T logAUC\n') ## write one row per snp for linea in infile: part = linea.split(None,2) newlinea = 'M ' + part[1] + '\n' outfile.write(newlinea) infile.close() outfile.close()
Friday, April 13, 2012
create new theme for ggplot
from: http://sape.inf.usi.ch/quick-reference/ggplot2/themes theme_complete_bw <- function(base_size = 12) { structure(list( axis.line = theme_blank(), axis.text.x = theme_text(size = base_size * 0.8 , lineheight = 0.9, colour = "black", vjust = 1), axis.text.y = theme_text(size = base_size * 0.8, lineheight = 0.9, colour = "black", hjust = 1), axis.ticks = theme_segment(colour = "black"), axis.title.x = theme_text(size = base_size, vjust = 0.5), axis.title.y = theme_text(size = base_size, angle = 90, vjust = 0.5), axis.ticks.length = unit(0.15, "cm"), axis.ticks.margin = unit(0.1, "cm"), legend.background = theme_rect(colour=NA), legend.key = theme_rect(fill = NA, colour = "black", size = 0.25), legend.key.size = unit(1.2, "lines"), legend.text = theme_text(size = base_size * 0.8), legend.title = theme_text(size = base_size * 0.8, face = "bold", hjust = 0), legend.position = "right", panel.background = theme_rect(fill = NA, colour = "black", size = 0.25), panel.border = theme_blank(), panel.grid.major = theme_line(colour = "black", size = 0.05), panel.grid.minor = theme_line(colour = "black", size = 0.05), panel.margin = unit(0.25, "lines"), strip.background = theme_rect(fill = NA, colour = NA), strip.text.x = theme_text(colour = "black", size = base_size * 0.8), strip.text.y = theme_text(colour = "black", size = base_size * 0.8, angle = -90), plot.background = theme_rect(colour = NA, fill = "white"), plot.title = theme_text(size = base_size * 1.2), plot.margin = unit(c(1, 1, 0.5, 0.5), "lines") ), class = "options") }
Thursday, April 12, 2012
# cdf: cd's to frontmost window of Finder (by Yarko)
copy the following in ~/.bash_profile # cdf: cd's to frontmost window of Finder cdf () { currFolderPath=$( /usr/bin/osascript <<" EOT" tell application "Finder" try set currFolder to (folder of the front window as alias) on error set currFolder to (path to desktop folder as alias) end try POSIX path of currFolder end tell EOT ) # ignore the cd if it's my desktop if [ "$currFolderPath" != "$HOME/Desktop/" ] then echo "cd to \"$currFolderPath\"" cd "$currFolderPath" else echo "no finder path to cd to..." fi }
Friday, February 3, 2012
Thursday, February 2, 2012
python script extract variants from large file based on smaller file
#small script to extract variants from a large file based on a smaller file (by Vasa) largeChromosomes = {} largePositions = {} largeFile = open('ref1k.rs.snps.txt','r') for line in largeFile: line = line.split('\t') chrm = line [0] rs = line[1] pos = line[2] largeChromosomes[rs] = chrm largePositions[rs] = pos smallFile = open('reck.txt','r') fileOut = open('reck.1kpositions.txt','w') for line in smallFile: line = line.split('\t') rs = line[1] largeChrom = largeChromosomes[rs] largePos = largePositions[rs] lineOut = ' '.join( [rs, largeChrom, largePos] ) print >> fileOut, lineOut Below is a more stylish script that does the same: create a file named select.py, make it excutable and run > python select.py -s [name|number] selector_file -c [name|number] large_file -o output_file #!/usr/bin/env python ### # March 29th, 2011 # Yarko Tymciurak # yarkot@uchicago.edu # # Research Computing Group # Biostatistics Lab # University of Chicago #### ''' Given two files, select (output) lines of the second (data) file which contain one of the data items in the first (selector) file. ''' from __future__ import print_function from collections import namedtuple from optparse import OptionParser import sys if sys.version_info[0] < 3 and sys.version_info[0] == 2 and sys.version_info[1] < 6: print( 'This program needs a minimum of Python version 2.6 to run. You are using:\n%s' % sys.version, file=sys.stderr) exit(1) LINEEND = '\r\n' # for rstrip # FIELDSEP = None # for split error_log = sys.stderr def main(): useage = '''useage: %prog [-s [name|number] selector_file [-c [name|number] data_file [-o output_file]" Given two input files, the values of a specified column of a selector file will be used to select output. Data in the specified "compare" column of the data_file will be checked against the selector data. If a compare data item from the data_file exists in the selector set, then that data_file row (line) will be output. You may also specify an output file. - If a field name is given, the first line of the file is assumed to contain field names. - If a field number is given, the n-th field is used (left-to-right, beginning with 1); all lines are assumed to be data - Fields are assumed to be TAB separated. example: % %prog -s IID relationships.txt -c 2 some.ped -o filtered.ped This will use the "IID" column from relationships. Any row in "some.ped" which has column-2 data matching an IID data will be output to "filtered.ped" This is equivalent: % %prog -sIID relationships.txt -c2 some.ped -ofiltered.ped ''' parser = OptionParser(useage) parser.add_option( "-s", "--select-column", dest="select_column", help="column from which to build selection matching data") parser.add_option( "-c", "--compare-column", dest="compare_column", help="column to compare against selection criteria") parser.add_option( "-o", "--output", dest="output", help="output file for selected data") parser.add_option( "-p", "--separator", dest="FIELDSEP", help="set field separator (each string will be one separator).\n\ default separator: any successive whitespace") (options,args) = parser.parse_args() output = options.output select_column = proper_type(options.select_column) compare_column = proper_type(options.compare_column) compare_asindex = type(compare_column) is int global FIELDSEP FIELDSEP = options.FIELDSEP # I depend on this being 'None' when not set if (select_column is None) or (compare_column is None): error_log.write("Need to specify select column (-s) and compare column (-c).\nTry -h option for help.\n") exit(1) if len(args)<2: parser.print_help() exit(1) fname = args[0] fp = sys.stdin if fname == '-' else must_open(fname) fo = sys.stdout if output is None else must_open(output, 'w') selectors = get_selectors(fp, select_column) if type(select_column) is int: select_column -= 1 # adjust for zero-based array indexing for fname in args[1:]: global n n = 0 fp = sys.stdin if fname == '-' else must_open(fname) line = fp.readline() # assume first line is a header line, if named data field: if not compare_asindex: dat = namedtuple("dat", line.rstrip(LINEEND).split(FIELDSEP)) line = fp.readline() else: maxsplit = compare_column compare_column -= 1 # i.e. 2nd column => row[1] while not (line == ''): n+=1 # split the line fields if compare_asindex: line_element = line.rstrip(LINEEND).split(FIELDSEP, maxsplit) else: drow = dat(*line.rstrip(LINEEND).split(FIELDSEP)) line_element = drow._asdict() # output if the specified data field is in the selector try: if line_element[compare_column] in selectors: fo.write(line) except Exception as e: print( e, "\nMaybe you need to specify an alternate field separator to process your data?", file=sys.stdout); return line = fp.readline() def get_selectors(fp, column): ''' using the global option, select a column from the file, and return a dict (hash performance) with all the values ''' selectors = {} global FIELDSEP # Get column option: # - if numeric, split the fields and use number as an index, # - else use named.tuples, and use the first line as the header asindex = type(column) is int if not asindex: # get the first line, which we expect to be the column header line = fp.readline() hdr = namedtuple("hdr", line.rstrip(LINEEND).split(FIELDSEP)) else: # can only limit splits on numerics maxsplit = column column -= 1 # i.e. 2nd column => row[1] # now go thru the file: line = fp.readline() while not (line==''): # prepare so we index correctly either way: if asindex: row = line.rstrip(LINEEND).split(FIELDSEP, maxsplit) else: drow = hdr(*line.rstrip(LINEEND).split(FIELDSEP)) row = drow._asdict() selectors[row[column]]='' # we only care about key, not value line = fp.readline() return selectors def must_open(f, c='r'): try: fp = open(f,c) except IOError as e: error_log.write('Unable to open %s : %s\n' % (f,e)) exit(2) return fp def proper_type(i): return int(i) if (i and i.isdigit()) else i if __name__=='__main__': main() ##----------
Wednesday, January 11, 2012
Friday, January 6, 2012
latexdiff - mark up differences between latex files
Determine and mark up significant differences between latex files. http://www.ctan.org/tex-archive/support/latexdiff This perl script will generate a latex file with changes marked up (similar to Word) from two latex files latexdiff-so fileold.tex filenew.tex > dif.tex
ps2pdf to all ps files in current directory
for x in $(ls *.ps); do echo $x; ps2pdf $x $(echo $x|sed -e 's/ps$/pdf/');done or more efficiently according to Yarko's comment for x in *.ps; do echo $x; ps2pdf $x ${x/%ps/pdf}; done
Subscribe to:
Posts (Atom)