#!/bin/env python2.6
"""
This is our first BME 205 program illustrating some basic file I/O, printing,
conditionals, and data structures.

This program reads 6-column BED data and outputs a list of genes ranked by
score (from lowest to highest).

Input data can either be piped to the program:

    cat sample_data.bed | ./BME_205_Python_lecture_firstProg.py

or name of file containing input data can be given as an argument:

    ./BME_205_Python_lecture_firstProg.py sample_data.bed

Blame Andrew Uzilov (auzilov@ucsc.edu), Fall 2009.
"""

import sys


def compareGenes (a, b):
    """Comparator function for gene/score tuples.

    Each input argument is a 2-tuple: (gene, score).  Returns -1, 0 or 1
    depending on whether a's score is lower than, equal to, or higher than
    b's score.

    The program itself now sorts with a key function (see sortGenesByScore);
    this comparator is kept so code that refers to it by name keeps working.
    """
    if a[1] < b[1]:
        return -1
    if a[1] > b[1]:
        return 1
    return 0


def readGenesScores (lines):
    """Parse 6-column BED lines into a list of (gene, score) 2-tuples.

    lines: any iterable of strings (an open file, sys.stdin, a list, ...).

    Blank lines, 'track'/'browser' header lines and '#' comment lines are
    skipped.  Column 4 is taken as the gene name and column 5 as the integer
    score.

    Raises ValueError (with the offending line number) when a data line does
    not have exactly 6 whitespace-separated columns or when its score is not
    an integer.
    """
    genesScores = []  # list of 2-tuples, each tuple is: (gene, score)

    for lineNum, line in enumerate (lines, 1):
        # strip() also removes the trailing newline and MS-DOS ^M.
        stripped = line.strip()

        # Skip lines that carry no interval data.
        if not stripped or stripped.startswith (('track', 'browser', '#')):
            continue

        fields = stripped.split()

        # Explicit check rather than 'assert': asserts vanish under -O.
        if len (fields) != 6:
            raise ValueError (
                'Not a 6-column BED file (line %d has %d columns)'
                % (lineNum, len (fields)))

        gene = fields[3]
        try:
            score = int (fields[4])
        except ValueError:
            raise ValueError (
                'Score %r on line %d is not an integer' % (fields[4], lineNum))

        genesScores.append ((gene, score))  # append 2-tuple

    return genesScores


def sortGenesByScore (genesScores):
    """Return a new list of (gene, score) tuples ordered by ascending score.

    The sort is stable, so genes with equal scores keep their input order
    (the same ordering the old comparator-based sort produced).
    """
    return sorted (genesScores, key=lambda geneScore: geneScore[1])


def main (argv=None):
    """Run the program; return the process exit status (0 = OK, 1 = error).

    argv: full argument vector including the program name; defaults to
    sys.argv.  With one argument the BED data is read from that file, with
    none it is read from STDIN.
    """
    if argv is None:
        argv = sys.argv

    # Diagnostics go to STDERR so they never mix with the ranked output.
    # The 1-tuple keeps '%' from unpacking version_info as format arguments.
    sys.stderr.write (
        'We are running this Python version: %s\n' % (sys.version_info,))
    sys.stderr.write ('Arguments are: %s\n' % argv)

    if len (argv) > 2:
        # Oops, incorrect command-line invocation.
        # Print usage and scare the user with an error message.
        sys.stderr.write (
            (__doc__ or '')
            + '\nERROR: You gave the program %d args; only 0 or 1 args allowed.\n'
            % (len (argv) - 1))
        return 1

    try:
        if len (argv) == 2:
            # Read from file; 'with' guarantees the file gets closed.
            with open (argv[1], 'r') as infile:
                genesScores = readGenesScores (infile)
        else:
            # Read from STDIN, which is always "open for reading" and which
            # we do not own, so we do not close it.
            genesScores = readGenesScores (sys.stdin)
    except ValueError:
        # Malformed input: report it plainly instead of dumping a traceback.
        sys.stderr.write ('ERROR: %s\n' % sys.exc_info()[1])
        return 1

    # Output sorted gene list.
    for gene, score in sortGenesByScore (genesScores):
        sys.stdout.write ('gene %s score %d\n' % (gene, score))

    return 0


if __name__ == '__main__':
    sys.exit (main())