# Python script to take CRSP output with the fields DATE, TICKER and RET and return
# a .csv of the form DATE,TICKER1,TICKER2,...,TICKERn with returns grouped by date.
# Missing or non-numeric values are represented as NA so that R can deal with them gracefully.
# Note: You'll want to include the TICKER in your CRSP download and you'll want the
# "period returns" --- these are the ones that handle dividends correctly.
#
# The script runs unchanged on Python 2.7 and Python 3.

import re
import getopt
import sys

usage = "Usage: convert.py -i filepath -o filepath\n"

# Columns that must be present in the header line of the input file.
REQUIRED_FIELDS = ("DATE", "TICKER", "RET")

# A complete decimal number: optional minus sign, digits with an optional
# fraction (or a bare fraction such as ".05"), optional exponent.  The pattern
# is anchored at the end so that values with trailing junk (e.g. "0.05C") are
# treated as non-numeric instead of leaking into the output.
_NUMERIC = re.compile(r"-?(?:\d+(?:\.\d*)?|\.\d+)(?:[eE][-+]?\d+)?\Z")


def _parse_args(argv):
    """Return (input_path, output_path) from argv, or None if they are unusable.

    None is returned when getopt rejects the options or when either -i or -o
    is absent or empty.  If an option is repeated, the last occurrence wins.
    """
    try:
        opts, _ = getopt.getopt(argv, "i:o:")
    except getopt.GetoptError:
        return None
    paths = {"-i": "", "-o": ""}
    for opt, arg in opts:
        paths[opt] = arg
    if paths["-i"] == "" or paths["-o"] == "":
        return None
    return paths["-i"], paths["-o"]


def _open_or_report(path, mode):
    """Open path in the given mode; on IOError print a message and return None."""
    try:
        return open(path, mode)
    except IOError as err:
        sys.stderr.write("I/O error(%s) attempting to open %s: %s\n"
                         % (err.errno, path, err.strerror))
        return None


def _locate_columns(header_line):
    """Map each required field found in the header line to its column index.

    If a field name occurs more than once, the last occurrence is used.
    """
    names = header_line.rstrip("\r\n").split(",")
    return {name: index for index, name in enumerate(names)
            if name in REQUIRED_FIELDS}


def _read_returns(lines, columns):
    """Collect numeric returns from data lines as {date: {ticker: ret}}.

    Rows whose RET field is not a complete number are skipped, as are lines
    too short to contain all three required columns (e.g. blank lines).  When
    the same (date, ticker) pair occurs more than once, the last row wins.
    Returns are kept as the original strings; they are not converted to float.
    """
    date_col = columns["DATE"]
    ticker_col = columns["TICKER"]
    ret_col = columns["RET"]
    min_width = max(date_col, ticker_col, ret_col) + 1

    database = {}
    for line in lines:
        values = line.rstrip("\r\n").split(",")
        if len(values) < min_width:
            continue
        ret = values[ret_col]
        # Test the RET column itself, wherever the header says it is.
        if not _NUMERIC.match(ret):
            continue
        database.setdefault(values[date_col], {})[values[ticker_col]] = ret
    return database


def _format_table(database):
    """Render {date: {ticker: ret}} as CSV text with one row per date.

    Dates and tickers are each sorted as plain strings.  A ticker with no
    stored return on a given date gets "NA" in that row.  Only dates and
    tickers that have at least one stored return appear at all.
    """
    dates = sorted(database)
    tickers = sorted(set(ticker
                         for by_ticker in database.values()
                         for ticker in by_ticker))
    rows = [["DATE"] + tickers]
    for date in dates:
        by_ticker = database[date]
        rows.append([date] + [by_ticker.get(ticker, "NA") for ticker in tickers])
    return "".join(",".join(row) + "\n" for row in rows)


def main(argv=None):
    """Run the conversion and return a process exit status.

    argv is the list of command-line arguments without the program name;
    when it is None, sys.argv[1:] is used.

    Returns 0 on success, 2 when the command line is unusable, and 1 when a
    file cannot be opened or the input lacks one of DATE, TICKER and RET.
    Diagnostics are written to stderr.
    """
    if argv is None:
        argv = sys.argv[1:]

    paths = _parse_args(argv)
    if paths is None:
        sys.stderr.write(usage)
        return 2
    input_path, output_path = paths

    input_file = _open_or_report(input_path, "r")
    if input_file is None:
        return 1
    with input_file:
        columns = _locate_columns(input_file.readline())
        if len(columns) != len(REQUIRED_FIELDS):
            sys.stderr.write("%s is missing at least one of DATE, TICKER and RET.\n"
                             % input_path)
            return 1
        database = _read_returns(input_file, columns)

    # The output file is only opened (and truncated) once the input has been
    # read successfully.
    output_file = _open_or_report(output_path, "w")
    if output_file is None:
        return 1
    with output_file:
        output_file.write(_format_table(database))
    return 0


if __name__ == "__main__":
    sys.exit(main())