#!/usr/bin/python
"""Clean up Mapleleaves program tables.

Usage: <script> inputFile outputFile

Reads the original table snippet (from Gnumeric) line by line, strips
presentational attributes, rewrites table-header cells, and writes the
cleaned HTML to the output file, one output line per input line.
"""
import argparse

# Clean-up applied to every line, in this exact order.
# NOTE(review): the pairs whose search text is empty look like markup
# patterns whose text is not visible in this copy of the file -- confirm
# against the original literals.  They are kept in place so the order is
# preserved, but apply_replacements() skips them (see there for why).
STRIP_REPLACEMENTS = (
    (' font-size:9pt;', ''),
    (' font-size:10pt;', ''),
    (' font-size:11pt;', ''),
    (' style=""', ''),
    (' valign="bottom"', ''),
    (' ', ' '),
    ('', ''),
    ('', ''),
    ('', ''),
    ('', ''),
    ('', ' '),
)

# Header-cell rewrites.  A line that is changed by any of these marks the
# start of a table header.
# NOTE(review): most pairs appear identical on both sides and some are
# repeated in this view; they are kept verbatim and in order -- TODO confirm
# the intended before/after text.
HEADER_REPLACEMENTS = (
    ('Datum', 'Datum'),
    ('Tijd', 'Tijd'),
    ('Tijd', 'Tijd'),
    ('Veld', 'Veld'),
    ('Poule', 'Poule'),
    ('Code', 'Code'),
    ('Team Thuis', 'Team Thuis'),
    ('Team Uit', 'Team Uit'),
    ('Plaats/Sporthal', 'Plaats/Sporthal'),
    ('Scheidsrechters', 'Scheidsrechters'),
    ('Schrijvers', 'Schrijvers'),
    ('Zaaldienst', 'Zaaldienst'),
    ('Vertrektijd', 'Vertrektijd'),
    ('Veld', 'Veld'),
    ('Code', 'Code'),
    ('Thuis', 'Thuis'),
    ('Uit', 'Uit'),
    ('Hal', 'Hal'),
    ('Adres', 'Adres'),
    ('postcode', 'Postcode'),
    ('Plaats', 'Plaats'),
    ('Vertrektijd', 'Vertrektijd'),
    ('Scheidsrechter', 'Scheidsrechter'),
    ('Zaaldienst', 'Zaaldienst'),
)

# Extra rewrite applied to every line while inside a header.
IN_HEADER_REPLACEMENTS = (
    (' ', ' '),
)


def apply_replacements(text, pairs):
    """Return *text* after applying each (old, new) pair in order.

    Pairs with an empty *old* are skipped: ``str.replace('', new)`` inserts
    *new* between every character, which would corrupt the whole line.
    """
    for old, new in pairs:
        if old:
            text = text.replace(old, new)
    return text


def clean_line(line, in_header=False):
    """Clean one input line.

    Args:
        line: raw line from the input file (surrounding whitespace and the
            trailing newline are stripped).
        in_header: whether the previous line left us inside a table header.

    Returns:
        A ``(cleaned_line, in_header)`` tuple; the second item is the header
        state to pass in for the next line.
    """
    stripped = apply_replacements(line.strip(), STRIP_REPLACEMENTS)
    cleaned = apply_replacements(stripped, HEADER_REPLACEMENTS)
    # A header rewrite that changed the line means a header has started.
    if cleaned != stripped:
        in_header = True
    if in_header:
        cleaned = apply_replacements(cleaned, IN_HEADER_REPLACEMENTS)
    # An empty line ends the header.
    if cleaned == '':
        in_header = False
    return cleaned, in_header


def sanitise_lines(lines):
    """Yield the cleaned form of each line in *lines*, tracking header state."""
    in_header = False
    for raw in lines:
        cleaned, in_header = clean_line(raw, in_header)
        yield cleaned


def main(argv=None):
    """Parse the command line and write the cleaned copy of the input file.

    Args:
        argv: argument list to parse; ``None`` means ``sys.argv[1:]``.
    """
    parser = argparse.ArgumentParser(description='Cleanup Mapleleaves program tables')
    parser.add_argument('inputFile', help='original table snippet from Gnumeric')
    parser.add_argument('outputFile', help='new file with clean html')
    args = parser.parse_args(argv)

    print('Sanitising {0}'.format(args.inputFile))
    print('Writing to {0}'.format(args.outputFile))

    # Context managers close both files even if reading or writing fails;
    # iterating the file object streams it instead of loading it whole.
    with open(args.inputFile, 'r') as orig_file, \
            open(args.outputFile, 'w') as clean_file:
        for cleaned in sanitise_lines(orig_file):
            clean_file.write(cleaned + '\n')


if __name__ == '__main__':
    main()