#!/usr/bin/python
import argparse
# Command-line interface: two positional arguments, the input file to read
# and the output file to write.
parser = argparse.ArgumentParser(description='Cleanup Mapleleaves program tables')
parser.add_argument('inputFile', help='original table snippet from Gnumeric')
parser.add_argument('outputFile', help='new file with clean html')
args = parser.parse_args()

# Report what is about to happen. The parenthesised single-argument form
# prints the same text under Python 2 and Python 3, whereas the bare
# `print 'x'` statement is a SyntaxError on Python 3.
print('Sanitising {0}'.format(args.inputFile))
print('Writing to {0}'.format(args.outputFile))

# Both handles stay open for the processing loop that follows; they are
# closed explicitly at the end of the script.
origFile = open(args.inputFile, 'r')
cleanFile = open(args.outputFile, 'w')
# Main pass: read every line of the input file, apply a fixed series of
# str.replace() clean-ups, and write the result to the output file.
#
# NOTE(review): indentation is not visible in this copy of the file, so the
# nesting of the loop body and of the `if` statements below cannot be read
# off the text -- confirm against the original before editing.
#
# inHeader records whether the current line is being treated as part of a
# table header; it is set after a header replacement changed the line and
# reset when an empty line is seen.
inHeader = False
for thisLine in origFile.readlines():
# Drop leading/trailing whitespace, including the trailing newline (a
# newline is added back when the line is written out).
thisLine = thisLine.strip()
# Regular clean-up applied to every line: remove the font-size declarations,
# empty style attributes and valign="bottom" attributes.
# NOTE(review): the last two replace() calls on this line, and all calls on
# the next statement, show empty or identical search/replacement text; the
# original literals presumably contained markup that is missing from this
# copy -- confirm against the original file.
thisLine = thisLine.replace(' font-size:9pt;', '').replace(' font-size:10pt;', '').replace(' font-size:11pt;', '').replace(' style=""', '').replace(' valign="bottom"', '').replace(' ', ' ').replace('', '')
# NOTE(review): the final literal of this statement spans a line break, which
# a single-quoted string cannot do -- the text was presumably altered in
# this copy; do not edit without the original.
thisLine = thisLine.replace('', '').replace('', '').replace('', '').replace('
| ', ' | ')
# Snapshot of the line before the header replacements, used below to detect
# whether any of them changed it.
lineSoFar = thisLine
# Header detection: one replacement per known column title.
# NOTE(review): as shown, every pair except 'postcode' -> 'Postcode' has
# identical search and replacement text; presumably the replacement side
# originally differed in markup -- confirm. Several titles (Tijd, Veld,
# Code, Vertrektijd, Zaaldienst) appear twice in the list.
thisLine = thisLine.replace('Datum | ', 'Datum | ')
thisLine = thisLine.replace('Tijd | ', 'Tijd | ')
thisLine = thisLine.replace('Tijd | ', 'Tijd | ')
thisLine = thisLine.replace('Veld | ', 'Veld | ')
thisLine = thisLine.replace('Poule | ', 'Poule | ')
thisLine = thisLine.replace('Code | ', 'Code | ')
thisLine = thisLine.replace('Team Thuis | ', 'Team Thuis | ')
thisLine = thisLine.replace('Team Uit | ', 'Team Uit | ')
thisLine = thisLine.replace('Plaats/Sporthal | ', 'Plaats/Sporthal | ')
thisLine = thisLine.replace('Scheidsrechters | ', 'Scheidsrechters | ')
thisLine = thisLine.replace('Schrijvers | ', 'Schrijvers | ')
thisLine = thisLine.replace('Zaaldienst | ', 'Zaaldienst | ')
thisLine = thisLine.replace('Vertrektijd | ', 'Vertrektijd | ')
thisLine = thisLine.replace('Veld | ', 'Veld | ')
thisLine = thisLine.replace('Code | ', 'Code | ')
thisLine = thisLine.replace('Thuis | ', 'Thuis | ')
thisLine = thisLine.replace('Uit | ', 'Uit | ')
thisLine = thisLine.replace('Hal | ', 'Hal | ')
thisLine = thisLine.replace('Adres | ', 'Adres | ')
thisLine = thisLine.replace('postcode | ', 'Postcode | ')
thisLine = thisLine.replace('Plaats | ', 'Plaats | ')
thisLine = thisLine.replace('Vertrektijd | ', 'Vertrektijd | ')
thisLine = thisLine.replace('Scheidsrechter | ', 'Scheidsrechter | ')
thisLine = thisLine.replace('Zaaldienst | ', 'Zaaldienst | ')
# If any header replacement changed the line, treat it as a header line.
if (thisLine != lineSoFar):
inHeader = True
# While in a header, apply one more replacement to the line.
# NOTE(review): search and replacement text are identical as shown -- see the
# note on lost markup above.
if (inHeader):
thisLine = thisLine.replace(' | ', ' | ')
# An empty line (after stripping) ends the header state.
if thisLine == '':
inHeader = False
# Emit the cleaned line, restoring the newline removed by strip().
cleanFile.write(thisLine + '\n')
# Release both file handles once processing is finished.
origFile.close()
cleanFile.close()