diff --git a/bin/sanitise_mapleleaves_programma.py b/bin/sanitise_mapleleaves_programma.py deleted file mode 100755 index e6cc5fc..0000000 --- a/bin/sanitise_mapleleaves_programma.py +++ /dev/null @@ -1,75 +0,0 @@ -#!/usr/bin/python -import argparse - -parser = argparse.ArgumentParser(description='Cleanup Mapleleaves program tables') - -#parser.add_argument('inputFile', metavar='1', type=string -#parser.add_argument('inputFile', help='original table snippet from Gnumeric', dest='inputFile') -parser.add_argument('inputFile', help='original table snippet from Gnumeric') -parser.add_argument('outputFile', help='new file with clean html') - -args = parser.parse_args() -#print vars(args) -#argparse.Namespace(origFile='inputFile') - -print 'Sanitising {0}'.format(args.inputFile) -print 'Writing to {0}'.format(args.outputFile) - -#origFile = open(args['inputFile'], 'r') -origFile = open(args.inputFile, 'r') -cleanFile = open(args.outputFile, 'w') - -#thisLine = origFile.readline() -#print thisLine - -inHeader = False - -for thisLine in origFile.readlines(): - thisLine = thisLine.strip() - # regular strippage - thisLine = thisLine.replace(' font-size:9pt;', '').replace(' font-size:10pt;', '').replace(' font-size:11pt;', '').replace(' style=""', '').replace(' valign="bottom"', '').replace(' ', ' ').replace('', '') - thisLine = thisLine.replace('', '').replace('', '').replace('', '').replace('', ' ') - - lineSoFar = thisLine - - # if it is a table header - thisLine = thisLine.replace('Datum', 'Datum') - thisLine = thisLine.replace('Tijd', 'Tijd') - thisLine = thisLine.replace('Tijd', 'Tijd') - thisLine = thisLine.replace('Veld', 'Veld') - thisLine = thisLine.replace('Poule', 'Poule') - thisLine = thisLine.replace('Code', 'Code') - thisLine = thisLine.replace('Team Thuis', 'Team Thuis') - thisLine = thisLine.replace('Team Uit', 'Team Uit') - thisLine = thisLine.replace('Plaats/Sporthal', 'Plaats/Sporthal') - thisLine = thisLine.replace('Scheidsrechters', 'Scheidsrechters') - thisLine = thisLine.replace('Schrijvers', 'Schrijvers') - thisLine = thisLine.replace('Zaaldienst', 'Zaaldienst') - thisLine = thisLine.replace('Vertrektijd', 'Vertrektijd') - - thisLine = thisLine.replace('Veld', 'Veld') - thisLine = thisLine.replace('Code', 'Code') - thisLine = thisLine.replace('Thuis', 'Thuis') - thisLine = thisLine.replace('Uit', 'Uit') - thisLine = thisLine.replace('Hal', 'Hal') - thisLine = thisLine.replace('Adres', 'Adres') - thisLine = thisLine.replace('postcode', 'Postcode') - thisLine = thisLine.replace('Plaats', 'Plaats') - thisLine = thisLine.replace('Vertrektijd', 'Vertrektijd') - - thisLine = thisLine.replace('Scheidsrechter', 'Scheidsrechter') - thisLine = thisLine.replace('Zaaldienst', 'Zaaldienst') - - if (thisLine != lineSoFar): - inHeader = True - - if (inHeader): - thisLine = thisLine.replace(' ', ' ') - if thisLine == '': - inHeader = False - - #print thisLine - cleanFile.write(thisLine + '\n') - -origFile.close() -cleanFile.close()