20241108: Scrabblewoordenboek as source

This commit is contained in:
2024-11-19 15:17:21 +01:00
parent c0c0ecb28b
commit 49e40201dc

View File

@@ -12,6 +12,10 @@ with open('wikiwoordenboek_basiswoorden.lst', 'r', encoding='utf-8') as wordfile
wikiwoorden_words = wordfile.readlines() wikiwoorden_words = wordfile.readlines()
print(f'wikiwoorden basic list contains {len(wikiwoorden_words)} words') print(f'wikiwoorden basic list contains {len(wikiwoorden_words)} words')
with open('scrabblewoorden.txt', 'r', encoding='utf-8') as wordfile:
scrabble_words = wordfile.readlines()
print(f'scrabblewoorden list contains {len(scrabble_words)} words')
with open('basiswoorden-gekeurd.txt', 'r', encoding='utf-8') as wordfile: with open('basiswoorden-gekeurd.txt', 'r', encoding='utf-8') as wordfile:
basis_words = wordfile.readlines() basis_words = wordfile.readlines()
print(f'opentaal basic list contains {len(basis_words)} words') print(f'opentaal basic list contains {len(basis_words)} words')
@@ -33,7 +37,7 @@ print()
all_words_count = 0 all_words_count = 0
dictionary_list = [] dictionary_list = []
result_list = [] result_list = []
for word in wikiwoorden_words + basis_words + flexies_words: for word in wikiwoorden_words + scrabble_words + basis_words + flexies_words:
all_words_count += 1 all_words_count += 1
word = word.strip() word = word.strip()
if word.isalpha() and word.lower() == word: if word.isalpha() and word.lower() == word:
@@ -47,7 +51,8 @@ if USE_OPENTAAL:
# Use basis_words if you want to use the big but difficult OpenTaal list # Use basis_words if you want to use the big but difficult OpenTaal list
source_words = basis_words source_words = basis_words
else: else:
source_words = wikiwoorden_words # Combine the basic words and the Scrabble word lists
source_words = wikiwoorden_words + scrabble_words
for word in source_words: for word in source_words:
word = word.strip() word = word.strip()
@@ -63,7 +68,7 @@ if USE_OPENTAAL:
filtered_set = nl_set.difference(en_set) filtered_set = nl_set.difference(en_set)
filtered_list = sorted(list(filtered_set), key=str.casefold) filtered_list = sorted(list(filtered_set), key=str.casefold)
else: else:
filtered_list = sorted(list(wikiwoorden_words), key=str.casefold) filtered_list = sorted(list(set(result_list)), key=str.casefold)
print(f'words total: {all_words_count}') print(f'words total: {all_words_count}')
print(f'words in dictionary: {len(dictionary_list)}') print(f'words in dictionary: {len(dictionary_list)}')