"""Generate random pronounceable words from letter chains found in a corpus.

The corpus is split into lowercase words, every run of ``chain_length``
consecutive letters inside a word is counted, and new words are grown by
repeatedly appending a chain that starts with the current last letter,
chosen with probability proportional to how often it occurs in the corpus.

The script runs unchanged on Python 2.7 and Python 3.
"""
import collections
import random
import re
import string
import sys

try:
    read_line = raw_input  # Python 2: input() would eval() what the user types
except NameError:
    read_line = input  # Python 3

CORPUS_PATH = "comc.txt"

# Everything except ASCII letters and ASCII whitespace. The whitespace is
# spelled out instead of using \s so that Python 3 (where \s also matches
# Unicode spaces) filters exactly the same characters as Python 2.
NON_WORD_CHARS = re.compile(r"[^a-zA-Z \t\n\r\f\v]")


def extract_words(text):
    """Return the lowercase words of *text*, with non-letters stripped."""
    return NON_WORD_CHARS.sub("", text).lower().split()


def read_words(path):
    """Read the corpus file at *path* and return its lowercase words."""
    # Binary mode plus latin-1 can never raise a decode error, whatever the
    # corpus encoding is; non-ASCII characters are discarded by the filter.
    with open(path, "rb") as corpus:
        raw = corpus.read()
    if not isinstance(raw, str):  # Python 3 bytes; Python 2 is already str
        raw = raw.decode("latin-1")
    return extract_words(raw)


def build_chain_table(words, chain_length):
    """Map each letter chain seen in *words* to its relative frequency.

    Only chains that actually occur are stored. Returns an empty dict when
    no word is long enough to contain a chain.

    Raises:
        ValueError: if *chain_length* is below 2. A chain of one letter adds
            nothing when appended after its own first letter, so generation
            could never make progress.
    """
    if chain_length < 2:
        raise ValueError("chain_length must be at least 2")
    counts = collections.Counter()
    for word in words:
        # The range is empty for words shorter than the chain length.
        for begin in range(len(word) - chain_length + 1):
            counts[word[begin:begin + chain_length]] += 1
    total = float(sum(counts.values()))
    return {chain: count / total for chain, count in counts.items()}


def index_by_first_letter(table):
    """Group ``(chain, weight)`` pairs of *table* by the chain's first letter.

    Built once so that each generation step only looks at the chains that
    can follow the current letter instead of scanning the whole table.
    """
    index = collections.defaultdict(list)
    for chain, weight in table.items():
        index[chain[0]].append((chain, weight))
    return dict(index)


def pick_chain(candidates, rng=random):
    """Pick one chain from a non-empty list of ``(chain, weight)`` pairs.

    The choice is random, proportional to weight. *rng* is anything with a
    ``uniform`` method (the ``random`` module by default).
    """
    total = sum(weight for _, weight in candidates)
    threshold = rng.uniform(0, total)
    running = 0
    for chain, weight in candidates:
        running += weight
        if running > threshold:
            return chain
    # uniform() may return its upper bound; fall back to the last candidate.
    return candidates[-1][0]


def make_word(chains_by_first, word_len, start, rng=random):
    """Grow a word from *start* until it is *word_len* letters long.

    Args:
        chains_by_first: index produced by ``index_by_first_letter``.
        word_len: target length of the word.
        start: leading letters; a random lowercase letter is used when empty.
        rng: source of randomness (``uniform`` and ``choice``).

    Returns:
        The generated word. It is cut to *word_len* when the last chain
        overshoots, but *start* itself is never shortened. The word may be
        shorter than *word_len* if it reaches a letter that no corpus chain
        starts with.
    """
    if not start:
        start = rng.choice(string.ascii_lowercase)
    letters = list(start)
    limit = max(word_len, len(letters))
    while len(letters) < limit:
        # Chains are lowercase, so look up case-insensitively.
        candidates = chains_by_first.get(letters[-1].lower())
        if not candidates:
            break  # dead end: nothing can follow this letter
        # The chain's first letter is already the word's last letter.
        letters.extend(pick_chain(candidates, rng)[1:])
    return "".join(letters[:limit])


def main():
    """Prompt for settings, parse the corpus, then print a word per Enter."""
    chain_length = int(read_line("Chain Length: "))
    min_word_len = int(read_line("Mininum Word Length: "))
    max_word_len = int(read_line("Maximum Word Length: "))
    prefix = str(read_line(
        "Enter letters to start the word with (blank for random): "))

    if chain_length < 2:
        sys.exit("Chain Length must be at least 2.")
    if min_word_len > max_word_len:
        sys.exit("Minimum word length must not exceed the maximum.")

    print("Parsing corpus...")
    table = build_chain_table(read_words(CORPUS_PATH), chain_length)
    if not table:
        sys.exit("No chains of length %d found in %s."
                 % (chain_length, CORPUS_PATH))
    chains_by_first = index_by_first_letter(table)

    # make a random word!
    print("Press enter for random word.")
    while True:
        try:
            read_line()
        except (EOFError, KeyboardInterrupt):
            break  # Ctrl-D / Ctrl-C ends the session without a traceback
        word_len = random.randint(min_word_len, max_word_len)
        print(make_word(chains_by_first, word_len, prefix))


if __name__ == "__main__":
    main()