- #!/usr/bin/env python
- # -*- coding: utf-8 -*-
- """
- A module with a simplified version of a word lexicon. It holds terms of
- the form (concept, forms). The concept is a basic word such as
- "age", and the forms are all the forms in which the concept occurs in
- a text, such as "aging", "ages" and so forth.
- """
def new_lexicon():
    """
    Build a fresh lexicon that contains no terms.

    @return: a new, empty dictionary, independent of any lexicon
        returned by an earlier call.
    """
    empty_lexicon = dict()
    return empty_lexicon
def add_term(concept, lexicon):
    """
    Register a concept in the lexicon as a term that has no forms yet.

    The term is stored under the concept itself as the pair
    (concept, []). Whatever was stored under that key before is replaced.

    @type concept: string
    @param concept: the base word to store as a concept
    @type lexicon: a lexicon (created by this module)
    @param lexicon: the lexicon that receives the term; modified in place
    """
    # Each term gets its own list so forms are never shared between concepts.
    fresh_term = (concept, list())
    lexicon[concept] = fresh_term
def add_form(form, concept, lexicon):
    """
    Attach a new form to an existing concept.

    The form is appended to the concept's list of forms, and the form
    itself becomes a key in the lexicon that points at the very same
    term object as the concept does.

    @type form: string
    @param form: the word to store as a new form
    @type concept: string
    @param concept: the concept which the new form belongs to; it must
        already be a key in the lexicon
    @type lexicon: a lexicon (created by this module)
    @param lexicon: the lexicon where the concept is stored; modified
        in place
    """
    entry = lexicon[concept]
    known_forms = entry[1]
    known_forms.append(form)
    # Store the same tuple (not a copy) so concept and form stay in sync.
    lexicon[form] = entry
def get_term(form, lexicon):
    """
    Look up the term (concept plus forms) stored under a word.

    @type form: string
    @param form: the word to look up; it is used directly as a key
        into the lexicon
    @type lexicon: a lexicon (created by this module)
    @param lexicon: the lexicon to search
    @return: the term stored under the given key
    """
    term = lexicon[form]
    return term
def get_all_forms(lexicon):
    """
    Give back every word that is a key in the lexicon.

    Since add_term stores the concept as a key and add_form stores each
    form as a key, the result covers concepts as well as their forms.

    @type lexicon: a lexicon (created by this module)
    @param lexicon: the lexicon whose keys are wanted
    @return: the keys of the lexicon, as produced by its keys() method
    """
    known_words = lexicon.keys()
    return known_words
def get_frequence_table(lexicon):
    """
    Return a frequency table over the concepts in the lexicon.

    Every entry (key) of the lexicon is visited once, and the concept of
    the term it points at is counted. A concept's count is therefore the
    number of lexicon keys that map to a term with that concept.

    @type lexicon: a lexicon (created by this module)
    @param lexicon: the lexicon to count over; it is not modified
    @return: a new dictionary with concepts as keys and integers as
        values. An empty lexicon gives an empty dictionary.
    """
    freq_table = {}
    for term in lexicon.values():
        concept = term[0]
        # One dict lookup per entry; avoids building a key list each time.
        freq_table[concept] = freq_table.get(concept, 0) + 1
    return freq_table
- # Test code
- # Examples in Swedish
- if __name__ == '__main__':
- lexicon = new_lexicon()
- add_term('ro', lexicon)
- add_form('rodde', 'ro', lexicon)
- add_form('ror', 'ro', lexicon)
- add_form('roende', 'ro', lexicon)
- print "Hela lexikonet: ", lexicon
- add_term('bygga', lexicon)
- add_form('byggde', 'bygga', lexicon)
- add_form('bygger', 'bygga', lexicon)
- add_form('byggare', 'bygga', lexicon)
- add_form('byggande', 'bygga', lexicon)
- print "Term för 'ror': " " + str(get_term('ror', lexicon))
- print "Term för 'rodde': " + str(get_term('rodde', lexicon))
- print "Alla former i lexikonet:" + str(get_all_forms(lexicon))
- print "Frekvenstabell: " + str(get_frequence_table(lexic