PYTHON 12
Generate-word-cloud.py Guest on 8th November 2020 07:01:34 AM
  1. #!/usr/bin/env python2
  2. # -*- coding:utf8 -*-
  3. """
  4. A simple Python script to generate a square wordcloud from a file or a bunch of files.
  5. https://github.com/Naereen/generate-word-cloud.py
  6.  
  7. Requires https://github.com/amueller/word_cloud/
  8.  
  9. .. note:: Copyright 2016 Lilian Besson
  10. .. warning:: License GPLv3.
  11.  
  12. ---
  13.  
  14. Examples
  15. --------
  16. $ generate-word-cloud.py --help
  17. Gives help.
  18.  
  19. $ generate-word-cloud.py ./hamlet.txt
  20. Generate a wordcloud from the textfile hamlet.txt, saving to wordcloud.png (the default output file).
  21.  
  22. $ generate-word-cloud.py -o wordcloud.png ./*.txt
  23. Generate a wordcloud from all the txt files in the current directory, save it to wordcloud.png.
  24.  
  25. ------
  26.  
  27. .. sidebar:: Last version?
  28.  
  29.   Take a look at the latest version at https://github.com/Naereen/generate-word-cloud.py
  30.  
  31. .. note::
  32.  
  33.   This program is free software: you can redistribute it and/or modify
  34.   it under the terms of the GNU General Public License as published by
  35.   the Free Software Foundation, either version 3 of the License, or
  36.   (at your option) any later version.
  37.  
  38.   generate-word-cloud.py is distributed in the hope that it will be useful,
  39.   but WITHOUT ANY WARRANTY; without even the implied warranty of
  40.   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  41.  
  42.   See the GNU General Public License for more details.
  43.   You should have received a copy of the GNU General Public License v3 along with generate-word-cloud.py.
  44.   If not, see <http://perso.crans.org/besson/LICENSE.html>.
  45. """
  46.  
  47. from __future__ import print_function  # Python 2/3 compatible
  48.  
  49. from sys import exit, argv
  50. import matplotlib.pyplot as plt
  51. from wordcloud import WordCloud
  52. from os import path
  53. # import argparse  # DONE : switch to docopt (https://github.com/docopt/docopt)
  54. from docopt import docopt
  55.  
  56. try:
  57.     try:
  58.         from ansicolortags import printc
  59.     except ImportError as e:
  60.         print("Optional dependancy (ansicolortags) is not available, using regular print function.")
  61.         print("  You can install it with : 'pip install ansicolortags' (or sudo pip)...")
  62.         from ANSIColors import printc
  63. except ImportError:
  64.     printc = print
  65.  
# Options
# Version number, inserted by main() in the version string given to docopt.
version = '0.2'
# Module-level flag for showing the image.
# NOTE(review): not read by any function visible in this file (makeimage and
# main use their own ``show`` argument) -- confirm before removing.
show = False
  69.  
  70.  
  71. def readfiles(filenames):
  72.     """ Return the content of each file, concatenated as one big string.
  73.  
  74.    - Path could be relative or absolute, but nothing fancy is done here.
  75.    """
  76.     text = ""
  77.     # Read the whole text for each file
  78.     for filename in filenames:
  79.         try:
  80.             text += open(filename, 'r').read()
  81.             text += r"\n"
  82.         except Exception as e:
  83.             printc("<ERROR>Error, exception: <reset>{}.".format(e))
  84.             printc("<red>Skipping file <black>'{}'<reset>...".format(filename))
  85.     # return "\n".join(open(filename, 'r').read() for filename in filenames)
  86.     return text
  87.  
  88.  
  89. def generate(text, max_words=200, width=800, height=600):
  90.     """ Generate a word cloud image from the given text (one huge string). """
  91.     # Take relative word frequencies into account, lower max_font_size
  92.     # https://amueller.github.io/word_cloud/generated/wordcloud.WordCloud.html#wordcloud.WordCloud
  93.     max_words = int(max_words) if max_words is not None else  200
  94.     width     = int(width)     if width     is not None else  800
  95.     height    = int(height)    if height    is not None else  600
  96.     wc = WordCloud(max_font_size=40,
  97.                    relative_scaling=.5,
  98.                    max_words=max_words,
  99.                    width=width,
  100.                    height=height
  101.                    )
  102.     return wc.generate(text)
  103.  
  104.  
  105. def makeimage(wordcloud,
  106.               outname='wordcloud.png', title='Word cloud', show=False, force=False):
  107.     """ Display or save the wordcloud as a image. """
  108.     # Display the generated image:
  109.     try:
  110.         # 2. the matplotlib way:
  111.         plt.figure()
  112.         plt.imshow(wordcloud)
  113.         plt.axis("off")
  114.         if title:
  115.             printc("<magenta>Using title<reset> <blue>'{}'<reset>.".format(title))
  116.             plt.title(title)
  117.         if show:
  118.             printc("<green>Showing the generated image...<reset>")
  119.             plt.show()
  120.         else:
  121.             printc("<green>Saving the generated image<reset> to <blue>'{}'<reset>...".format(outname))
  122.             if (not force) and path.exists(outname):
  123.                 erase = raw_input("The outfile '{}' already exists, should I erase it ?  [y/N]".format(outname))
  124.                 if erase == 'y':
  125.                     plt.savefig(outname)
  126.                 else:
  127.                     printc("<magenta>Not erasing it...<reset>")
  128.                     printc("<green>Showing the generated image...<reset>")
  129.                     plt.show()
  130.             else:
  131.                 if force:
  132.                     printc("<WARNING> -f or --force has been used, overwriting the image '{}' <red>without<reset> asking you...".format(outname))
  133.                 plt.savefig(outname)
  134.     except Exception as e:
  135.         printc("<ERROR> Error, exception<reset>: {}".format(e))
  136.         # 1. The pil way (if you don't have matplotlib)
  137.         printc("<WARNING> Something went wrong with matplotlib, switching to PIL backend... (just showing the image, <red>not<reset> saving it!)")
  138.         image = wordcloud.to_image()
  139.         image.show()
  140.     return True
  141.  
  142.  
  143. #: Help for the cli
  144. full_docopt_text = """
  145. generate-word-cloud.py
  146.  
  147. Usage:
  148.  generate-word-cloud.py [-s | --show] [-f | --force] [-o OUTFILE | --outfile=OUTFILE]
  149.                         [-t TITLE | --title=TITLE] [-m MAX | --max=MAX]
  150.                         [-w WIDTH | --width=WIDTH] [-H HEIGHT | --height=HEIGHT]
  151.                         INFILE...
  152.  generate-word-cloud.py [-h | --help]
  153.  generate-word-cloud.py [-v | --version]
  154.  
  155. Options:
  156.  -h --help            Show this help message and exit.
  157.  -v --version         Show program's version number and exit.
  158.  -s --show            Show the image but do not save it [default False].
  159.  -f --force           Force to write the image, even if present (default is to ask before overwriting an existing file) [default False].
  160.  -o OUTFILE --outfile=OUTFILE
  161.                       Filename for the generated image [default 'wordcloud.png'].
  162.  -t TITLE --title=TITLE
  163.                       Title for the image [default None].
  164.  -m MAX --max MAX
  165.                       Max number of words to display on the cloud word [default 150].
  166.  -w WIDTH --width WIDTH
  167.                       Width of the generate image [default 400].
  168.  -H HEIGHT --height HEIGHT
  169.                       Height of the generate image [default 300].
  170.  INFILE               A text file to read.
  171.  
  172.  
  173. A simple Python script to generate a (square) wordcloud from a file INFILE (or bunch of files INFILE...).
  174. Requires https://github.com/amueller/word_cloud/ (installable with pip).
  175.  
  176. Examples:
  177. $ generate-word-cloud.py --help
  178. Gives this help.
  179.  
  180. $ generate-word-cloud.py ./hamlet.txt
  181. Generate a wordcloud from the textfile hamlet.txt, saving to 'wordcloud.png' (default).
  182.  
  183. $ generate-word-cloud.py -o mywordcloud.png ./*.txt
  184. Generate a wordcloud from all the txt files in the current directory, save it to 'mywordcloud.png'.
  185.  
  186. Copyright 2016 Lilian Besson (License GPLv3)
  187. generate-word-cloud.py is distributed in the hope that it will be useful,
  188. but WITHOUT ANY WARRANTY; without even the implied warranty of
  189. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  190. """
  191.  
  192.  
  193. def main(argv):
  194.     """ Use the arguments of the command line. """
  195.     # Use the arg parser
  196.     args = docopt(full_docopt_text, argv=argv, version="generate-word-cloud.py v{}".format(version))
  197.     # printc("<magenta>Arguments: {} <reset>".format(args))  # DEBUG
  198.  
  199.     # Read the files
  200.     printc("<green>Reading the files<reset>, from: <blue>{}<reset>.".format(args['INFILE']))
  201.     text = readfiles(args['INFILE'])
  202.     # Decide where to save it
  203.     outname = args['--outfile'] if args['--outfile'] else 'wordcloud.png'
  204.     # Generate the wordcloud
  205.     # print("Making a wordcloud from this text:\n", text)  # DEBUG
  206.     wordcloud = generate(text,
  207.                          max_words=args['--max'],
  208.                          width=args['--width'],
  209.                          height=args['--height']
  210.                          )
  211.     # Finally, saving the image
  212.     printc("<green>Making the image<reset> and saving it to <blue>{}<reset>.".format(outname))
  213.     makeimage(wordcloud,
  214.               outname=outname, title=args['--title'],
  215.               force=args['--force'], show=args['--show']
  216.               )
  217.     return 0
  218.  
  219.  
  220. if __name__ == "__main__":
  221.     exit(int(main(argv[1:])))
  222.  
  223. # End of generate-word-cloud.py

Paste is for source code and general debugging text.

Login or Register to edit, delete and keep track of your pastes and more.

Recent Pastes

Raw Paste

Login or Register to edit or fork this paste. It's free.