# viz.py
#
# Show a wordcloud for a prescribed range of Scripture
#
# Copyright (c) 2025 CWordTM Project
# Author: Johnny Cheng <drjohnnycheng@gmail.com>
#
# Updated: 4-Jun-2024 (0.6.4), 17-Nov-2024, 15-Jan-2025 (0.7.3)
#
# URL: https://github.com/drjohnnycheng/cwordtm.git
# For license information, see LICENSE.TXT
import numpy as np
import pandas as pd
from importlib_resources import files
import matplotlib.pyplot as plt
from wordcloud import WordCloud
from PIL import Image
from io import BytesIO
from . import util
[docs]
def plot_cloud(wordcloud, figsize, web_app=False):
    """Render an already generated word cloud on a new matplotlib figure

    A new figure of the requested size is created, its axes are hidden
    and the word cloud is drawn on it with ``plt.imshow``.

    :param wordcloud: The generated WordCloud object to be drawn
    :type wordcloud: WordCloud object
    :param figsize: Size (width, height) of the figure
    :type figsize: tuple
    :param web_app: The flag indicating the function is initiated from a web
        application; the figure is handed back to the caller only when it
        is True, default to False
    :type web_app: bool
    :return: The word cloud figure if ``web_app`` is True, otherwise None
    :rtype: matplotlib.pyplot.figure or None
    """

    figure = plt.figure(figsize=figsize)
    plt.axis("off")
    plt.imshow(wordcloud)

    # The figure object is returned only to web-application callers;
    # every other caller receives None.
    if not web_app:
        return None
    return figure
[docs]
def show_wordcloud(docs, clean=False, figsize=(12, 8), bg='white', image=0, web_app=False):
    """Prepare and show a wordcloud

    :param docs: The collection of documents for preparing a wordcloud.
        A pandas.DataFrame contributes its ``text`` column, a pandas.Series,
        list or numpy.ndarray contributes all its elements (converted to
        strings and joined by spaces); any other value (e.g. a plain string)
        is used as is
    :type docs: pandas.DataFrame, pandas.Series, list, numpy.ndarray or str
    :param clean: The flag whether text preprocessing is needed,
        default to False
    :type clean: bool, optional
    :param figsize: Size (width, height) of word cloud, default to (12, 8)
    :type figsize: tuple, optional
    :param bg: The background color (name) of the wordcloud, default to 'white'
    :type bg: str, optional
    :param image: The filename of the prescribed image as the mask of the
        wordcloud, or an in-memory image (BytesIO), or 1/2/3/4 for using an
        internal image (heart / disc / triangle / arrow),
        default to 0 (No image mask)
    :type image: int or str or BytesIO, optional
    :param web_app: The flag indicating the function is initiated from a web
        application, default to False
    :type web_app: bool
    :return: The result of :func:`plot_cloud`, i.e. the wordcloud figure
        when ``web_app`` is True, otherwise None
    :rtype: matplotlib.pyplot.figure or None
    """

    masks = ['heart.jpg', 'disc.jpg', 'triangle.jpg', 'arrow.jpg']

    # Work out where the mask image comes from (None means "no mask").
    mask_source = None
    if image == 0:
        mask_source = None
    elif image in (1, 2, 3, 4):  # Internal image file
        mask_source = files('cwordtm.images').joinpath(masks[image - 1])
    elif isinstance(image, str) and len(image) > 0:
        mask_source = image
    elif isinstance(image, BytesIO):
        # Read from a copy so the caller's stream position is left untouched
        mask_source = BytesIO(image.getvalue())

    mask = None
    if mask_source is not None:
        # Convert to an array inside the context manager so that the
        # underlying image file is closed instead of being left open.
        with Image.open(mask_source) as mask_image:
            mask = np.array(mask_image)

    # Flatten the supported collection types into one space-separated string
    if isinstance(docs, pd.DataFrame):
        docs = ' '.join(docs.text.astype(str))
    elif isinstance(docs, pd.Series):
        docs = ' '.join(docs.astype(str))
    elif isinstance(docs, (list, np.ndarray)):
        docs = ' '.join(str(doc) for doc in docs)

    if clean:
        docs = util.preprocess_text(docs)

    wordcloud = WordCloud(background_color=bg, colormap='rainbow', mask=mask) \
        .generate(docs)

    return plot_cloud(wordcloud, figsize=figsize, web_app=web_app)
[docs]
def chi_wordcloud(docs, figsize=(15, 10), bg='white', image=0, web_app=False):
    """Prepare and show a Chinese wordcloud

    The language is switched to Chinese through ``util.set_lang('chi')`` and
    the word frequencies used for the cloud are obtained from
    ``util.get_diction(docs)``. The cloud is rendered with the font file
    ``msyh.ttc`` taken from the ``cwordtm.data`` package resources.

    :param docs: The collection of Chinese documents for preparing a
        wordcloud; it is passed unchanged to ``util.get_diction``
    :type docs: pandas.DataFrame
    :param figsize: Size (width, height) of word cloud, default to (15, 10)
    :type figsize: tuple, optional
    :param bg: The background color (name) of the wordcloud, default to 'white'
    :type bg: str, optional
    :param image: The filename of the prescribed image as the mask of the
        wordcloud, or an in-memory image (BytesIO), or 1/2/3/4 for using an
        internal image (heart / disc / triangle / arrow),
        default to 0 (No image mask)
    :type image: int or str or BytesIO, optional
    :param web_app: The flag indicating the function is initiated from a web
        application, default to False
    :type web_app: bool
    :return: The result of :func:`plot_cloud`, i.e. the wordcloud figure
        when ``web_app`` is True, otherwise None
    :rtype: matplotlib.pyplot.figure or None
    """

    util.set_lang('chi')
    diction = util.get_diction(docs)

    masks = ['heart.jpg', 'disc.jpg', 'triangle.jpg', 'arrow.jpg']

    # Work out where the mask image comes from (None means "no mask").
    mask_source = None
    if image == 0:
        mask_source = None
    elif image in (1, 2, 3, 4):  # Internal image file
        mask_source = files('cwordtm.images').joinpath(masks[image - 1])
    elif isinstance(image, str) and len(image) > 0:
        mask_source = image
    elif isinstance(image, BytesIO):
        # Read from a copy so the caller's stream position is left untouched
        mask_source = BytesIO(image.getvalue())

    mask = None
    if mask_source is not None:
        # Convert to an array inside the context manager so that the
        # underlying image file is closed instead of being left open.
        with Image.open(mask_source) as mask_image:
            mask = np.array(mask_image)

    font_file = files('cwordtm.data').joinpath('msyh.ttc')

    wordcloud = WordCloud(background_color=bg, colormap='rainbow',
                          mask=mask, font_path=str(font_file)) \
        .generate_from_frequencies(frequencies=diction)

    return plot_cloud(wordcloud, figsize=figsize, web_app=web_app)