Source code for cwordtm.pivot

# pivot.py
#    
# Show a pivot table for a prescribed range of Scripture
#
# Copyright (c) 2025 CWordTM Project 
# Author: Johnny Cheng <drjohnnycheng@gmail.com>
#
# Created: 24-Jun-2022
# Updated: 16-Jun-2024 (0.6.4), 25-Jan-2025, 1-Feb-2025 (0.7.4)
#
# URL: <https://github.com/drjohnnycheng/cwordtm.git>
# For license information, see LICENSE.TXT

import pandas as pd



[docs]
def stat(df, chi=False):
    """Build a per-book statistics table for the Scripture stored in 'df'.

    For every book the table reports the number of distinct chapters, the
    number of verses and the size of the text (characters when 'chi' is True,
    whitespace-separated words otherwise). Books are grouped by 'cat_no';
    each group is closed by a 'Sub-Total' row and a final 'Total' row sums
    all books.

    :param df: The DataFrame storing the input Scripture; the columns
        'book', 'book_no', 'chapter', 'verse', 'text', 'category' and
        'cat_no' are read
    :type df: pandas.DataFrame
    :param chi: If the value is True, assume the input text is in Chinese
        and measure it in characters, otherwise measure it in words,
        default to False
    :type chi: bool, optional
    :return: The statistics table indexed by 'category', 'book_no' and 'book'
        with the columns 'chapter', 'verse' and 'text'
    :rtype: pandas.DataFrame
    """

    measures = ['chapter', 'verse', 'text']

    def _distinct_count(chapters):
        # Number of different chapter values seen within one book
        return len(chapters.unique())

    def _text_size(texts):
        # Chinese text is sized by characters, other text by words
        if chi:
            return sum(len(t) for t in texts)
        return sum(len(t.split()) for t in texts)

    # Step 1: one row per book carrying its three measures
    per_book = pd.pivot_table(
        df,
        index=['book_no', 'book', 'category', 'cat_no'],
        values=measures,
        aggfunc={'chapter': _distinct_count,
                 'verse': 'count',
                 'text': _text_size},
    )
    per_book = per_book[measures].sort_index()

    def _with_subtotal(group):
        # Re-pivot one category so that pandas appends its margin row
        return pd.pivot_table(
            group,
            index=['category', 'book_no', 'book'],
            values=measures,
            aggfunc=dict.fromkeys(measures, 'sum'),
            margins=True,
            margins_name='Sub-Total',
        )

    # Step 2: add a 'Sub-Total' row at the end of every 'cat_no' group
    report = per_book.groupby('cat_no').apply(_with_subtotal)

    # Step 3: each book is counted twice in the column sums (its own row and
    # its group's sub-total), hence the halving for the grand total
    grand_total = report.sum() // 2
    report.loc[('', '', 'Total', '')] = grand_total

    # Step 4: drop the leading 'cat_no' level and tidy up for display
    report.index = report.index.droplevel(0)
    report.fillna('', inplace=True)

    # print("Book category information can be shown by invoking 'util.bible_cat_info()'")

    return report[measures]




[docs]
def pivot(df, value='text', category='category'):
    """Count the entries of one column of 'df' for each group of another column.

    The result has one row per distinct value of 'category' plus a closing
    'Total' row, and holds the count of 'value' entries in each group.

    :param df: The DataFrame storing the input documents
    :type df: pandas.DataFrame
    :param value: The column whose entries are counted, default to 'text'
    :type value: str, optional
    :param category: The column to be the group-by column, default to 'category'
    :type category: str, optional
    :return: The pivot table of the input documents grouped by the prescribed
        column, or None (after printing a message) if 'category' is None or
        is not a column of 'df'
    :rtype: pandas.DataFrame
    """

    group_column_ok = category is not None and category in df.columns
    if not group_column_ok:
        print("No valid column has been specified!")
        return None

    # margins=True makes pandas append the overall count as the 'Total' row
    counts = pd.pivot_table(
        df,
        values=value,
        index=category,
        aggfunc='count',
        margins=True,
        margins_name='Total',
    )
    return counts