Source code for cwordtm.pivot

# pivot.py
#    
# Show a pivot table for a prescribed range of Scripture
#
# Copyright (c) 2025 CWordTM Project 
# Author: Johnny Cheng <drjohnnycheng@gmail.com>
#
# Created: 24-Jun-2022
# Updated: 16-Jun-2024 (0.6.4), 25-Jan-2025, 1-Feb-2025 (0.7.4)
#
# URL: <https://github.com/drjohnnycheng/cwordtm.git>
# For license information, see LICENSE.TXT

import pandas as pd


def _text_size(texts, chi=False):
    """Returns the total size of the given texts.

    :param texts: The texts to be measured
    :type texts: iterable
    :param chi: If the value is True, the size of a text is its number of
        characters, otherwise it is its number of whitespace-separated
        words, default to False
    :type chi: bool, optional
    :return: The sum of the sizes of all texts; entries that are not
        strings (e.g. missing values) are skipped
    :rtype: int
    """

    # Non-string entries have neither a length in characters nor a
    # 'split' method, so they are ignored instead of raising an error
    return sum(len(t if chi else t.split())
               for t in texts if isinstance(t, str))


def stat(df, chi=False):
    """Returns a pivot table from the DataFrame 'df' storing the input
    Scripture, with columns 'book', 'book_no', 'chapter', 'verse', 'text',
    'testament', 'category', 'cat', and 'cat_no'.

    For every book, the table gives the number of distinct chapters, the
    number of verse rows, and the size of the text (characters if 'chi' is
    True, otherwise whitespace-separated words). Books are grouped by
    'cat_no'; each group is followed by a 'Sub-Total' row, and the last
    row of the table is the overall 'Total'.

    :param df: The DataFrame storing the input Scripture
    :type df: pandas.DataFrame
    :param chi: If the value is True, assume the input text is in Chinese,
        otherwise, the input text is in English, default to False
    :type chi: bool, optional
    :return: The pivot table of the input Scripture grouped by category
        ('cat_no'), indexed by ('category', 'book_no', 'book'), with the
        columns 'chapter', 'verse' and 'text'
    :rtype: pandas.DataFrame
    """

    measures = ['chapter', 'verse', 'text']

    # Per-book figures: distinct chapters, verse rows and text size
    stat_df = pd.pivot_table(df,
                             index=['book_no', 'book', 'category', 'cat_no'],
                             values=measures,
                             aggfunc={'chapter': lambda ch: len(ch.unique()),
                                      'verse': 'count',
                                      'text': lambda ts: _text_size(ts, chi)})
    stat_df = stat_df[measures].sort_index()

    # Re-pivot one category at a time so that every category gets its own
    # 'Sub-Total' margin row
    stat_df2 = stat_df.groupby('cat_no').apply(lambda sub: sub.pivot_table(
        index=['category', 'book_no', 'book'],
        values=measures,
        aggfunc={'chapter': 'sum', 'verse': 'sum', 'text': 'sum'},
        margins=True, margins_name='Sub-Total'))

    # Each figure appears twice in the column sums (once in its book row
    # and once in the 'Sub-Total' row of its category), hence the halving
    stat_df2.loc[('', '', 'Total', '')] = stat_df2.sum() // 2

    # Drop the 'cat_no' level that groupby/apply prepended to the index
    stat_df2.index = stat_df2.index.droplevel(0)
    stat_df2.fillna('', inplace=True)

    return stat_df2[measures]
def pivot(df, value='text', category='category'):
    """Returns a pivot table from the DataFrame 'df' storing the input
    documents, grouped by the prescribed column.

    The table counts the entries of column 'value' for every distinct
    entry of column 'category', and ends with a 'Total' row.

    :param df: The DataFrame storing the input documents
    :type df: pandas.DataFrame
    :param value: The column whose entries are counted, default to 'text'
    :type value: str, optional
    :param category: The column to be the group-by column,
        default to 'category'
    :type category: str, optional
    :return: The pivot table of the input documents grouped by the
        prescribed column, or None (after printing a message) if
        'category' is None or is not a column of 'df'
    :rtype: pandas.DataFrame
    """

    if category is None or category not in df.columns:
        print("No valid column has been specified!")
        return None

    return df.pivot_table(values=value, index=category, aggfunc='count',
                          margins=True, margins_name='Total')