ExcelMaster¶

通用 Excel 报告引擎 —— 封装 xlsxwriter，提供光标流式写入 + 图表 + 条件格式。

格式定义 — `ExcelFormatTool`¶

ExcelFormatTool ¶

ExcelFormat ¶

Initialize excel format.

源代码位于： ExcelMaster/ExcelFormatTool.py

class ExcelFormat:
    """
    Open an xlsxwriter-backed workbook and register the default cell formats.

    Every default format is reachable in two ways: as an attribute (for
    example ``self.HEADER_1``) and through ``self.dict_cell_format`` under its
    long name and its short aliases (for example ``'HEADER_1'`` and ``'#'``).
    """

    def __init__(self, filepath):
        """
        Create the workbook for ``filepath`` and build the format registry.

        Attributes set here:
            engine: the ``pd.ExcelWriter`` created with the xlsxwriter engine.
            workbook: ``engine.book``; every format is created on it.
            basename: final path component of ``filepath``.
            base_filepath: ``filepath`` with ``basename`` cut off its end.
            dict_cell_format: mapping of format name / alias -> format object.
        """
        self.engine = pd.ExcelWriter(filepath, engine='xlsxwriter')
        self.workbook = self.engine.book
        self.base_filepath, self.basename = self._split_path(filepath)

        self.dict_cell_format = {}
        self._build_default_formats()

    @staticmethod
    def _split_path(filepath):
        """
        Return ``(base_filepath, basename)`` for ``filepath``.

        The prefix is obtained by slicing the basename off the end.
        ``str.strip(basename)`` must not be used for this: it treats its
        argument as a set of characters and strips them from both ends, so
        ``'reports/report.xlsx'`` would be reduced to ``'/'``.
        """
        path_text = os.fspath(filepath)
        basename = os.path.basename(path_text)
        return path_text[:len(path_text) - len(basename)], basename

    def _register_format(self, properties, *names):
        """Create a workbook format and store it under every name in ``names``."""
        cell_format = self.workbook.add_format(properties)
        for name in names:
            self.dict_cell_format[name] = cell_format
        return cell_format

    def _build_default_formats(self):
        """
        Create all built-in formats on ``self.workbook``.

        Requires ``self.workbook`` and ``self.dict_cell_format`` to exist.
        Formats are created in a fixed order so workbook format indices stay
        stable between runs.
        """
        reg = self._register_format

        def header(size, underline):
            # Bold, black, left-aligned heading text of the given point size.
            return {
                'bold': True,
                'underline': underline,
                'font_name': 'Calibri',
                'font_size': size,
                'font_color': '#000000',
                'align': 'left',
            }

        def banner(size, font_color, bg_color):
            # Bold, centred, bordered title cell with a background fill.
            return {
                'bold': True,
                'font_name': 'Calibri',
                'font_size': size,
                'font_color': font_color,
                'bg_color': bg_color,
                'align': 'center',
                'border': 1,
            }

        ######### Basic Text-related Formatting ###############

        self.TEXT_NO_FORMAT = reg(
            {'font_name': 'Calibri', 'font_size': 11},
            'TEXT_NO_FORMAT', '')

        self.HEADER_1 = reg(header(18, False), 'HEADER_1', '#')
        self.HEADER_2 = reg(header(16, False), 'HEADER_2', '##')
        self.HEADER_3 = reg(header(14, False), 'HEADER_3', '###')
        self.HEADER_4 = reg(header(12, False), 'HEADER_4', '####')

        # Underlined variants of the four headings.
        self.HEADER_1I = reg(header(18, True), 'HEADER_1I', '#_')
        self.HEADER_2I = reg(header(16, True), 'HEADER_2I', '##_')
        self.HEADER_3I = reg(header(14, True), 'HEADER_3I', '###_')
        self.HEADER_4I = reg(header(12, True), 'HEADER_4I', '####_')

        self.TEXT_BOLD = reg({
            'bold': True,
            'underline': False,
            'font_name': 'Calibri',
            'font_size': 11,
            'font_color': '#000000',
            'align': 'left',
        }, 'TEXT_BOLD', 'B', '**')

        self.TEXT_ITALIC = reg({
            'bold': False,
            'underline': False,
            'font_name': 'Calibri',
            'font_size': 11,
            'font_color': '#000000',
            'align': 'left',
            'italic': True,
        }, 'TEXT_ITALIC', 'I', '*')

        self.TEXT_UNDERLINE = reg({
            'bold': False,
            'underline': True,
            'font_name': 'Calibri',
            'font_size': 11,
            'font_color': '#000000',
            'align': 'left',
            'italic': False,
        }, 'TEXT_UNDERLINE', 'U', '_')

        self.TEXT_NO_FORMAT_BORDER = reg({
            'bold': False,
            'underline': False,
            'font_name': 'Calibri',
            'font_size': 11,
            'font_color': '#000000',
            'align': 'left',
            'border': 1,
        }, 'TEXT_NO_FORMAT_BORDER', 'BORDER', '----')

        self.TEXT_BORDER_CENTER = reg({
            'bold': False,
            'underline': False,
            'font_name': 'Calibri',
            'font_size': 11,
            'font_color': '#000000',
            'align': 'center',
            'valign': 'vcenter',
            'border': 1,
        }, 'TEXT_BORDER_CENTER', 'BORDER_CENTER', '----C')

        self.TEXT_RED = reg({
            'font_name': 'Calibri',
            'font_size': 11,
            'font_color': '#FF3800',
            'align': 'left',
        }, 'TEXT_RED', 'RED')

        self.TEXT_RED_BOLD = reg({
            'font_name': 'Calibri',
            'font_size': 11,
            'font_color': '#FF3800',
            'bold': True,
            'align': 'left',
        }, 'TEXT_RED_BOLD', '**RED')

        self.TEXT_BOLD_UNDERLINE = reg({
            'bold': True,
            'underline': True,
            'font_name': 'Calibri',
            'font_size': 11,
            'align': 'left',
        }, 'TEXT_BOLD_UNDERLINE', 'BU', '**_')

        self.TEXT_ITALIC_UNDERLINE = reg({
            'bold': False,
            'underline': True,
            'italic': True,
            'font_name': 'Calibri',
            'font_size': 11,
            'align': 'left',
        }, 'TEXT_ITALIC_UNDERLINE', 'IU', '*_')

        self.TEXT_BOLD_ITALIC_UNDERLINE = reg({
            'bold': True,
            'underline': True,
            'italic': True,
            'font_name': 'Calibri',
            'font_size': 11,
            'align': 'left',
        }, 'TEXT_BOLD_ITALIC_UNDERLINE', 'BIU', '***_')

        # The 14-pt variant used to be registered under the HEADER_4 keys
        # ('HEADER_4_BOLD_UNDERLINE', '####**_'), which the 12-pt variant then
        # overwrote; it now has its own keys. The HEADER_4 keys still resolve
        # to the 12-pt format exactly as before.
        self.HEADER_3_BOLD_UNDERLINE = reg({
            'bold': True,
            'underline': True,
            'font_name': 'Calibri',
            'font_size': 14,
            'align': 'left',
        }, 'HEADER_3_BOLD_UNDERLINE', 'H3_BU', '###**_')

        self.HEADER_4_BOLD_UNDERLINE = reg({
            'bold': True,
            'underline': True,
            'font_name': 'Calibri',
            'font_size': 12,
            'align': 'left',
        }, 'HEADER_4_BOLD_UNDERLINE', 'H4_BU', '####**_')

        # The 'NUM_PERCENTAGE' key is intentionally not attached here: it is
        # owned by the number formats further down (as it effectively always
        # was, because the later assignment replaced this one).
        self.TEXT_NO_FORMAT_PERCENTAGE = reg({
            'bold': False,
            'underline': False,
            'font_name': 'Calibri',
            'font_size': 11,
            'font_color': '#000000',
            'num_format': '0.0000%',
            'align': 'left',
            'border': 1,
        }, 'TEXT_NO_FORMAT_PERCENTAGE', '%')

        self.TEXT_SECTION_HEADER = reg({
            'bold': True,
            'font_name': 'Calibri',
            'font_size': 12,
            'font_color': '#1F497D',
            'bg_color': '#C5D9F1',
            'align': 'center',
            'valign': 'vcenter',
            'text_wrap': False,
        }, 'TEXT_SECTION_HEADER', 'SECTION')

        self.TEXT_ITALIC_10 = reg({
            'italic': True,
            'font_name': 'Calibri',
            'font_size': 10,
            'align': 'left',
        }, 'TEXT_ITALIC_10')

        ######### TABLE HEADER FORMATTING  ##########################

        self.TABLE_ROW_COL_HEADER = reg({
            'bold': True,
            'font_name': 'Calibri',
            'font_size': 11,
            'font_color': '#1F4994',
            'align': 'left',
            'border': 1,
        }, 'ROW_COL_HEADER')

        self.TABLE_HEADER = reg({
            'bold': True,
            'font_name': 'Calibri',
            'font_size': 11,
            'font_color': '#000000',
            # No 'bg_color' key = no fill. xlsxwriter >= 3.2 rejects None via
            # Color._from_value(None) -> TypeError, so omit the key entirely.
            'align': 'center',
            'valign': 'vcenter',
            'text_wrap': False,
            'border': 1,
        }, 'TABLE_HEADER', 'HEADER')

        ######## Colored Title Formatting #################################

        self.BG_BOLD_ORANGE_H4 = reg(banner(12, '#000000', '#FABF8F'),
                                     'BG_BOLD_ORANGE_H4', 'ORANGE_H4')
        self.BG_BOLD_ORANGE_H3 = reg(banner(14, '#000000', '#FABF8F'),
                                     'BG_BOLD_ORANGE_H3', 'ORANGE_H3')
        self.BG_BOLD_ORANGE_H2 = reg(banner(16, '#000000', '#FABF8F'),
                                     'BG_BOLD_ORANGE_H2', 'ORANGE_H2')
        self.BG_BOLD_ORANGE_H1 = reg(banner(18, '#000000', '#FABF8F'),
                                     'BG_BOLD_ORANGE_H1', 'ORANGE_H1')

        self.BG_FONT_BLUE_H4 = reg(banner(12, '#1F497D', '#C5D9F1'),
                                   'BG_FONT_BLUE_H4', 'BLUE_H4')
        self.BG_FONT_BLUE_H3 = reg(banner(14, '#1F497D', '#C5D9F1'),
                                   'BG_FONT_BLUE_H3', 'BLUE_H3')
        self.BG_FONT_BLUE_H2 = reg(banner(16, '#1F497D', '#C5D9F1'),
                                   'BG_FONT_BLUE_H2', 'BLUE_H2')
        self.BG_FONT_BLUE_H1 = reg(banner(18, '#1F497D', '#C5D9F1'),
                                   'BG_FONT_BLUE_H1', 'BLUE_H1')

        self.SECTION_HEADER_PERCENTAGE = reg({
            'bold': True,
            'font_name': 'Calibri',
            'font_size': 11,
            'font_color': '#1F497D',
            'bg_color': '#C5D9F1',
            'align': 'left',
            'valign': 'vcenter',
            'text_wrap': True,
            'border': 1,
            'num_format': '0.0000%',
        }, 'SECTION_HEADER_PERCENTAGE', 'SECTION%')

        self.TEXT_RIGHT_JUSTIFY = reg({
            'font_name': 'Calibri',
            'font_size': 11,
            'align': 'right',
        }, 'TEXT_RIGHT_JUSTIFY', 'TEXT_RIGHT')

        ################# Number Formatting #################

        self.NUM_COMMA = reg({'num_format': '#,##0', 'border': 1},
                             'NUM_COMMA', 'NUM,')

        # One attribute per precision instead of rebinding NUM_PERCENTAGE
        # four times, so every precision keeps its own handle.
        self.NUM_PERCENTAGE_4 = reg({'num_format': "0.0000%", 'border': 1}, 'NUM%.4')
        self.NUM_PERCENTAGE_3 = reg({'num_format': "0.000%", 'border': 1}, 'NUM%.3')
        self.NUM_PERCENTAGE_2 = reg({'num_format': "0.00%", 'border': 1}, 'NUM%.2')
        self.NUM_PERCENTAGE_1 = reg({'num_format': "0.0%", 'border': 1},
                                    'NUM%.1', 'NUM_PERCENTAGE')
        # Backward compatibility: the legacy attribute and the
        # 'NUM_PERCENTAGE' key keep pointing at the one-decimal format, which
        # is what the last of the original assignments left them at.
        self.NUM_PERCENTAGE = self.NUM_PERCENTAGE_1

        ################# Highlight Formatting #################

        # Colours are written as '#RRGGBB' like every other format here; the
        # bare 'fcfc81' spelling is not a named colour.
        self.BG_LIGHT_YELLOW = reg({"bg_color": "#fcfc81"},
                                   'BG_LIGHT_YELLOW', 'YELLOW_BG')

    def add_new_format(self, format_dict, format_name):
        """
        Add a new user-defined format.

        Args:
            format_dict: properties passed to ``workbook.add_format``.
            format_name: key to store the new format under.

        Returns:
            0 when the format was added; 1 when ``format_name`` is already
            taken (the existing entry is left untouched and a message is
            logged).
        """
        if format_name in self.dict_cell_format:
            logger.info("ERROR: Failed to add new format, the format name has already existed!")
            return 1
        self.dict_cell_format[format_name] = self.workbook.add_format(format_dict)
        return 0

add_new_format ¶

add_new_format(format_dict, format_name)

Add a new user-defined format.

源代码位于： ExcelMaster/ExcelFormatTool.py

def add_new_format(self, format_dict, format_name):
    """
    Register a user-defined format under ``format_name``.

    Returns 0 after creating the format via ``self.workbook.add_format`` and
    storing it in ``self.dict_cell_format``. Returns 1, after logging a
    message and leaving the registry untouched, when the name is already
    present.
    """
    registry = self.dict_cell_format
    if format_name in registry:
        logger.info("ERROR: Failed to add new format, the format name has already existed!")
        return 1

    registry[format_name] = self.workbook.add_format(format_dict)
    return 0

核心引擎 — `ExcelMaster`¶

ExcelMaster ¶

ExcelWorkbook ¶

Bases: ExcelFormat

Write anything to Excel (Workbook-level Operator).

源代码位于： ExcelMaster/ExcelMaster.py

class ExcelWorkbook(ExcelFormat):
    """
    Write anything to Excel (Workbook-level Operator).

    Adds cell-range conversion helpers, conditional-format helpers and
    workbook shutdown on top of the format registry from ``ExcelFormat``.
    """

    def __init__(self, filepath, verbose = True):
        """
        Open the workbook at ``filepath``.

        Args:
            filepath: forwarded to ``ExcelFormat.__init__``.
            verbose: stored as ``self.verbose``; not read inside this class.
        """
        super().__init__(filepath)
        self.verbose = verbose
        # Worksheet registry keyed by sheet name; starts empty.
        self.ws_dict = {}

    def to_cell_range_text(self, first_row, first_col, last_row, last_col):
        """Return the range text ``xl_range`` builds from the four corner indices."""
        return xl_range(first_row, first_col, last_row, last_col)

    def cell_range_to_loc(self, cell_range_text):
        """
        Convert a textual cell range into a flat list of indices.

        The text is split on ``":"`` and every part is passed through
        ``xl_cell_to_rowcol``; the resulting pairs are concatenated in order.
        """
        return [index
                for cell_ref in cell_range_text.split(":")
                for index in xl_cell_to_rowcol(cell_ref)]

    def colletter_to_textloc(self, row_index, col_letter):
        """
        Append ``row_index`` to every column letter of ``col_letter``.

        A single column such as ``"A"`` is first expanded to ``"A:A"``, so
        ``(3, "A")`` gives ``"A3:A3"`` and ``(3, "A:C")`` gives ``"A3:C3"``.
        """
        if ":" not in col_letter:
            col_letter = col_letter + ":" + col_letter
        row_text = str(row_index)
        return ":".join(col + row_text for col in col_letter.split(":"))

    def set_color_scale(self, worksheet, cell_range,
                        colors = ("#F8696B", "#FFEB84", "#63BE7B")):
        """
        Apply a 2- or 3-colour scale conditional format to ``cell_range``.

        Args:
            worksheet: object exposing ``conditional_format(range, options)``.
            cell_range: range text, or a list/tuple of the four indices
                accepted by ``to_cell_range_text``.
            colors: two colours (min, max) or three (min, mid, max).

        Returns:
            0 on success.

        Raises:
            ValueError: when ``colors`` does not hold exactly 2 or 3 entries.
        """
        if len(colors) == 3:
            rule = {
                "type": "3_color_scale",
                "min_color": colors[0],
                "mid_color": colors[1],
                "max_color": colors[2],
            }
        elif len(colors) == 2:
            rule = {
                "type": "2_color_scale",
                "min_color": colors[0],
                "max_color": colors[1],
            }
        else:
            raise ValueError("Please give 2 or 3 colors!")

        if isinstance(cell_range, (list, tuple)):
            cell_range = self.to_cell_range_text(*cell_range)

        worksheet.conditional_format(cell_range, rule)
        return 0

    def set_data_bar(self, worksheet, cell_range, bar_color = "#63C384"):
        """
        Apply a data-bar conditional format to ``cell_range``.

        ``cell_range`` may be range text or a list/tuple of four indices.
        Returns 0.
        """
        rule = {'type': 'data_bar',
                'data_bar_2010': True,
                "bar_color": bar_color}

        if isinstance(cell_range, (list, tuple)):
            cell_range = self.to_cell_range_text(*cell_range)

        worksheet.conditional_format(cell_range, rule)
        return 0

    def set_cell_format(self, worksheet, cell_range, cformat, cell_condition = None):
        """
        Apply ``cformat`` to ``cell_range`` through a conditional-format rule.

        Args:
            worksheet: object exposing ``conditional_format(range, options)``.
            cell_range: range text, or a list/tuple of four indices.
            cformat: a format object, or a key of ``self.dict_cell_format``.
            cell_condition: optional ``(criteria, value)`` tuple. The criteria
                is lower-cased. For ``'between'`` / ``'not between'`` the
                value must hold ``(minimum, maximum)``. Only a non-empty
                *tuple* is treated as a condition; anything else results in
                the unconditional ``'no_errors'`` rule.

        Returns:
            0 on success.

        Raises:
            KeyError: when ``cformat`` is a string that is not registered.
        """
        # Parse the condition before anything else so a malformed condition
        # fails before the range/format lookups.
        criteria = None
        value = None
        if cell_condition and isinstance(cell_condition, tuple):
            criteria = cell_condition[0].lower()
            value = cell_condition[1]

        if isinstance(cell_range, (list, tuple)):
            cell_range = self.to_cell_range_text(*cell_range)

        if isinstance(cformat, str):
            cformat = self.dict_cell_format[cformat]

        if criteria is None:
            # No usable condition: format every cell that holds no error.
            rule = {'type': 'no_errors', "format": cformat}
        elif criteria in ('between', 'not between'):
            rule = {"type": "cell",
                    "criteria": criteria,
                    "minimum": value[0],
                    "maximum": value[1],
                    "format": cformat}
        else:
            rule = {"type": "cell",
                    "criteria": criteria,
                    "value": value,
                    "format": cformat}

        worksheet.conditional_format(cell_range, rule)
        return 0

    def set_cell_format_rbyr(self, worksheet, start_row, condition_list, condition, ifelse_col, cformat = "YELLOW_BG"):
        """
        Conditionally highlight cells row by row.

        Entry ``i`` of ``condition_list`` is checked with
        ``val_input_condition(entry, condition)``. When the check passes the
        columns ``ifelse_col[0]`` are formatted, otherwise ``ifelse_col[1]``;
        a falsy column entry means "format nothing" for that outcome. The row
        number used in the range text is ``start_row + i + 1``.

        Returns:
            0 after all entries were processed.
        """
        if_col = ifelse_col[0]
        else_col = ifelse_col[1]

        for offset, target in enumerate(condition_list):
            columns = if_col if val_input_condition(target, condition) else else_col
            if columns:
                self.set_cell_format(
                    worksheet,
                    cell_range=self.colletter_to_textloc(start_row + offset + 1, columns),
                    cformat=cformat)
        return 0

    def remove_tmp_img(self, img_pattern = r".tmp_image_[0-9]+.png"):
        """
        Delete temporary images found under ``./``.

        Files are looked up with ``list_files(location="./", pattern=...)``
        and removed with ``os.remove``.

        Returns:
            0 when at least one file was removed, ``None`` when nothing
            matched.
        """
        tmp_imgs = list_files(location = "./", pattern = img_pattern)
        if len(tmp_imgs) == 0:
            return
        for img_path in tmp_imgs:
            os.remove(img_path)
        return 0

    def close_workbook(self):
        """
        Finish writing the contents of the workbook and close the file.

        The workbook is closed through the pandas writer (``self.engine``)
        rather than by calling ``self.workbook.close()`` directly: the writer
        owns the workbook, and closing it saves the workbook and also
        releases the file handle the writer opened. Temporary images are
        removed afterwards.
        """
        self.engine.close()
        self.remove_tmp_img(img_pattern = r".tmp_image_[0-9]+.png")
        logging.info("All temp images have been removed.")

to_cell_range_text ¶

to_cell_range_text(first_row, first_col, last_row, last_col)

To Excel Cell Range.

源代码位于： ExcelMaster/ExcelMaster.py

def to_cell_range_text(self, first_row, first_col, last_row, last_col):
    """Return the range text ``xl_range`` builds from the four corner indices."""
    corners = (first_row, first_col, last_row, last_col)
    return xl_range(*corners)

cell_range_to_loc ¶

cell_range_to_loc(cell_range_text)

Convert text cell range expression to a list of values.

源代码位于： ExcelMaster/ExcelMaster.py

def cell_range_to_loc(self, cell_range_text):
    """
    Flatten a textual cell range into a list of indices.

    The text is split on ``":"``; each part goes through
    ``xl_cell_to_rowcol`` and the resulting values are concatenated in order.
    """
    return [index
            for cell_ref in cell_range_text.split(":")
            for index in xl_cell_to_rowcol(cell_ref)]

colletter_to_textloc ¶

colletter_to_textloc(row_index, col_letter)

Append Row Index to the given Column Letter-Formatted Index.

源代码位于： ExcelMaster/ExcelMaster.py

def colletter_to_textloc(self, row_index, col_letter):
    """
    Attach ``row_index`` to each column letter in ``col_letter``.

    A lone column ("A") is treated as the span "A:A", so the result always
    has the form "<col><row>:<col><row>" for one- and two-column inputs.
    """
    if ":" not in col_letter:
        col_letter = col_letter + ":" + col_letter

    row_text = str(row_index)
    return ":".join(part + row_text for part in col_letter.split(":"))

set_color_scale ¶

set_color_scale(worksheet, cell_range, colors=('#F8696B', '#FFEB84', '#63BE7B'))

Set Color Scale.

源代码位于： ExcelMaster/ExcelMaster.py

def set_color_scale(self, worksheet, cell_range,
                    colors = ("#F8696B", "#FFEB84", "#63BE7B")):
    """
    Put a colour-scale conditional format on ``cell_range``.

    ``colors`` holds either (min, max) or (min, mid, max); any other length
    raises ``ValueError``. ``cell_range`` may be range text or a list/tuple
    of four indices that is converted with ``to_cell_range_text``.
    Returns 0.
    """
    scale_types = {2: "2_color_scale", 3: "3_color_scale"}
    n_colors = len(colors)
    if n_colors not in scale_types:
        raise ValueError("Please give 2 or 3 colors!")

    rule = {"type": scale_types[n_colors], "min_color": colors[0]}
    if n_colors == 3:
        rule["mid_color"] = colors[1]
        rule["max_color"] = colors[2]
    else:
        rule["max_color"] = colors[1]

    if isinstance(cell_range, (list, tuple)):
        cell_range = self.to_cell_range_text(*cell_range)

    worksheet.conditional_format(cell_range, rule)
    return 0

set_data_bar ¶

set_data_bar(worksheet, cell_range, bar_color='#63C384')

Set Data Bar in a Worksheet for a range of cells.

源代码位于： ExcelMaster/ExcelMaster.py

def set_data_bar(self, worksheet, cell_range, bar_color = "#63C384"):
    """
    Put a data-bar conditional format on ``cell_range``.

    ``cell_range`` may be range text or a list/tuple of four indices that is
    converted with ``to_cell_range_text``. Returns 0.
    """
    if isinstance(cell_range, (list, tuple)):
        cell_range = self.to_cell_range_text(*cell_range)

    rule = {
        'type': 'data_bar',
        'data_bar_2010': True,
        "bar_color": bar_color,
    }
    worksheet.conditional_format(cell_range, rule)
    return 0

set_cell_format ¶

set_cell_format(worksheet, cell_range, cformat, cell_condition=None)

Set format for a range of cell.

源代码位于： ExcelMaster/ExcelMaster.py

def set_cell_format(self, worksheet, cell_range, cformat, cell_condition = None):
    """
    Apply ``cformat`` to ``cell_range`` via a conditional-format rule.

    ``cformat`` is either a format object or a key of
    ``self.dict_cell_format``. ``cell_condition`` is honoured only when it is
    a non-empty tuple ``(criteria, value)``: the criteria is lower-cased, and
    for 'between' / 'not between' the value supplies (minimum, maximum).
    Without a usable condition the 'no_errors' rule is applied. Returns 0.
    """
    # The condition is unpacked first so that a malformed one fails before
    # the range and format lookups.
    conditional = bool(cell_condition) and isinstance(cell_condition, tuple)
    if conditional:
        criteria, value = cell_condition[0].lower(), cell_condition[1]

    if isinstance(cell_range, (list, tuple)):
        cell_range = self.to_cell_range_text(*cell_range)

    if isinstance(cformat, str):
        cformat = self.dict_cell_format[cformat]

    if not conditional:
        rule = {'type': 'no_errors', "format": cformat}
    elif criteria in ('between', 'not between'):
        rule = {
            "type": "cell",
            "criteria": criteria,
            "minimum": value[0],
            "maximum": value[1],
            "format": cformat,
        }
    else:
        rule = {
            "type": "cell",
            "criteria": criteria,
            "value": value,
            "format": cformat,
        }

    worksheet.conditional_format(cell_range, rule)
    return 0

set_cell_format_rbyr ¶

set_cell_format_rbyr(worksheet, start_row, condition_list, condition, ifelse_col, cformat='YELLOW_BG')

Conditionally Highlight Cells Row by Row.

源代码位于： ExcelMaster/ExcelMaster.py

def set_cell_format_rbyr(self, worksheet, start_row, condition_list, condition, ifelse_col, cformat = "YELLOW_BG"):
    """
    Highlight cells one row at a time based on a per-row check.

    Entry ``i`` of ``condition_list`` is tested with
    ``val_input_condition(entry, condition)``. A passing entry formats the
    columns in ``ifelse_col[0]``, a failing one those in ``ifelse_col[1]``;
    a falsy column entry skips formatting for that outcome. The row number
    written into the range text is ``start_row + i + 1``. Returns 0.
    """
    if_col, else_col = ifelse_col[0], ifelse_col[1]

    for offset in range(len(condition_list)):
        passed = val_input_condition(condition_list[offset], condition)
        columns = if_col if passed else else_col
        if not columns:
            continue
        self.set_cell_format(
            worksheet,
            cell_range=self.colletter_to_textloc(start_row + offset + 1, columns),
            cformat=cformat)

    return 0

remove_tmp_img ¶

remove_tmp_img(img_pattern='.tmp_image_[0-9]+.png')

Remove Temp Images.

源代码位于： ExcelMaster/ExcelMaster.py

def remove_tmp_img(self, img_pattern = r".tmp_image_[0-9]+.png"):
    """
    Delete the temporary images that ``list_files`` finds under ``./``.

    Returns 0 when at least one file was removed and ``None`` when no file
    matched ``img_pattern``.
    """
    tmp_imgs = list_files(location = "./", pattern = img_pattern)
    if len(tmp_imgs) == 0:
        return

    for img_path in tmp_imgs:
        os.remove(img_path)
    return 0

close_workbook ¶

close_workbook()

Finish writing the contents of the workbook and close the file.

源代码位于： ExcelMaster/ExcelMaster.py

def close_workbook(self):
    """
    Finish writing the contents of the workbook and close the file.

    The workbook is closed through the pandas writer (``self.engine``)
    rather than by calling ``self.workbook.close()`` directly: the writer
    owns the workbook, and closing it saves the workbook and also releases
    the file handle the writer opened. Temporary images are removed
    afterwards.
    """
    self.engine.close()
    self.remove_tmp_img(img_pattern = r".tmp_image_[0-9]+.png")
    logging.info("All temp images have been removed.")

ExcelMaster ¶

Bases: ExcelWorkbook

Write anything to Excel (Worksheet-level Operator)

源代码位于： ExcelMaster/ExcelMaster.py

class ExcelMaster(ExcelWorkbook):
    """  Write anything to Excel (Worksheet-level Operator) """
    def __init__(self, filepath, verbose, gap_number = 2, init_loc = (0, 0)):
        """
        Set up the workbook (via the parent class) and the write cursor.

        ``init_loc`` is the initial (row, col) of the cursor. ``gap_number`` is
        stored incremented by one and is later added to the cursor after each
        write to leave blank rows/columns between items.
        """
        super().__init__(filepath, verbose)

        # Write cursor used whenever a method is called without an explicit loc.
        self.curr_row, self.curr_col = init_loc[0], init_loc[1]
        self.gap_number = gap_number + 1

        # Worksheet size limits used when sizing cells.
        self.max_nrows, self.max_ncols = 1048576, 16384

        # Default cell size; passed to the *_pixels worksheet setters.
        self.default_row_height, self.default_col_width = 20, 64

    def add_worksheet(self, name, hide_grid = True, reset_loc = True, cell_scale = True, auto_fit = False, zoom_perc = 100, tab_color = None):
        """ Add a worksheet. """
        ws = self.workbook.add_worksheet(name)

        if hide_grid:
            ws.hide_gridlines(2)

        if reset_loc:
            self.reset_curr_loc()

        if isinstance(cell_scale, tuple):
            self.set_cell_size(ws, cell_scale)
        if cell_scale is True:
            self.set_cell_size(ws)

        if auto_fit:
            ws.auto_fit()

        if tab_color:
            ws.set_tab_color(tab_color)

        ws.set_zoom(zoom_perc)
        self.ws_dict[name] = ws
        self.engine.sheets[name] = ws
        return ws

    def reset_curr_loc(self, loc = (0, 0)):
        """ Reset Current Location. """

        self.curr_row = loc[0]
        self.curr_col = loc[1]
        return 0

    def _reset_cell_size(self):
        """ Reset Cell Size to Default Value. """
        self.default_row_height = 20
        self.default_col_width = 64
        return 0

    def set_cell_size(self, worksheet, size_scale = (1, 1)):
        """ Set Cell Size in Scale. """

        if isinstance(size_scale, tuple) and len(size_scale) == 2:
            self._reset_cell_size()
            self.default_row_height = self.default_row_height * size_scale[0]
            self.default_col_width =  self.default_col_width * size_scale[1]

        for i in range(0, self.max_nrows):
            worksheet.set_row_pixels(i, height=self.default_row_height)

        worksheet.set_column_pixels(0, self.max_ncols - 1, width=self.default_col_width)
        return 0

    def get_curr_loc(self, toCell = False):
        """ Get Current Location in worksheet. """
        if toCell:
            return xl_rowcol_to_cell(self.curr_row, self.curr_col)
        return (self.curr_row, self.curr_col)

    def set_border_line(self, worksheet, valuerange, border_line = 1):
        """ Set border line for a range of cells. """
        border_fmt = self.workbook.add_format({'bottom': border_line, 'top': border_line, 'left': border_line, 'right': border_line})
        self.set_cell_format(worksheet = worksheet, cell_range = valuerange, cformat = border_fmt)
        return 0

    def merge_col(self, worksheet, loc = None, nrows = 1, ncols = 1, text = "", skipby = 'row', cformat = 'BLUE_H4', retCellRange = None):
        """ Merge columns in a single row. """

        start_row = loc[0] if loc else self.curr_row
        start_col = loc[1] if loc else self.curr_col
        written_range = [start_row, start_col, start_row + nrows - 1, start_col + ncols - 1]

        worksheet.merge_range(*written_range, text, self.dict_cell_format[cformat])

        if self.verbose:
            logging.info(f"Merged Cells: {self.to_cell_range_text(*written_range)}")

        # Skipped by Rows/Columns
        if skipby == 'row':
            self.curr_row = (start_row + nrows + self.gap_number)
        if skipby == 'col':
            self.curr_col = (start_col + ncols + self.gap_number)

        # Return written location by Cell Text/Value Range
        if retCellRange == "text":
            return self.to_cell_range_text(*written_range)
        if retCellRange == "value":
            return written_range

        return 0

    def write_dataframe(self, worksheet, df, loc = None, title = None, index = False, header = True, skipby = 'row', titleformat = "BLUE_H4", headerformat = "TABLE_HEADER", valueformat="----", retCellRange = None):
        """ Write a dataframe to excel file. """

        start_row = loc[0] if loc else self.curr_row
        start_col = loc[1] if loc else self.curr_col

        ncols = df.shape[1] 
        nrows = df.shape[0]

        ## Get Number of Index Columns
        index_ncols = 0
        if index:
            index_ncols = len(df.index.names)
            ncols += index_ncols

        ## Get Number of Header Rows
        header_nrows = 0
        if header:
            header_nrows = len(df.columns.names)
            nrows += header_nrows

        ## Get Number of Title Rows
        title_nrows = 0
        if title:
            title_nrows = 1
            nrows += title_nrows

        ## Get Header Range (Include Index)
        if header:
            header_start_row = (start_row + title_nrows)
            header_start_col = start_col
            header_end_row = max((start_row + header_nrows - 1), header_start_row)
            header_end_col = (start_col + ncols - 1)
            header_range = self.to_cell_range_text(header_start_row, header_start_col, 
                                                   header_end_row, header_end_col)

        ## Write DataFrame
        if title:
            self.merge_col(worksheet = worksheet, loc = loc, ncols = ncols, cformat=titleformat, 
                           text = title, skipby = None)
            df.to_excel(self.engine, sheet_name = worksheet.name, startrow = start_row + title_nrows, 
                        startcol = start_col, header = header, index = index)
        else:
            df.to_excel(self.engine, sheet_name = worksheet.name, startrow = start_row, 
                        startcol = start_col, header = header, index = index)


        ## Get Value Range (Include Index)
        value_start_row = (start_row + title_nrows + header_nrows)
        value_start_col = start_col
        value_end_row = (start_row + nrows - 1)
        value_end_col = (start_col + ncols - 1)
        value_range = [value_start_row, value_start_col, value_end_row, value_end_col]

        ## Set Format
        self.set_cell_format(worksheet = worksheet, cell_range = value_range, cformat = valueformat)
        if header:
            self.set_cell_format(worksheet = worksheet, cell_range = header_range, cformat = headerformat)

        written_range = [start_row, start_col, (start_row + nrows - 1), (start_col + ncols - 1)]

        if self.verbose:
            logging.info(f"Table Written in Cell Range: {self.to_cell_range_text(*written_range)}")

        if skipby == 'row':
            self.curr_row = (start_row + nrows + self.gap_number)
        if skipby == 'col':
            self.curr_col = (start_col + ncols + self.gap_number)

        if retCellRange == "text":
            return self.to_cell_range_text(*written_range)
        if retCellRange == "value":
            return written_range

        return 0

    def _get_image_size(self, figPath, figScale = (1, 1), retSizeInCell = True):
        """
        Measure an image file, optionally expressed in worksheet cells.

        Args:
            figPath: path of the image, opened with PIL.
            figScale: (x_scale, y_scale) multiplied onto the pixel width/height.
            retSizeInCell: when True the size is divided by the current default
                column width / row height.

        Returns:
            ``(width, height)``: scaled pixels, or cell counts where the width is
            rounded up and the height rounded down.
        """
        # The context manager releases the file handle even if scaling fails.
        with Image.open(figPath) as img:
            width = img.width * figScale[0]
            height = img.height * figScale[1]

        if not retSizeInCell:
            return (width, height)

        w_in_cell = np.ceil(width / self.default_col_width)
        h_in_cell = np.floor(height / self.default_row_height)
        return (w_in_cell, h_in_cell)

    def _resize_image(self, imgPath, resize, outPath, size_in_cell = True):
        """
        Resize an image file and save the result to ``outPath``.

        Args:
            imgPath: path of the source image.
            resize: with ``size_in_cell`` True, a (rows, cols) pair converted to
                pixels with the current default row height / column width;
                otherwise passed to PIL unchanged as the target size.
            outPath: where the resized image is saved (may equal ``imgPath``).
            size_in_cell: whether ``resize`` is expressed in cells.

        Returns:
            0
        """
        if size_in_cell:
            # Cell sizes can become floats after scaling; PIL needs whole pixels.
            target = (int(resize[1] * self.default_col_width),
                      int(resize[0] * self.default_row_height))
        else:
            target = (resize[0], resize[1])

        # Resize once, and release the source file before writing the output.
        with Image.open(imgPath) as image:
            new_image = image.resize(target)

        new_image.save(outPath)
        return 0

    def insert_image(self, worksheet, figPath, figScale = (1, 1), loc = None, skipby = 'row', retCellRange = None):
        """ Insert an image to the sheet. """
        start_row = loc[0] if loc else self.curr_row
        start_col = loc[1] if loc else self.curr_col

        start_cell = xl_rowcol_to_cell(start_row, start_col)
        figsize_in_cells = self._get_image_size(figPath = figPath, figScale = figScale, retSizeInCell = True)

        worksheet.insert_image(start_cell, figPath, {"x_scale": figScale[0], "y_scale": figScale[1]})

        # Skipped by Rows/Columns
        if skipby == 'row':
            self.curr_row = (start_row + int(figsize_in_cells[1]) + self.gap_number)
        if skipby == 'col':
            self.curr_col = (start_col + int(figsize_in_cells[0]) + self.gap_number)

        written_range = [start_row, start_col, 
                         start_row + int(figsize_in_cells[1]), 
                         start_col + int(figsize_in_cells[0])]

        # Return written location by Cell Text/Value Range
        if retCellRange == "text":
            return self.to_cell_range_text(*written_range)
        if retCellRange == "value":
            return written_range

        if self.verbose:
            logging.info(f"Image Written in Cell Range: {self.to_cell_range_text(*written_range)}")

        return 0

    def __add_chart_data_tab(self, tabname, hide = True, max_num = 99999):
        """ Add a temp tab for chart data. """
        i = 0
        while i <= max_num:
            tabname_i = (tabname + str(i))
            if tabname_i not in self.ws_dict:
                ws = self.add_worksheet(tabname_i, reset_loc = False, 
                                        cell_scale = None, 
                                        auto_fit = False, 
                                        zoom_perc = 100)
                ws.hide()
                return ws
            i += 1
        return -1

    def _transpose_df_for_chart(self, df, y_list, x = None):
        """ Transpose Dataframe for Chart Structrue. """

        _df = df.copy()
        _df.columns.name = None
        _df.index.name = None

        if x is None:
            _df = _df[y_list].T
            _df = _df.reset_index(drop=False)
        else:
            x = x if isinstance(x, list) else [x]
            _df = tanspose_dataframe(_df[[*x, *y_list]], x)

        _df.columns.name = None
        _df.index.name = None
        return _df

    def __convert_to_none_tuple(self, x = None):
        """ Convert None Type to Tuple of Nones. """
        if x is None:
            return (None, None)
        return x

    def __validate_input_chart_obj(self, chart_type, input_chart):
        """ Validate if the given append_to_chart object is aligned with chart_type argument. """

        if chart_type == "line":
            return isinstance(input_chart, xlsxwriter.chart_line.ChartLine)
        if chart_type in ["column", "stacked_column"]:
            return isinstance(input_chart, xlsxwriter.chart_column.ChartColumn)
        if chart_type == "pie":
            return isinstance(input_chart, xlsxwriter.chart_pie.ChartPie)

    def write_chart(self, worksheet, df, y_list, x = None, title = "", 
                    chart_size = (30, 13), chart_type = "line",
                    y_axis_range = (None, None), y_num_format = None,
                    y2_axis = False, loc = None, retChart = False, 
                    skipby = "row", outputData = False, retCellRange = None,
                    xy_axes_name = ("", ""), major_gridlines = False,
                    legend = "bottom", line_marker = "circle", line_type = "solid", 
                    chart_style = None, append_to_chart = None): 
        """ Write line chart to Excel worksheet."""

        if isinstance(chart_type, str):
            if chart_type == "line":
                chart_type = {'type': 'line'}
            elif chart_type == "column":
                chart_type = {'type': 'column'}
            elif chart_type == "stacked_column":
                chart_type = {'type': 'column', "subtype": "stacked"}
            elif chart_type == "pie":
                chart_type = {"type": "pie"}
            else:
                raise ValueError(" Please select one from 'line', 'column', 'stacked_column' or 'pie'. ")

        start_row = loc[0] if loc else self.curr_row
        start_col = loc[1] if loc else self.curr_col

        y_axis_range = self.__convert_to_none_tuple(y_axis_range)

        df_t = self._transpose_df_for_chart(df = df, y_list = y_list, x = x)
        if x is None:
            x = list(df.index.names)
        if isinstance(x, str):
            x = [x]
        y = "index" if len(x) == 1 else tuple(["index"] + [''] * (len(x) - 1))
        x_list = [x for x in df_t.columns if x != y]

        data_nrows = df_t.shape[0]
        data_ncols = df_t.shape[1]

        raw_data_ws = self.__add_chart_data_tab("__CHRT_DATA_")

        ## if y not given, then use index value as y series.
        data_index_as_y = True
        if y is None:
            sel_df = df_t[[*x_list]]
        else:
            sel_df = df_t[[y, *x_list]]
            sel_df = sel_df.set_index(y)
            sel_df.index.name = None

        ## if output chart data to current worksheet.
        if outputData:
            raw_data_ws = worksheet
            df_range = self.write_dataframe(worksheet = raw_data_ws, df = sel_df, index = data_index_as_y, title = None, skipby=None, retCellRange="value", loc = (start_row, start_col))
            chart_loc = (df_range[2] + self.gap_number, df_range[1])
        else:
            df_range = self.write_dataframe(worksheet = raw_data_ws, df = sel_df, index = data_index_as_y, title = None, skipby=None, retCellRange="value", loc = (0,0))
            chart_loc = (start_row, start_col)

        x_count = len(x_list)
        nrows = df_t.shape[0]

        data_row_anchor = df_range[0]
        data_col_anchor = df_range[1]
        row_shift = 0

        category_range = [df_range[0], df_range[1] + 1, 
                          df_range[0] + (len(x) - 1), df_range[3]]

        if self.verbose:
            logging.info(f"Category Cell Range: {self.to_cell_range_text(*category_range)}")

        chart = self.workbook.add_chart(chart_type)
        if append_to_chart is not None:
            if self.__validate_input_chart_obj(chart_type = chart_type["type"], input_chart = append_to_chart):
                chart = append_to_chart
            else:
                warnings.warn("WARNING: Can only append data to the chart that has the same chart type as your given one.")

        value_start = len(x) + 1 if len(x) > 1 else len(x)
        for row_shift in range(value_start, value_start + nrows, 1):

            value_range = [data_row_anchor + row_shift, data_col_anchor + 1, 
                           data_row_anchor + row_shift, data_col_anchor + x_count]

            chart.add_series({
                'name':        [raw_data_ws.name, (data_row_anchor + row_shift), data_col_anchor],
                'categories':  [raw_data_ws.name, *category_range],
                'values':      [raw_data_ws.name, *value_range],
                'marker':      {'type': line_marker},
                "line" :       {'dash_type': line_type},
                'data_labels': {'percentage': True} if chart_type['type'] == 'pie' else None,
                'y2_axis':     y2_axis,
            })
            if self.verbose:
                logging.info("Values Cell Range: %s", self.to_cell_range_text(*value_range))

        chart.set_title({'name': title})
        chart.set_legend({'position': str(legend)})
        chart.set_size({"height": chart_size[0] * self.default_row_height, # by row 15
                        "width": chart_size[1] * self.default_col_width}) # by col 8.43

        chart.set_x_axis({"name": xy_axes_name[0]})

        set_y_axis_dict = {"name": xy_axes_name[1], 
                           "major_gridlines": {"visible": int(major_gridlines)},
                           'min': y_axis_range[0], 
                           'max': y_axis_range[1],
                           'num_format': y_num_format}
        if y2_axis:
            chart.set_y2_axis(set_y_axis_dict)
        else:
            chart.set_y_axis(set_y_axis_dict)

        # Setting Chart Style
        if chart_style:
            chart.set_style(chart_style)

        # Need to Return Chart before Insert it to the worksheet.
        if retChart:
            return chart

        worksheet.insert_chart(
            chart_loc[0], 
            chart_loc[1], 
            chart
        )

        written_range = [start_row, start_col, int(chart_loc[0] + chart_size[0]), int(chart_loc[1] + chart_size[1])]
        if outputData:

            written_range = [start_row, start_col, 
                             (start_row + data_nrows + self.gap_number + chart_size[0]),
                             max(df_range[3], start_col + chart_size[1])]

        if skipby == 'row':
            if outputData:
                self.curr_row = (start_row + data_nrows + self.gap_number + chart_size[0] + self.gap_number - 1)
            else:
                self.curr_row = (start_row + chart_size[0] + self.gap_number - 1)
        if skipby == 'col':
            self.curr_col = (start_col + int(chart_size[1]) + self.gap_number)

        if self.verbose:
            logging.info(f"Chart Written in Cell Range: {self.to_cell_range_text(*written_range)}")

        if retCellRange == "text":
            return self.to_cell_range_text(*written_range)
        if retCellRange == "value":
            return written_range

        return 0

    def write_combined_chart(self, worksheet, chart1, chart2, 
                             loc = None, chart_size = (30, 13), 
                             skipby = "row", retCellRange = None):
        """
        Combine ``chart2`` into ``chart1`` and insert the result into the worksheet.

        ``loc`` is the (row, col) anchor (the write cursor when omitted) and
        ``chart_size`` the (rows, cols) extent used for the reported range and
        for moving the cursor. Returns the covered range as text or as a list
        when ``retCellRange`` is "text" / "value", otherwise 0.
        """
        if loc:
            start_row, start_col = loc[0], loc[1]
        else:
            start_row, start_col = self.curr_row, self.curr_col

        chart1.combine(chart2)
        anchor_cell = xl_rowcol_to_cell(start_row, start_col)
        worksheet.insert_chart(anchor_cell, chart1)

        n_rows, n_cols = chart_size[0], chart_size[1]
        written_range = [start_row, start_col, (start_row + n_rows), (start_col + n_cols)]

        # Move the cursor past the chart in the requested direction.
        if skipby == 'row':
            self.curr_row = (start_row + n_rows + self.gap_number - 1)
        elif skipby == 'col':
            self.curr_col = (start_col + n_cols + self.gap_number)

        if self.verbose:
            logging.info(f"Chart Written in Cell Range: {self.to_cell_range_text(*written_range)}")

        if retCellRange == "text":
            return self.to_cell_range_text(*written_range)
        return written_range if retCellRange == "value" else 0


    def write_duo_chart(self, worksheet, df,
                        y1_list, y2_list = None, x = None,
                        c1_type = "column", c2_type = "line", 
                        y1_axis_range = (0, 1), y2_axis_range = None,
                        y1_num_format = None, y2_num_format = None,
                        y1_line_marker = "circle", y2_line_marker = "circle",
                        y1_line_type = "solid", y2_line_type = "solid",
                        loc = None, title = "", chart_size = (30, 13),
                        xy_axes_name = ("", ""), major_gridlines = False,
                        retChart = False, retCellRange = None,
                        skipby = "row"):
        """ Write duo-chart sharing the same x axis. """

        start_row = loc[0] if loc else self.curr_row
        start_col = loc[1] if loc else self.curr_col

        y2_axis = True
        if y2_list is None:
            y2_axis = False

        if y2_axis_range is None:
            y2_axis_range = y1_axis_range

        xy_name = (xy_axes_name[0], xy_axes_name[1])
        xy2_name = (xy_axes_name[0], xy_axes_name[1])
        if y2_axis and len(xy_axes_name) >= 3:
            xy_name = (xy_axes_name[0], xy_axes_name[1])
            xy2_name = (xy_axes_name[0], xy_axes_name[2])
        if y2_axis and len(xy_axes_name) < 3:
            xy_name = (xy_axes_name[0], xy_axes_name[1])
            xy2_name = (xy_axes_name[0], "")

        chart1 = self.write_chart(df = df, 
                                  x = x, 
                                  y_list = y1_list, 
                                  worksheet = worksheet, 
                                  title = title,
                                  chart_type = c1_type, 
                                  chart_size = chart_size,
                                  y_axis_range = y1_axis_range,
                                  xy_axes_name = xy_name,
                                  major_gridlines = major_gridlines,
                                  y_num_format = y1_num_format,
                                  line_type = y1_line_type,
                                  line_marker = y1_line_marker,
                                  retChart = True)

        chart2 = self.write_chart(df = df, 
                                  x = x,
                                  y_list = y2_list, 
                                  worksheet=worksheet, 
                                  title = title, 
                                  chart_type = c2_type, 
                                  chart_size = chart_size,
                                  y_axis_range = y2_axis_range,
                                  y2_axis = y2_axis,
                                  xy_axes_name = xy2_name,
                                  major_gridlines = False,
                                  y_num_format = y2_num_format,
                                  line_type = y2_line_type,
                                  line_marker = y2_line_marker,
                                  retChart=True)

        # Need to Return Chart before Insert it to the worksheet.
        if retChart:
            return (chart1, chart2)

        cell_range = self.write_combined_chart(worksheet, 
                                  chart1, chart2, 
                                  loc = loc, chart_size = chart_size, 
                                  skipby = skipby, retCellRange = retCellRange)

        return cell_range

    def write_text_by_dict(self, worksheet, dict_cells):
        """ Write text using Python dictionary. """
        for cell in dict_cells:
            list_contents = dict_cells[cell]

            if ':' in cell:
                text, cell_format = list_contents
                worksheet.merge_range(cell, text, self.dict_cell_format[cell_format])

            else:
                if len(list_contents) == 1:
                    list_items = []

                    # Text formats begin with '~~~'
                    list_mixed_items = list_contents[0]
                    for item in list_mixed_items:
                        if item.startswith('~~~'):
                            cell_format = item.split('~~~')[1]
                            item = self.dict_cell_format[cell_format]

                            list_items.append(item)
                        else:
                            list_items.append(item)

                    worksheet.write_rich_string(cell, *list_items)

                else:
                    text, cell_format = list_contents
                    worksheet.write(cell, text, self.dict_cell_format[cell_format])

    def __split_line_by_format_sign(self, line):
        """ To split line by specified format sign '{}'. """

        ## find all format sign '{}'
        format_sign = re.findall(".*?{(.*?)}.*?", line)
        format_sign = format_sign if len(format_sign) != 0 else ['']
        format_sign = [x.lstrip().rstrip() for x in format_sign]

        ## find all cell formats
        cell_format_sign = re.findall(".*?\[\[(.*?)\]\]$", line.strip().replace("\n", ""))
        cell_format_sign = [x.lstrip().rstrip() for x in cell_format_sign]

        ## clean up text by removing cell format express
        if len(cell_format_sign) > 0:
            line = line.split("[[")[0]

        ## find text before the first format sign appeared.
        text_bf_curly = re.findall(r"(.*?)\{", line)
        text_bf_curly = [text_bf_curly[0]] if len(text_bf_curly) != 0 else ['']
        text_bf_curly = [x.strip("\n") for x in text_bf_curly]

        ## find text after the first format sign appeared.
        text_af_curly = re.findall(r"\}\s*(.*?)(?=\s*\{|$)", line) if re.search(".*?{(.*?)}.*?", line) else [line]
        text_af_curly = [x.strip("\n") for x in text_af_curly]

        return text_bf_curly, format_sign, text_af_curly, cell_format_sign

    def __parse_line_by_format_sign(self, worksheet, loc, line):
        """ Parse text by format sign. """

        text_bf_curly, format_sign, text_af_curly, cell_format_sign = self.__split_line_by_format_sign(line)

        start_text = text_bf_curly[0]
        # if len(start_text) > 0 or len(format_sign) >= 2:
        if len(format_sign) >= 2:
            # if one line has more than 2 formats specified.

            res = [loc, start_text] if start_text != '' else [loc]
            assert len(format_sign) == len(text_af_curly)

            for fmt, s2 in zip(format_sign, text_af_curly):
                res.append(self.dict_cell_format[fmt] if fmt else self.dict_cell_format[""])
                res.append(s2.strip('\n')) # remove enter sign

            # Write line by line
            if self.verbose:
                logging.info(res)

            worksheet.write_rich_string(*res)

        if len(format_sign) < 2:
            # if one line has more than 2 formats specified.

            res = [loc, start_text] if start_text != '' else [loc]
            assert len(format_sign) == len(text_af_curly)

            fmt = format_sign[0]
            text = text_af_curly[0].strip("\n")
            res.append(text)
            res.append(self.dict_cell_format[fmt])

            # Write line by line
            if self.verbose:
                logging.info(res)

            worksheet.write_string(*res)

        if len(cell_format_sign) > 0:
            self.set_cell_format(worksheet = worksheet, cell_range = loc, cformat = cell_format_sign[0])
        return 0

    def write_text_content(self, worksheet, input_text = None, txt_path = None, loc = None, retCellRange = None):
        """ Write text content line by line to the worksheet. """

        row_anchor = loc[0] if loc else self.curr_row
        col_anchor = loc[1] if loc else self.curr_col

        start_row = row_anchor
        start_col = col_anchor

        if (input_text) and (txt_path):
            raise ValueError("Please give either input_text or txt_path, not both !!!")

        if (input_text is None) and (txt_path is None):
            raise ValueError("Please specify either input_text or txt_path.")

        if txt_path is not None:
            file_notes = open(txt_path, 'r')
            textlines = file_notes.readlines()

        if input_text is not None:
            textlines = input_text.split("\n")

        for line in textlines:
            # Write text line by line
            loc = xl_rowcol_to_cell(row_anchor, col_anchor)

            if " [>] " in line:
                line_split = line.split(" [>] ")
                col_shift = 0
                for sub_line in line_split:
                    sub_line = sub_line.rstrip()
                    self.__parse_line_by_format_sign(worksheet = worksheet, loc = loc, line = sub_line)
                    col_shift += 1
                    loc = xl_rowcol_to_cell(row_anchor, col_anchor + col_shift)
            else:
                self.__parse_line_by_format_sign(worksheet = worksheet, loc = loc, line = line)

            row_anchor += 1

        self.curr_row = (row_anchor - 1)

        end_row = row_anchor
        end_col = col_anchor

        written_range = [start_row, start_col, end_row, end_col]

        if self.verbose:
            logging.info(f"Text Written in Cell Range: {self.to_cell_range_text(*written_range)}")

        if retCellRange == "text":
            return self.to_cell_range_text(*written_range)
        if retCellRange == "value":
            return written_range

        return 0

    @staticmethod
    def plot_boxplot(df, x, y, y_percentage = False, colored_box = True, color_grp = (10, 1), title = "", fontsize = 14, figsize = (8, 6), show_fig = True, img_path = None, transp_bg = False):
        """
        Draw a box plot of ``y`` grouped by ``x`` with matplotlib.

        The grouped data comes from ``convert_to_boxplot_data(df, x, y, True)``.
        Boxes are filled with colours from ``color_input_validation`` when
        ``colored_box`` is set, the y axis uses a percent formatter when
        ``y_percentage`` is set, the figure is shown when ``show_fig`` is set and
        saved to ``img_path`` (200 dpi) when a path is given.

        Returns the ``(fig, ax)`` pair after closing the current pyplot figure.
        """
        plt.style.use('default')
        fig, ax = plt.subplots(1, 1, figsize = figsize, dpi=200)

        grouped = convert_to_boxplot_data(df, x, y, True)
        drawn = ax.boxplot(grouped.values(), patch_artist=colored_box, widths = 0.4)

        # Axis labelling and grid.
        ax.set_xticklabels(grouped.keys(), fontsize = fontsize)
        ax.set_ylabel(string_proc(y), color='black', fontsize = fontsize)
        ax.set_xlabel(string_proc(x), color='black', fontsize = fontsize)
        ax.tick_params(axis='both', labelsize=fontsize)
        ax.grid(True, 'major', 'y', ls='--', lw=.5, c='k', alpha=.3)
        ax.set_title(title, fontsize=fontsize + 4)

        if colored_box:
            palette = color_input_validation(color_grp, len(grouped.keys()))
            for box, shade in zip(drawn['boxes'], palette):
                box.set_facecolor(shade)
                box.set_alpha(.7)

        if y_percentage:
            plt.gca().yaxis.set_major_formatter(PercentFormatter(100)) 
        plt.tight_layout()

        if show_fig:
            plt.show()
        if img_path:
            fig.savefig(img_path, transparent = transp_bg, dpi=200)

        plt.close()
        return fig, ax

    def write_boxplot(self, ws, df, x, y, y_percentage = False, colored_box = True, color_grp = (10, 1), title = "", 
                      fontsize = 14, figsize = (30, 13), show_fig = False, img_path = None, transp_bg = False,
                      loc = None, skipby = "row", retCellRange = None):
        """
        Render a box plot to a temporary PNG and insert it into worksheet ``ws``.

        The plot is drawn by ``plot_boxplot``, saved as ``./.tmp_image_<stamp>.png``,
        resized in place with ``figsize`` (handed to ``_resize_image``) and then
        placed with ``insert_image``, whose return value is passed back.
        ``img_path`` is accepted but the temporary path is always used.
        """
        # Timestamp plus a random number keeps temporary file names apart.
        stamp = getCurrentDateTime() + str(random.randrange(0, 1000))
        tmp_image_path = f"./.tmp_image_{stamp}.png"

        plot_options = dict(df = df,
                            x = x, y = y,
                            y_percentage = y_percentage,
                            colored_box = colored_box,
                            color_grp = color_grp,
                            title = title,
                            fontsize = fontsize,
                            show_fig = show_fig,
                            transp_bg = transp_bg,
                            img_path = tmp_image_path)
        ExcelMaster.plot_boxplot(**plot_options)

        self._resize_image(tmp_image_path, figsize, tmp_image_path)
        return self.insert_image(ws, figPath=tmp_image_path, figScale=(1, 1), loc = loc, skipby = skipby, retCellRange = retCellRange)

add_worksheet ¶

add_worksheet(name, hide_grid=True, reset_loc=True, cell_scale=True, auto_fit=False, zoom_perc=100, tab_color=None)

Add a worksheet.

源代码位于： ExcelMaster/ExcelMaster.py

def add_worksheet(self, name, hide_grid = True, reset_loc = True, cell_scale = True, auto_fit = False, zoom_perc = 100, tab_color = None):
    """
    Create a worksheet, apply the requested display options and register it.

    ``cell_scale`` may be a tuple (passed on to ``set_cell_size`` as the scale)
    or exactly ``True`` (default cell size); any other value leaves the cell
    size untouched. The new sheet is stored in ``self.ws_dict`` and in
    ``self.engine.sheets`` under ``name`` and returned.
    """
    sheet = self.workbook.add_worksheet(name)

    if hide_grid:
        sheet.hide_gridlines(2)
    if reset_loc:
        self.reset_curr_loc()

    if isinstance(cell_scale, tuple):
        self.set_cell_size(sheet, cell_scale)
    elif cell_scale is True:
        self.set_cell_size(sheet)

    if auto_fit:
        sheet.auto_fit()
    if tab_color:
        sheet.set_tab_color(tab_color)
    sheet.set_zoom(zoom_perc)

    # Register with both the local lookup table and the pandas writer.
    for registry in (self.ws_dict, self.engine.sheets):
        registry[name] = sheet
    return sheet

reset_curr_loc ¶

reset_curr_loc(loc=(0, 0))

Reset Current Location.

源代码位于： ExcelMaster/ExcelMaster.py

def reset_curr_loc(self, loc = (0, 0)):
    """ Move the write cursor to ``loc`` given as (row, col); returns 0. """
    self.curr_row, self.curr_col = loc[0], loc[1]
    return 0

set_cell_size ¶

set_cell_size(worksheet, size_scale=(1, 1))

Set Cell Size in Scale.

源代码位于： ExcelMaster/ExcelMaster.py

def set_cell_size(self, worksheet, size_scale = (1, 1)):
    """
    Apply scaled default row heights and column widths to ``worksheet``.

    Args:
        worksheet: Sheet receiving ``set_row_pixels`` / ``set_column_pixels``.
        size_scale: ``(row_factor, col_factor)``. When it is a 2-tuple the
            stored defaults are first restored through ``_reset_cell_size()``
            and then multiplied by these factors; any other value leaves the
            stored defaults as they are.

    Returns:
        Always ``0``.
    """
    scale_is_pair = isinstance(size_scale, tuple) and len(size_scale) == 2
    if scale_is_pair:
        self._reset_cell_size()
        row_factor, col_factor = size_scale
        self.default_row_height = self.default_row_height * row_factor
        self.default_col_width = self.default_col_width * col_factor

    # Rows are sized one by one; columns are sized as a single span.
    row_px = self.default_row_height
    for row_idx in range(self.max_nrows):
        worksheet.set_row_pixels(row_idx, height=row_px)

    last_col = self.max_ncols - 1
    worksheet.set_column_pixels(0, last_col, width=self.default_col_width)
    return 0

get_curr_loc ¶

get_curr_loc(toCell=False)

Get Current Location in worksheet.

源代码位于： ExcelMaster/ExcelMaster.py

def get_curr_loc(self, toCell = False):
    """
    Report the current write cursor.

    Args:
        toCell: When truthy, convert the cursor with ``xl_rowcol_to_cell``
            instead of returning the raw indices.

    Returns:
        ``(curr_row, curr_col)`` by default, or the value returned by
        ``xl_rowcol_to_cell`` when ``toCell`` is truthy.
    """
    row, col = self.curr_row, self.curr_col
    return xl_rowcol_to_cell(row, col) if toCell else (row, col)

set_border_line ¶

set_border_line(worksheet, valuerange, border_line=1)

Set border line for a range of cells.

源代码位于： ExcelMaster/ExcelMaster.py

def set_border_line(self, worksheet, valuerange, border_line = 1):
    """
    Apply the same border style to all four sides of a cell range.

    Args:
        worksheet: Target sheet, forwarded to ``set_cell_format``.
        valuerange: Cell range, forwarded to ``set_cell_format`` as
            ``cell_range``.
        border_line: Border style value used for bottom, top, left and right.

    Returns:
        Always ``0``.
    """
    sides = ('bottom', 'top', 'left', 'right')
    border_fmt = self.workbook.add_format({side: border_line for side in sides})
    self.set_cell_format(worksheet = worksheet, cell_range = valuerange, cformat = border_fmt)
    return 0

merge_col ¶

merge_col(worksheet, loc=None, nrows=1, ncols=1, text='', skipby='row', cformat='BLUE_H4', retCellRange=None)

Merge columns in a single row.

源代码位于： ExcelMaster/ExcelMaster.py

def merge_col(self, worksheet, loc = None, nrows = 1, ncols = 1, text = "", skipby = 'row', cformat = 'BLUE_H4', retCellRange = None):
    """
    Merge a block of cells and write ``text`` into it.

    Args:
        worksheet: Target sheet.
        loc: Optional ``(row, col)`` of the top-left cell; the current cursor
            is used when omitted.
        nrows: Number of rows covered by the merged block.
        ncols: Number of columns covered by the merged block.
        text: Content written into the block.
        skipby: ``'row'`` moves the cursor below the block, ``'col'`` moves it
            to the right of the block (both plus ``self.gap_number``); any
            other value leaves the cursor untouched.
        cformat: Key into ``self.dict_cell_format``.
        retCellRange: ``"text"`` returns the range through
            ``to_cell_range_text``; ``"value"`` returns
            ``[first_row, first_col, last_row, last_col]``.

    Returns:
        The written range in the requested form, otherwise ``0``.

    Raises:
        KeyError: If ``cformat`` is not a key of ``self.dict_cell_format``.
    """

    start_row = loc[0] if loc else self.curr_row
    start_col = loc[1] if loc else self.curr_col
    written_range = [start_row, start_col, start_row + nrows - 1, start_col + ncols - 1]
    cell_format = self.dict_cell_format[cformat]

    if nrows == 1 and ncols == 1:
        # XlsxWriter refuses to merge a single cell (it only warns and writes
        # nothing), so a 1x1 block has to be written as an ordinary cell.
        worksheet.write(start_row, start_col, text, cell_format)
    else:
        worksheet.merge_range(*written_range, text, cell_format)

    if self.verbose:
        logging.info(f"Merged Cells: {self.to_cell_range_text(*written_range)}")

    # Advance the cursor past the block in the requested direction.
    if skipby == 'row':
        self.curr_row = (start_row + nrows + self.gap_number)
    if skipby == 'col':
        self.curr_col = (start_col + ncols + self.gap_number)

    # Report the written location in the form the caller asked for.
    if retCellRange == "text":
        return self.to_cell_range_text(*written_range)
    if retCellRange == "value":
        return written_range

    return 0

write_dataframe ¶

write_dataframe(worksheet, df, loc=None, title=None, index=False, header=True, skipby='row', titleformat='BLUE_H4', headerformat='TABLE_HEADER', valueformat='----', retCellRange=None)

Write a dataframe to excel file.

源代码位于： ExcelMaster/ExcelMaster.py

def write_dataframe(self, worksheet, df, loc = None, title = None, index = False, header = True, skipby = 'row', titleformat = "BLUE_H4", headerformat = "TABLE_HEADER", valueformat="----", retCellRange = None):
    """
    Write a DataFrame (optionally with a merged title row) and format it.

    Layout, top to bottom: an optional one-row title merged across the table
    width, then ``len(df.columns.names)`` header rows when ``header`` is
    truthy, then the data rows. When ``index`` is truthy the table is widened
    by ``len(df.index.names)`` columns.

    Args:
        worksheet: Target sheet; its ``name`` is passed to ``df.to_excel``.
        df: DataFrame to write through ``self.engine``.
        loc: Optional ``(row, col)`` of the top-left cell; the current cursor
            is used when omitted.
        title: Optional title text written with ``merge_col``.
        index: Forwarded to ``df.to_excel``.
        header: Forwarded to ``df.to_excel``.
        skipby: ``'row'`` / ``'col'`` moves the cursor past the table (plus
            ``self.gap_number``); other values leave it untouched.
        titleformat: Format key for the title.
        headerformat: Format key applied to the header range.
        valueformat: Format key applied to the value range.
        retCellRange: ``"text"`` or ``"value"`` selects the returned range form.

    Returns:
        The written range as text or as
        ``[first_row, first_col, last_row, last_col]``, otherwise ``0``.
    """

    start_row = loc[0] if loc else self.curr_row
    start_col = loc[1] if loc else self.curr_col

    ncols = df.shape[1]
    nrows = df.shape[0]

    # Index columns widen the table.
    index_ncols = len(df.index.names) if index else 0
    ncols += index_ncols

    # Header rows and the title row lengthen it.
    header_nrows = len(df.columns.names) if header else 0
    nrows += header_nrows

    title_nrows = 1 if title else 0
    nrows += title_nrows

    table_start_row = start_row + title_nrows
    end_row = start_row + nrows - 1
    end_col = start_col + ncols - 1

    # Header range (index columns included). It starts below the title and
    # spans every header level, ending on the row just above the first value
    # row. The previous computation ignored the title offset for the end row,
    # so the last level of a multi-level header was left out when a title was
    # present.
    if header:
        header_range = self.to_cell_range_text(table_start_row, start_col,
                                               table_start_row + header_nrows - 1, end_col)

    # Write the title (if any), then the frame directly beneath it.
    if title:
        self.merge_col(worksheet = worksheet, loc = loc, ncols = ncols, cformat=titleformat,
                       text = title, skipby = None)
    df.to_excel(self.engine, sheet_name = worksheet.name, startrow = table_start_row,
                startcol = start_col, header = header, index = index)

    # Value range (index columns included).
    value_range = [table_start_row + header_nrows, start_col, end_row, end_col]

    # Values are formatted first, then the header range.
    self.set_cell_format(worksheet = worksheet, cell_range = value_range, cformat = valueformat)
    if header:
        self.set_cell_format(worksheet = worksheet, cell_range = header_range, cformat = headerformat)

    written_range = [start_row, start_col, end_row, end_col]

    if self.verbose:
        logging.info(f"Table Written in Cell Range: {self.to_cell_range_text(*written_range)}")

    if skipby == 'row':
        self.curr_row = (start_row + nrows + self.gap_number)
    if skipby == 'col':
        self.curr_col = (start_col + ncols + self.gap_number)

    if retCellRange == "text":
        return self.to_cell_range_text(*written_range)
    if retCellRange == "value":
        return written_range

    return 0

insert_image ¶

insert_image(worksheet, figPath, figScale=(1, 1), loc=None, skipby='row', retCellRange=None)

Insert an image to the sheet.

源代码位于： ExcelMaster/ExcelMaster.py

def insert_image(self, worksheet, figPath, figScale = (1, 1), loc = None, skipby = 'row', retCellRange = None):
    """
    Insert an image file into ``worksheet`` and advance the cursor past it.

    Args:
        worksheet: Target sheet.
        figPath: Path of the image file.
        figScale: ``(x_scale, y_scale)`` passed to the worksheet and to
            ``_get_image_size``.
        loc: Optional ``(row, col)`` anchor; the current cursor is used when
            omitted.
        skipby: ``'row'`` / ``'col'`` moves the cursor past the image (plus
            ``self.gap_number``); other values leave it untouched.
        retCellRange: ``"text"`` or ``"value"`` selects the returned range form.

    Returns:
        The covered range as text or as
        ``[first_row, first_col, last_row, last_col]``, otherwise ``0``.
    """
    start_row = loc[0] if loc else self.curr_row
    start_col = loc[1] if loc else self.curr_col

    # _get_image_size reports (width, height) measured in cells.
    figsize_in_cells = self._get_image_size(figPath = figPath, figScale = figScale, retSizeInCell = True)
    width_cells = int(figsize_in_cells[0])
    height_cells = int(figsize_in_cells[1])

    # Anchor by (row, col), as write_chart does for insert_chart.
    worksheet.insert_image(start_row, start_col, figPath, {"x_scale": figScale[0], "y_scale": figScale[1]})

    # Advance the cursor past the image in the requested direction.
    if skipby == 'row':
        self.curr_row = (start_row + height_cells + self.gap_number)
    if skipby == 'col':
        self.curr_col = (start_col + width_cells + self.gap_number)

    written_range = [start_row, start_col,
                     start_row + height_cells,
                     start_col + width_cells]

    # Log before returning: this used to sit after the retCellRange returns
    # and was skipped whenever a range was requested.
    if self.verbose:
        logging.info(f"Image Written in Cell Range: {self.to_cell_range_text(*written_range)}")

    if retCellRange == "text":
        return self.to_cell_range_text(*written_range)
    if retCellRange == "value":
        return written_range

    return 0

write_chart ¶

write_chart(worksheet, df, y_list, x=None, title='', chart_size=(30, 13), chart_type='line', y_axis_range=(None, None), y_num_format=None, y2_axis=False, loc=None, retChart=False, skipby='row', outputData=False, retCellRange=None, xy_axes_name=('', ''), major_gridlines=False, legend='bottom', line_marker='circle', line_type='solid', chart_style=None, append_to_chart=None)

Write line chart to Excel worksheet.

源代码位于： ExcelMaster/ExcelMaster.py

def write_chart(self, worksheet, df, y_list, x = None, title = "", 
                chart_size = (30, 13), chart_type = "line",
                y_axis_range = (None, None), y_num_format = None,
                y2_axis = False, loc = None, retChart = False, 
                skipby = "row", outputData = False, retCellRange = None,
                xy_axes_name = ("", ""), major_gridlines = False,
                legend = "bottom", line_marker = "circle", line_type = "solid", 
                chart_style = None, append_to_chart = None): 
    """
    Build a chart from ``df`` and insert it into ``worksheet``.

    The frame is reshaped by ``_transpose_df_for_chart``, written to a sheet
    (a chart-data tab by default, or ``worksheet`` itself when ``outputData``
    is truthy), and one series is added per written data row.

    Args:
        worksheet: Sheet that receives the chart.
        df: Source DataFrame.
        y_list: Forwarded to ``_transpose_df_for_chart``.
        x: Column name or list of names; ``df.index.names`` is used when None.
        title: Chart title.
        chart_size: ``(rows, cols)``; multiplied by the default row height and
            column width to get the chart's pixel size.
        chart_type: ``'line'``, ``'column'``, ``'stacked_column'`` or
            ``'pie'``; a non-string value is passed to ``workbook.add_chart``
            unchanged and must support ``chart_type['type']``.
        y_axis_range: ``(min, max)`` for the value axis, normalised by
            ``__convert_to_none_tuple``.
        y_num_format: Number format for the value axis.
        y2_axis: When truthy, series and axis options go to the secondary axis.
        loc: Optional ``(row, col)`` anchor; the current cursor is used when
            omitted.
        retChart: When truthy, return the chart object without inserting it.
        skipby: ``'row'`` / ``'col'`` moves the cursor past the chart.
        outputData: When truthy, write the chart data into ``worksheet`` at
            ``loc`` and place the chart below it.
        retCellRange: ``"text"`` or ``"value"`` selects the returned range form.
        xy_axes_name: ``(x_axis_name, y_axis_name)``.
        major_gridlines: Toggles value-axis major gridlines.
        legend: Legend position.
        line_marker: Marker type applied to every series.
        line_type: Dash type applied to every series.
        chart_style: Optional value passed to ``chart.set_style``.
        append_to_chart: Optional existing chart to add the series to; used
            only if ``__validate_input_chart_obj`` accepts it.

    Returns:
        The chart object when ``retChart`` is truthy; otherwise the covered
        range as text or list when requested, else ``0``.

    Raises:
        ValueError: If ``chart_type`` is a string outside the supported set.
    """

    # Translate the string shorthand into the dict expected by add_chart.
    if isinstance(chart_type, str):
        if chart_type == "line":
            chart_type = {'type': 'line'}
        elif chart_type == "column":
            chart_type = {'type': 'column'}
        elif chart_type == "stacked_column":
            chart_type = {'type': 'column', "subtype": "stacked"}
        elif chart_type == "pie":
            chart_type = {"type": "pie"}
        else:
            raise ValueError(" Please select one from 'line', 'column', 'stacked_column' or 'pie'. ")

    start_row = loc[0] if loc else self.curr_row
    start_col = loc[1] if loc else self.curr_col

    y_axis_range = self.__convert_to_none_tuple(y_axis_range)

    # NOTE(review): presumably the result has one row per entry of y_list, an
    # "index" column naming the series and one column per x value -- confirm
    # against _transpose_df_for_chart.
    df_t = self._transpose_df_for_chart(df = df, y_list = y_list, x = x)
    if x is None:
        x = list(df.index.names)
    if isinstance(x, str):
        x = [x]
    # Label of the series-name column: a plain "index" for a single x level,
    # otherwise a tuple padded with empty strings to the number of x levels.
    y = "index" if len(x) == 1 else tuple(["index"] + [''] * (len(x) - 1))
    x_list = [x for x in df_t.columns if x != y]

    data_nrows = df_t.shape[0]
    data_ncols = df_t.shape[1]

    raw_data_ws = self.__add_chart_data_tab("__CHRT_DATA_")

    ## if y not given, then use index value as y series.
    # y is always assigned above, so the else branch is the one taken here.
    data_index_as_y = True
    if y is None:
        sel_df = df_t[[*x_list]]
    else:
        sel_df = df_t[[y, *x_list]]
        sel_df = sel_df.set_index(y)
        sel_df.index.name = None

    ## if output chart data to current worksheet.
    if outputData:
        # Data goes at the anchor and the chart is placed below it.
        raw_data_ws = worksheet
        df_range = self.write_dataframe(worksheet = raw_data_ws, df = sel_df, index = data_index_as_y, title = None, skipby=None, retCellRange="value", loc = (start_row, start_col))
        chart_loc = (df_range[2] + self.gap_number, df_range[1])
    else:
        # Data goes to the top-left of the chart-data tab; chart sits at the anchor.
        df_range = self.write_dataframe(worksheet = raw_data_ws, df = sel_df, index = data_index_as_y, title = None, skipby=None, retCellRange="value", loc = (0,0))
        chart_loc = (start_row, start_col)

    x_count = len(x_list)
    nrows = df_t.shape[0]

    data_row_anchor = df_range[0]
    data_col_anchor = df_range[1]
    row_shift = 0

    # Categories: the header row(s) of the written table, skipping the first
    # (series-name) column; one row per x level.
    category_range = [df_range[0], df_range[1] + 1, 
                      df_range[0] + (len(x) - 1), df_range[3]]

    if self.verbose:
        logging.info(f"Category Cell Range: {self.to_cell_range_text(*category_range)}")

    # NOTE(review): a new chart is created even when append_to_chart ends up
    # replacing it below.
    chart = self.workbook.add_chart(chart_type)
    if append_to_chart is not None:
        if self.__validate_input_chart_obj(chart_type = chart_type["type"], input_chart = append_to_chart):
            chart = append_to_chart
        else:
            warnings.warn("WARNING: Can only append data to the chart that has the same chart type as your given one.")

    # Row offset of the first data row below the header; with several x
    # levels one extra row is skipped (presumably the index-name row that
    # to_excel writes for multi-level headers -- TODO confirm).
    value_start = len(x) + 1 if len(x) > 1 else len(x)
    for row_shift in range(value_start, value_start + nrows, 1):

        # One series per written row: name from the first column, values from
        # the remaining x_count columns of the same row.
        value_range = [data_row_anchor + row_shift, data_col_anchor + 1, 
                       data_row_anchor + row_shift, data_col_anchor + x_count]

        chart.add_series({
            'name':        [raw_data_ws.name, (data_row_anchor + row_shift), data_col_anchor],
            'categories':  [raw_data_ws.name, *category_range],
            'values':      [raw_data_ws.name, *value_range],
            'marker':      {'type': line_marker},
            "line" :       {'dash_type': line_type},
            'data_labels': {'percentage': True} if chart_type['type'] == 'pie' else None,
            'y2_axis':     y2_axis,
        })
        if self.verbose:
            logging.info("Values Cell Range: %s", self.to_cell_range_text(*value_range))

    chart.set_title({'name': title})
    chart.set_legend({'position': str(legend)})
    # chart_size is expressed in cells: (rows, cols) scaled by the default
    # row height and column width.
    chart.set_size({"height": chart_size[0] * self.default_row_height, # by row 15
                    "width": chart_size[1] * self.default_col_width}) # by col 8.43

    chart.set_x_axis({"name": xy_axes_name[0]})

    # The same option dict configures whichever value axis the series use.
    set_y_axis_dict = {"name": xy_axes_name[1], 
                       "major_gridlines": {"visible": int(major_gridlines)},
                       'min': y_axis_range[0], 
                       'max': y_axis_range[1],
                       'num_format': y_num_format}
    if y2_axis:
        chart.set_y2_axis(set_y_axis_dict)
    else:
        chart.set_y_axis(set_y_axis_dict)

    # Setting Chart Style
    if chart_style:
        chart.set_style(chart_style)

    # Need to Return Chart before Insert it to the worksheet.
    # In this mode nothing is inserted and the cursor is not moved.
    if retChart:
        return chart

    worksheet.insert_chart(
        chart_loc[0], 
        chart_loc[1], 
        chart
    )

    written_range = [start_row, start_col, int(chart_loc[0] + chart_size[0]), int(chart_loc[1] + chart_size[1])]
    if outputData:

        # Covered area: the data table, the gap and the chart beneath it.
        written_range = [start_row, start_col, 
                         (start_row + data_nrows + self.gap_number + chart_size[0]),
                         max(df_range[3], start_col + chart_size[1])]

    # Advance the cursor past the chart (and the data table, if written here).
    if skipby == 'row':
        if outputData:
            self.curr_row = (start_row + data_nrows + self.gap_number + chart_size[0] + self.gap_number - 1)
        else:
            self.curr_row = (start_row + chart_size[0] + self.gap_number - 1)
    if skipby == 'col':
        self.curr_col = (start_col + int(chart_size[1]) + self.gap_number)

    if self.verbose:
        logging.info(f"Chart Written in Cell Range: {self.to_cell_range_text(*written_range)}")

    if retCellRange == "text":
        return self.to_cell_range_text(*written_range)
    if retCellRange == "value":
        return written_range

    return 0

write_combined_chart ¶

write_combined_chart(worksheet, chart1, chart2, loc=None, chart_size=(30, 13), skipby='row', retCellRange=None)

Combine two chart objects and then write the result to an Excel worksheet.

源代码位于： ExcelMaster/ExcelMaster.py

def write_combined_chart(self, worksheet, chart1, chart2, 
                         loc = None, chart_size = (30, 13), 
                         skipby = "row", retCellRange = None):
    """
    Combine ``chart2`` into ``chart1`` and insert the result into ``worksheet``.

    Args:
        worksheet: Sheet that receives the combined chart.
        chart1: Primary chart; ``chart1.combine(chart2)`` is called on it.
        chart2: Chart merged into ``chart1``.
        loc: Optional ``(row, col)`` anchor; the current cursor is used when
            omitted.
        chart_size: ``(rows, cols)`` used for the reported range and for the
            cursor advance.
        skipby: ``'row'`` / ``'col'`` moves the cursor past the chart; other
            values leave it untouched.
        retCellRange: ``"text"`` or ``"value"`` selects the returned range form.

    Returns:
        The covered range as text or as a four-element list, otherwise ``0``.
    """
    if loc:
        start_row, start_col = loc[0], loc[1]
    else:
        start_row, start_col = self.curr_row, self.curr_col

    chart1.combine(chart2)
    anchor_cell = xl_rowcol_to_cell(start_row, start_col)
    worksheet.insert_chart(anchor_cell, chart1)

    height_rows = chart_size[0]
    width_cols = chart_size[1]
    written_range = [start_row, start_col, (start_row + height_rows), (start_col + width_cols)]

    # Move the cursor in the requested direction only.
    if skipby == 'row':
        self.curr_row = (start_row + height_rows + self.gap_number - 1)
    elif skipby == 'col':
        self.curr_col = (start_col + width_cols + self.gap_number)

    if self.verbose:
        logging.info(f"Chart Written in Cell Range: {self.to_cell_range_text(*written_range)}")

    if retCellRange == "value":
        return written_range
    if retCellRange == "text":
        return self.to_cell_range_text(*written_range)
    return 0

write_duo_chart ¶

write_duo_chart(worksheet, df, y1_list, y2_list=None, x=None, c1_type='column', c2_type='line', y1_axis_range=(0, 1), y2_axis_range=None, y1_num_format=None, y2_num_format=None, y1_line_marker='circle', y2_line_marker='circle', y1_line_type='solid', y2_line_type='solid', loc=None, title='', chart_size=(30, 13), xy_axes_name=('', ''), major_gridlines=False, retChart=False, retCellRange=None, skipby='row')

Write duo-chart sharing the same x axis.

源代码位于： ExcelMaster/ExcelMaster.py

def write_duo_chart(self, worksheet, df,
                    y1_list, y2_list = None, x = None,
                    c1_type = "column", c2_type = "line", 
                    y1_axis_range = (0, 1), y2_axis_range = None,
                    y1_num_format = None, y2_num_format = None,
                    y1_line_marker = "circle", y2_line_marker = "circle",
                    y1_line_type = "solid", y2_line_type = "solid",
                    loc = None, title = "", chart_size = (30, 13),
                    xy_axes_name = ("", ""), major_gridlines = False,
                    retChart = False, retCellRange = None,
                    skipby = "row"):
    """
    Build two charts over the same x axis and write them as one combined chart.

    Both charts are produced by ``write_chart`` with ``retChart=True`` and
    then handed to ``write_combined_chart``.

    Args:
        worksheet: Target sheet.
        df: Source DataFrame.
        y1_list: Series for the first chart.
        y2_list: Series for the second chart; when None the second chart is
            not placed on the secondary axis.
        x: Forwarded to ``write_chart``.
        c1_type, c2_type: Chart types of the first and second chart.
        y1_axis_range: Value-axis range of the first chart.
        y2_axis_range: Value-axis range of the second chart; falls back to
            ``y1_axis_range`` when None.
        y1_num_format, y2_num_format: Value-axis number formats.
        y1_line_marker, y2_line_marker: Marker types.
        y1_line_type, y2_line_type: Dash types.
        loc: Optional ``(row, col)`` anchor for the combined chart.
        title: Title given to both charts.
        chart_size: ``(rows, cols)`` forwarded to both calls.
        xy_axes_name: ``(x, y1)`` or ``(x, y1, y2)`` axis names. With a
            secondary axis and no third entry, the second value axis gets an
            empty name.
        major_gridlines: Gridline toggle for the first chart only.
        retChart: When truthy, return ``(chart1, chart2)`` without writing.
        retCellRange, skipby: Forwarded to ``write_combined_chart``.

    Returns:
        ``(chart1, chart2)`` when ``retChart`` is truthy, otherwise whatever
        ``write_combined_chart`` returns.
    """

    # Resolved up front so a malformed ``loc`` fails before any chart is built.
    start_row = loc[0] if loc else self.curr_row
    start_col = loc[1] if loc else self.curr_col

    y2_axis = y2_list is not None

    if y2_axis_range is None:
        y2_axis_range = y1_axis_range

    # Axis names: the first chart always uses (x, y1). The second chart
    # mirrors it unless a secondary axis is in play.
    x_axis_name = xy_axes_name[0]
    xy_name = (x_axis_name, xy_axes_name[1])
    if not y2_axis:
        xy2_name = xy_name
    elif len(xy_axes_name) >= 3:
        xy2_name = (x_axis_name, xy_axes_name[2])
    else:
        xy2_name = (x_axis_name, "")

    # Arguments shared by both write_chart calls.
    shared = dict(df = df, x = x, worksheet = worksheet, title = title,
                  chart_size = chart_size, retChart = True)

    chart1 = self.write_chart(y_list = y1_list,
                              chart_type = c1_type,
                              y_axis_range = y1_axis_range,
                              xy_axes_name = xy_name,
                              major_gridlines = major_gridlines,
                              y_num_format = y1_num_format,
                              line_type = y1_line_type,
                              line_marker = y1_line_marker,
                              **shared)

    chart2 = self.write_chart(y_list = y2_list,
                              chart_type = c2_type,
                              y_axis_range = y2_axis_range,
                              y2_axis = y2_axis,
                              xy_axes_name = xy2_name,
                              major_gridlines = False,
                              y_num_format = y2_num_format,
                              line_type = y2_line_type,
                              line_marker = y2_line_marker,
                              **shared)

    # Hand back the raw charts before anything is inserted into the sheet.
    if retChart:
        return (chart1, chart2)

    return self.write_combined_chart(worksheet,
                                     chart1, chart2,
                                     loc = loc, chart_size = chart_size,
                                     skipby = skipby, retCellRange = retCellRange)

write_text_by_dict ¶

write_text_by_dict(worksheet, dict_cells)

Write text using Python dictionary.

源代码位于： ExcelMaster/ExcelMaster.py

def write_text_by_dict(self, worksheet, dict_cells):
    """
    Write cell contents described by a dictionary.

    Each key is a cell reference and each value a list:

    * key containing ``':'`` with value ``[text, format_key]``: the range is
      merged and ``text`` written with that format;
    * value ``[[item, ...]]`` (a single-element list): rich string; items
      starting with ``'~~~'`` are replaced by the format registered under the
      name that follows the marker, other items are written as they are;
    * value ``[text, format_key]``: plain formatted write.

    Args:
        worksheet: Target sheet.
        dict_cells: Mapping of cell reference to content list.
    """
    for cell, contents in dict_cells.items():
        # Range references are merged.
        if ':' in cell:
            text, fmt_key = contents
            worksheet.merge_range(cell, text, self.dict_cell_format[fmt_key])
            continue

        # Two-element lists are plain formatted writes.
        if len(contents) != 1:
            text, fmt_key = contents
            worksheet.write(cell, text, self.dict_cell_format[fmt_key])
            continue

        # Single-element lists hold rich-string fragments; '~~~NAME' entries
        # stand for the format registered under NAME.
        fragments = [
            self.dict_cell_format[piece.split('~~~')[1]] if piece.startswith('~~~') else piece
            for piece in contents[0]
        ]
        worksheet.write_rich_string(cell, *fragments)

write_text_content ¶

write_text_content(worksheet, input_text=None, txt_path=None, loc=None, retCellRange=None)

Write text content line by line to the worksheet.

源代码位于： ExcelMaster/ExcelMaster.py

def write_text_content(self, worksheet, input_text = None, txt_path = None, loc = None, retCellRange = None):
    """
    Write text to the worksheet, one line per row.

    Lines come either from ``input_text`` (split on newlines) or from the file
    at ``txt_path``. A line containing ``" [>] "`` is split on that marker and
    its pieces are written into consecutive columns of the same row. Each
    piece is handed to ``__parse_line_by_format_sign`` for the actual write.

    Args:
        worksheet: Target sheet.
        input_text: Text to write.
        txt_path: Path of a text file to write instead.
        loc: Optional ``(row, col)`` anchor; the current cursor is used when
            omitted.
        retCellRange: ``"text"`` or ``"value"`` selects the returned range form.

    Returns:
        The range ``[start_row, start_col, end_row, end_col]`` (as text or
        list) when requested, otherwise ``0``. ``end_row`` is the row
        following the last written line.

    Raises:
        ValueError: If both sources are supplied, or neither is.
    """

    row_anchor = loc[0] if loc else self.curr_row
    col_anchor = loc[1] if loc else self.curr_col

    start_row = row_anchor
    start_col = col_anchor

    if (input_text) and (txt_path):
        raise ValueError("Please give either input_text or txt_path, not both !!!")

    if (input_text is None) and (txt_path is None):
        raise ValueError("Please specify either input_text or txt_path.")

    if txt_path is not None:
        # Context manager so the handle is closed; it used to be left open.
        with open(txt_path, 'r') as file_notes:
            textlines = file_notes.readlines()

    if input_text is not None:
        textlines = input_text.split("\n")

    for line in textlines:
        # One source line per worksheet row.
        cell = xl_rowcol_to_cell(row_anchor, col_anchor)

        if " [>] " in line:
            # The marker spreads the pieces over successive columns.
            for col_shift, sub_line in enumerate(line.split(" [>] ")):
                cell = xl_rowcol_to_cell(row_anchor, col_anchor + col_shift)
                self.__parse_line_by_format_sign(worksheet = worksheet, loc = cell, line = sub_line.rstrip())
        else:
            self.__parse_line_by_format_sign(worksheet = worksheet, loc = cell, line = line)

        row_anchor += 1

    # The cursor is left on the last written row.
    self.curr_row = (row_anchor - 1)

    end_row = row_anchor
    end_col = col_anchor

    written_range = [start_row, start_col, end_row, end_col]

    if self.verbose:
        logging.info(f"Text Written in Cell Range: {self.to_cell_range_text(*written_range)}")

    if retCellRange == "text":
        return self.to_cell_range_text(*written_range)
    if retCellRange == "value":
        return written_range

    return 0

plot_boxplot `staticmethod` ¶

plot_boxplot(df, x, y, y_percentage=False, colored_box=True, color_grp=(10, 1), title='', fontsize=14, figsize=(8, 6), show_fig=True, img_path=None, transp_bg=False)

Plot Box Plot Chart using matplotlib.

源代码位于： ExcelMaster/ExcelMaster.py

@staticmethod
def plot_boxplot(df, x, y, y_percentage = False, colored_box = True, color_grp = (10, 1), title = "", fontsize = 14, figsize = (8, 6), show_fig = True, img_path = None, transp_bg = False):
    """
    Draw a box plot of ``y`` grouped by ``x`` with matplotlib.

    Args:
        df: Source DataFrame.
        x: Grouping column; one box per distinct value.
        y: Value column.
        y_percentage: When truthy, values are multiplied by 100 and the y axis
            is formatted as a percentage.
        colored_box: When truthy, boxes are filled using ``color_grp``.
        color_grp: Colour specification validated by ``color_input_validation``.
        title: Plot title.
        fontsize: Base font size; the title uses ``fontsize + 4``.
        figsize: Figure size passed to ``plt.subplots``.
        show_fig: When truthy, display the figure.
        img_path: Optional path the figure is saved to.
        transp_bg: Save with a transparent background.

    Returns:
        ``(fig, ax)`` of the (already closed) figure.
    """

    plt.style.use('default')

    fig, ax = plt.subplots(1, 1, figsize = figsize, dpi=200)

    # Honour the caller's flag: it used to be hard-coded to True, which
    # multiplied the data by 100 even when no percentage axis was requested.
    box_plot_data = convert_to_boxplot_data(df, x, y, y_percentage)
    group_labels = list(box_plot_data.keys())

    bplot = ax.boxplot(list(box_plot_data.values()), patch_artist=colored_box, widths = 0.4)
    ax.set_xticklabels(group_labels, fontsize = fontsize)
    ax.set_ylabel(string_proc(y), color='black', fontsize = fontsize)
    ax.set_xlabel(string_proc(x), color='black', fontsize = fontsize)
    ax.tick_params(axis='both', labelsize=fontsize)
    ax.grid(True, 'major', 'y', ls='--', lw=.5, c='k', alpha=.3)
    ax.set_title(title, fontsize=fontsize + 4)

    if colored_box:
        colors = color_input_validation(color_grp, len(group_labels))
        for patch, color in zip(bplot['boxes'], colors):
            patch.set_facecolor(color)
            patch.set_alpha(.7)

    if y_percentage:
        ax.yaxis.set_major_formatter(PercentFormatter(100))
    fig.tight_layout()

    # Save before showing: with an interactive backend the figure can be
    # torn down once the window is closed, leaving an empty image on disk.
    if img_path:
        fig.savefig(img_path, transparent = transp_bg, dpi=200)

    if show_fig:
        plt.show()

    # Close this figure explicitly rather than whichever one is "current".
    plt.close(fig)
    return fig, ax

write_boxplot ¶

write_boxplot(ws, df, x, y, y_percentage=False, colored_box=True, color_grp=(10, 1), title='', fontsize=14, figsize=(30, 13), show_fig=False, img_path=None, transp_bg=False, loc=None, skipby='row', retCellRange=None)

Write boxplot to Worksheet.

源代码位于： ExcelMaster/ExcelMaster.py

def write_boxplot(self, ws, df, x, y, y_percentage = False, colored_box = True, color_grp = (10, 1), title = "", 
                  fontsize = 14, figsize = (30, 13), show_fig = False, img_path = None, transp_bg = False,
                  loc = None, skipby = "row", retCellRange = None):
    """
    Render a box plot to a temporary PNG and insert it into ``ws``.

    The plot is produced by ``ExcelMaster.plot_boxplot``, resized in place by
    ``_resize_image`` using ``figsize``, and placed with ``insert_image``.
    The temporary file is written to the working directory under a name built
    from the current timestamp and a random number; this method does not
    remove it. ``img_path`` is accepted but not used here.

    Args:
        ws: Target sheet.
        df, x, y, y_percentage, colored_box, color_grp, title, fontsize,
        show_fig, transp_bg: Forwarded to ``plot_boxplot``.
        figsize: Size handed to ``_resize_image``.
        img_path: Unused.
        loc, skipby, retCellRange: Forwarded to ``insert_image``.

    Returns:
        Whatever ``insert_image`` returns.
    """

    # Timestamp plus a random suffix keeps temporary file names apart.
    rn_num = getCurrentDateTime() + str(random.randrange(0, 1000))
    tmp_image_path = f"./.tmp_image_{rn_num}.png"

    plot_options = dict(
        df = df,
        x = x, y = y,
        y_percentage = y_percentage,
        colored_box = colored_box,
        color_grp = color_grp,
        title = title,
        fontsize = fontsize,
        show_fig = show_fig,
        transp_bg = transp_bg,
        img_path = tmp_image_path,
    )
    ExcelMaster.plot_boxplot(**plot_options)

    # Resize in place, then insert at scale 1:1.
    self._resize_image(tmp_image_path, figsize, tmp_image_path)
    return self.insert_image(ws, figPath=tmp_image_path, figScale=(1, 1), loc = loc, skipby = skipby, retCellRange = retCellRange)

getStartDateofLatestWeek ¶

getStartDateofLatestWeek(retStr=True)

Date of first day of last week.

源代码位于： ExcelMaster/Utility.py

def getStartDateofLatestWeek(retStr=True):
    """
    Return the day immediately before the Monday of the current week.

    Args:
        retStr: When truthy, return the date formatted as ``"%Y-%m-%d"``;
            otherwise return a ``datetime.datetime`` at midnight.
    """
    import datetime

    today = datetime.date.today()
    # date.weekday() is 0 on Monday, so this steps back to this week's Monday.
    monday = today - datetime.timedelta(days=today.weekday())
    monday_midnight = datetime.datetime.combine(monday, datetime.time())
    res = monday_midnight - datetime.timedelta(days=1)
    return res.strftime("%Y-%m-%d") if retStr else res

getLastCompletedVintage ¶

getLastCompletedVintage(start_date=None, format='%Y-%m-%d', vintage=False)

Last Completed Vintage

源代码位于： ExcelMaster/Utility.py

def getLastCompletedVintage(start_date = None, format="%Y-%m-%d", vintage=False):
    """
    Return the last day of the month preceding a reference date.

    Args:
        start_date: Reference date string parsed with ``format``; today is
            used when None.
        format: ``strptime`` / ``strftime`` pattern for input and output.
        vintage: When truthy, return an integer built from the first seven
            characters of the formatted date with ``"-"`` removed (``YYYYMM``
            for the default format).

    Returns:
        The formatted date string, or the integer vintage.
    """
    import datetime

    if start_date is None:
        reference = datetime.date.today()
    else:
        reference = datetime.datetime.strptime(start_date, format).date()

    # One day before the first of the month is the previous month's last day.
    first_of_month = reference.replace(day=1)
    month_end = first_of_month - datetime.timedelta(days=1)
    formatted = month_end.strftime(format)

    if not vintage:
        return formatted
    return int(formatted[0:7].replace("-", ""))

vin2quar ¶

vin2quar(strDate)

String Vintage to Quarter (if completed month).

源代码位于： ExcelMaster/Utility.py

def vin2quar(strDate):
    """
    Convert a ``YYYYMM`` vintage string to ``YYYYQn`` when the month closes a quarter.

    Args:
        strDate: Vintage string whose first four characters are the year and
            next two the month.

    Returns:
        ``"<year>Q<quarter>"`` for months 3, 6, 9 and 12; otherwise
        ``strDate`` unchanged.
    """
    year, month = int(strDate[:4]), int(strDate[4:6])
    if month not in (3, 6, 9, 12):
        return strDate
    # For quarter-end months, month // 3 is the quarter number.
    return f"{year}Q{month // 3}"

list_files ¶

list_files(location, pattern)

List all files.

源代码位于： ExcelMaster/Utility.py

def list_files(location, pattern):
    """
    Collect names of files under ``location`` that match a regular expression.

    The directory tree is walked recursively. Only bare file names are
    returned, without their directory.

    Args:
        location: Root directory to walk.
        pattern: Regular expression applied to each file name with ``re.search``.

    Returns:
        List of matching file names in walk order.
    """
    import re

    matches = []
    for _root, _dirs, filenames in os.walk(location):
        matches.extend(fname for fname in filenames if re.search(pattern, fname))
    return matches

getCurrentDateTime ¶

getCurrentDateTime(fmt='%Y%m%d%H%M%S')

Get Current DateTime

源代码位于： ExcelMaster/Utility.py

def getCurrentDateTime(fmt = "%Y%m%d%H%M%S"):
    """
    Return the current local date and time formatted with ``fmt``.

    Args:
        fmt: ``strftime`` pattern; the default yields a 14-digit timestamp.
    """
    from datetime import datetime as _datetime

    now = _datetime.now()
    return now.strftime(fmt)

input_table_proc ¶

input_table_proc(tbl)

Process Input Table.

源代码位于： ExcelMaster/Utility.py

def input_table_proc(tbl):
    """
    Lower-case the column names of ``tbl`` in place.

    Args:
        tbl: Table whose ``columns`` attribute holds string labels.

    Returns:
        The same ``tbl`` object, with its columns renamed.
    """
    lowered_labels = [label.lower() for label in tbl.columns]
    tbl.columns = lowered_labels
    return tbl

get_file_extension ¶

get_file_extension(input_path)

Get File Extension for a given file path.

源代码位于： ExcelMaster/Utility.py

def get_file_extension(input_path):
    """
    Return the extension of ``input_path``, including the leading dot.

    Args:
        input_path: File path.

    Returns:
        The extension as split off by ``os.path.splitext`` (empty string when
        the path has none).
    """
    _stem, extension = os.path.splitext(input_path)
    return extension

input_validation ¶

input_validation(x, sep=',')

Input Validation.

源代码位于： ExcelMaster/Utility.py

def input_validation(x, sep=","):
    """
    Load or accept an input table and normalise its column names.

    Args:
        x: Either a file path or a pandas DataFrame. A path ending in
            ``.sas7bdat`` is read with ``pd.read_sas`` (latin-1 encoding);
            any other path is read with ``pd.read_csv``.
        sep: Field separator used for CSV input.

    Returns:
        The table after ``input_table_proc``.

    Raises:
        AttributeError: If ``x`` is neither a string nor a DataFrame.
    """
    if isinstance(x, pd.DataFrame):
        return input_table_proc(x)

    if not isinstance(x, str):
        raise AttributeError("Only Support csv/sas7bdat Path or Panda DataFrame as Input!!!")

    # Path input: choose the reader from the file extension.
    if get_file_extension(x) == ".sas7bdat":
        loaded = pd.read_sas(x, encoding="latin-1")
    else:
        loaded = pd.read_csv(x, sep = sep)
    return input_table_proc(loaded)

val_input_condition ¶

val_input_condition(target, condition=('>', 20))

Condition Tuple Validation.

源代码位于： ExcelMaster/Utility.py

def val_input_condition(target, condition = (">", 20)):
    """
    Evaluate ``target`` against a condition.

    Args:
        target: Value (or array-like supporting comparison operators) to test.
        condition: Either a string, in which case the result is
            ``target == condition``, or an ``(operator, value)`` pair where
            ``operator`` is one of ``>``/``gt``, ``<``/``lt``, ``=``/``eq``,
            ``>=``/``gte``, ``<=``/``lte`` (case-insensitive, surrounding
            whitespace ignored) and ``value`` is converted with ``float``.

    Returns:
        The result of the comparison.

    Raises:
        ValueError: If the operator is not one of the supported spellings.
            Previously such input silently produced ``None``.
    """
    if isinstance(condition, str):
        return (target == condition)

    op_name = condition[0].strip().lower()
    value = float(condition[1])

    if op_name in ('>', 'gt'):
        return (target > value)
    if op_name in ('<', 'lt'):
        return (target < value)
    if op_name in ('=', 'eq'):
        return (target == value)
    if op_name in ('>=', 'gte'):
        return (target >= value)
    if op_name in ('<=', 'lte'):
        return (target <= value)

    raise ValueError(
        f"Unsupported operator {condition[0]!r}; expected one of "
        "'>', 'gt', '<', 'lt', '=', 'eq', '>=', 'gte', '<=', 'lte'."
    )

tanspose_dataframe ¶

tanspose_dataframe(df, index_col)

Transpose Pandas DataFrame.

源代码位于： ExcelMaster/Utility.py

def tanspose_dataframe(df, index_col):
    """
    Transpose ``df`` around ``index_col``.

    ``index_col`` becomes the index, the frame is transposed so its values
    turn into column labels, and the resulting index is moved back into a
    regular column by ``reset_index``.

    Args:
        df: DataFrame to transpose.
        index_col: Column (or columns) to pivot into the header.

    Returns:
        The transposed DataFrame.
    """
    keyed = df.set_index(index_col)
    flipped = keyed.T
    return flipped.reset_index()

convert_perc_str_to_float ¶

convert_perc_str_to_float(df, cols)

Percentage to Float.

源代码位于： ExcelMaster/Utility.py

def convert_perc_str_to_float(df, cols):
    """
    Convert percentage strings such as ``"12.5%"`` to fractions, in place.

    Only columns whose dtype prints as ``'object'`` are converted: trailing
    ``%`` characters are stripped, the values cast to float and divided by
    100. Other columns are left untouched.

    Args:
        df: DataFrame to modify.
        cols: Column names to inspect.

    Returns:
        The same ``df`` object.
    """
    for name in cols:
        column = df[name]
        if str(column.dtypes) != 'object':
            continue
        stripped = column.str.rstrip('%')
        df[name] = stripped.astype('float') / 100
    return df

color_hex2rgb ¶

color_hex2rgb(hex_code)

Convert Color Hex Code to RGB Tuple.

源代码位于： ExcelMaster/Utility.py

def color_hex2rgb(hex_code):
    """
    Convert a hex colour code to an ``(r, g, b)`` tuple of integers.

    Args:
        hex_code: Six-digit hex colour, with or without leading ``#``.

    Returns:
        Tuple of three integers parsed from consecutive digit pairs.
    """
    digits = hex_code.lower().lstrip('#')
    red, green, blue = (int(digits[pos:pos + 2], 16) for pos in range(0, 6, 2))
    return (red, green, blue)

get_color_set ¶

get_color_set(n, start_num=0, step=1, retName=False, lookupName=None)

Return a set of color code without replacement.

源代码位于： ExcelMaster/Utility.py

def get_color_set(n, start_num = 0, step = 1, retName = False, lookupName = None):
    """
    Pick ``n`` colours from matplotlib's XKCD colour table.

    Args:
        n: Number of colours wanted.
        start_num: Index of the first colour when selecting by position.
        step: Index stride when selecting by position.
        retName: When truthy, return a ``{name: hex_code}`` dict (names
            without the ``"xkcd:"`` prefix) instead of a list of codes.
        lookupName: Optional regular expression. When given, colours are
            chosen by searching the full colour names (prefix included) in
            table order, and ``start_num`` / ``step`` are ignored. Fewer than
            ``n`` colours are returned if fewer names match.

    Returns:
        A list of hex codes, or a name-to-code dict when ``retName`` is truthy.

    Raises:
        IndexError: If positional selection runs past the end of the table.
    """
    import matplotlib.colors as mcolors
    import re

    colors = list(mcolors.XKCD_COLORS.items())
    color_set = {}

    if lookupName:
        # Name-based selection. The positional loop used to run first even
        # though its result was thrown away, and could raise IndexError here.
        for name, color in colors:
            if len(color_set) == n:
                break
            if re.search(lookupName, name):
                color_set[name.replace("xkcd:", "")] = color
    else:
        for i in range(start_num, start_num + n * step, step):
            name, color = colors[i]
            color_set[name.replace("xkcd:", "")] = color

    if retName:
        return color_set
    return list(color_set.values())

string_proc ¶

string_proc(x)

Process a given String.

源代码位于： ExcelMaster/Utility.py

def string_proc(x):
    """
    Turn a snake_case identifier into a capitalised, space-separated label.

    Each underscore-separated part is stripped of surrounding whitespace and
    passed through ``str.capitalize``.

    Args:
        x: Input string.

    Returns:
        The parts joined with single spaces.
    """
    return " ".join(part.strip().capitalize() for part in x.split("_"))

convert_to_boxplot_data ¶

convert_to_boxplot_data(df, x, y, y_percentage=False)

Convert dataframe to boxplot data.

源代码位于： ExcelMaster/Utility.py

def convert_to_boxplot_data(df, x, y, y_percentage = False):
    """
    Group the values of column ``y`` by the sorted distinct values of column ``x``.

    Args:
        df: Source DataFrame.
        x: Grouping column.
        y: Value column.
        y_percentage: When truthy, every value is multiplied by 100.

    Returns:
        Dict mapping each distinct ``x`` value (in sorted order) to the list
        of its ``y`` values.
    """
    levels = df[x].sort_values().unique().tolist()

    grouped = {}
    for level in levels:
        values = df[df[x] == level][y].tolist()
        grouped[level] = [value * 100 for value in values] if y_percentage else values
    return grouped

color_input_validation ¶

color_input_validation(color_grp, val_n)

Perform Color Input Validation.

源代码位于： ExcelMaster/Utility.py

def color_input_validation(color_grp, val_n):
    """ Normalise a colour specification into a list of colour codes.

    Accepted forms of ``color_grp``:

    * a 2-tuple, forwarded to ``get_color_set`` as ``(start_num, step)``;
    * a single string, repeated ``val_n`` times;
    * a list of exactly ``val_n`` strings, returned unchanged.

    Raises:
        ValueError: For any other input.
    """
    if isinstance(color_grp, tuple) and len(color_grp) == 2:
        # Customised palette: pick val_n colours by position.
        return get_color_set(val_n, color_grp[0], color_grp[1], False)

    if isinstance(color_grp, str):
        return [color_grp] * val_n

    is_code_list = (
        isinstance(color_grp, list)
        and all([isinstance(code, str) for code in color_grp])
        and len(color_grp) == val_n
    )
    if is_code_list:
        return color_grp

    raise ValueError("Please give valid color_grp: tuple of two numbers, list of color code or a single color code.")

get_metric_shift ¶

get_metric_shift(data, metric_name, nvars_col='nvars')

Calculate Metric Shift for Variable Reduction.

源代码位于： ExcelMaster/Utility.py

def get_metric_shift(data, metric_name, nvars_col = "nvars"):
    """ Row-over-row change of a metric per unit change of ``nvars_col``.

    Both columns are differenced against the previous row and the metric
    difference is divided by the ``nvars_col`` difference. NaN results
    (including the first row, which has no predecessor) are replaced by 0.
    """
    metric_series = data[metric_name]
    nvars_series = data[nvars_col]
    metric_delta = metric_series.sub(metric_series.shift(1))
    nvars_delta = nvars_series.sub(nvars_series.shift(1))
    return metric_delta.div(nvars_delta).fillna(0)

get_metrics_shift ¶

get_metrics_shift(data, metric_cols)

Get Shift for List of Metrics.

源代码位于： ExcelMaster/Utility.py

def get_metrics_shift(data, metric_cols):
    """ Add a ``<metric>_shift`` column to ``data`` for each listed metric.

    Each new column is computed by ``get_metric_shift`` with its default
    ``nvars_col``. ``data`` is modified in place and also returned.

    Args:
        data: DataFrame holding the metric columns.
        metric_cols: Names of the metric columns to process.
    """
    # The former ``metric.startswith(tuple(metric_cols))`` guard was always
    # true (every metric starts with itself), so it has been dropped.
    for metric in metric_cols:
        data[metric + "_shift"] = get_metric_shift(data, metric)
    return data

compute_overfitting_shift ¶

compute_overfitting_shift(data, sample_prefix)

Calculate Overfitting Performance Shift.

源代码位于： ExcelMaster/Utility.py

def compute_overfitting_shift(data, sample_prefix):
    """ Add relative performance shift columns between two samples.

    Columns starting with ``sample_prefix[0]`` are treated as the base metrics
    and columns starting with ``sample_prefix[1]`` as the comparison metrics.
    They are paired positionally (in column order) and for every pair a column
    ``<comparison>_shift = comparison / base - 1`` is added to ``data``.

    Args:
        data: DataFrame with the metric columns; modified in place.
        sample_prefix: Two-element sequence ``(base_prefix, other_prefix)``.

    Returns:
        The same ``data`` object with the ``*_shift`` columns added.

    Raises:
        ValueError: If the two prefixes match a different number of columns.
    """
    b_metrics = [x for x in data.columns if x.startswith(sample_prefix[0])]
    o_metrics = [x for x in data.columns if x.startswith(sample_prefix[1])]

    if len(b_metrics) != len(o_metrics):
        # Message fixed: it used to claim the lengths "are the the same".
        raise ValueError("The lengths of metrics between two samples are not the same in the given dataset!")

    for b_metric, o_metric in zip(b_metrics, o_metrics):
        data[o_metric + "_shift"] = data[o_metric].div(data[b_metric]) - 1
    return data

proc_psi_raw_report ¶

proc_psi_raw_report(psi_raw_table, psi_title, keep_list=None, varname='variable', upper=True)

Processing Raw PSI Report generated from Takecopter.

源代码位于： ExcelMaster/Utility.py

def proc_psi_raw_report(psi_raw_table, psi_title, keep_list = None, varname = "variable", upper=True):
    """ Reshape a raw PSI report into a table with two-level column headers.

    The input is loaded through ``input_validation``, the ``var_for_psi``
    column is renamed to ``varname`` and used as the index, and every
    remaining column is placed under the top-level header ``psi_title``.

    Args:
        psi_raw_table: Raw PSI report, passed to ``input_validation``.
        psi_title: Top-level header put above every column.
        keep_list: Optional second-level column names to keep, in that order.
        varname: Name given to the variable column, which becomes the index.
        upper: If true the second-level column names are upper-cased.
    """
    report = input_validation(psi_raw_table)
    report = report.rename(columns={"var_for_psi": varname}).set_index(varname)

    if upper:
        report.columns = [label.upper() for label in report.columns]

    # Assigning two parallel arrays yields (psi_title, column) header pairs.
    top_level = [psi_title] * len(report.columns)
    report.columns = [top_level, report.columns]

    if not keep_list:
        return report
    wanted = [(psi_title, kept) for kept in keep_list]
    return report[wanted]

get_mean_risk ¶

get_mean_risk(bivar_single_attr, value_range_col=['min_indep', 'max_indep'], dep_col='dep')

get average risk

源代码位于： ExcelMaster/Utility.py

def get_mean_risk(bivar_single_attr, value_range_col = ['min_indep', 'max_indep'], dep_col = "dep"):
    """ Attach two constant mean-of-``dep_col`` columns to the table.

    ``mean`` is the mean of ``dep_col`` over all rows. ``mean_no_nan`` is the
    mean over the rows left after dropping those where every column in
    ``value_range_col`` is NaN. The input frame is modified in place and
    returned.
    """
    rows_with_range = bivar_single_attr.dropna(how = "all", subset=value_range_col)
    mean_excluding_empty = rows_with_range[dep_col].mean()
    mean_overall = bivar_single_attr[dep_col].mean()

    bivar_single_attr["mean"] = mean_overall
    bivar_single_attr["mean_no_nan"] = mean_excluding_empty
    return bivar_single_attr

报告模板 — `Template`¶

Template ¶

getStartDateofLatestWeek ¶

getStartDateofLatestWeek(retStr=True)

Date of the Sunday immediately before the Monday of the current week.

源代码位于： ExcelMaster/Utility.py

def getStartDateofLatestWeek(retStr=True):
    """ Return the Sunday immediately before the Monday of the current week.

    Args:
        retStr: If true return a ``"%Y-%m-%d"`` string, otherwise a
            ``datetime.datetime`` at midnight.
    """
    import datetime

    today = datetime.date.today()
    midnight_today = datetime.datetime.combine(today, datetime.time.min)
    # weekday() is 0 on Monday, so this steps back to the current week's Monday.
    this_monday = midnight_today - datetime.timedelta(days=today.weekday())
    res = this_monday - datetime.timedelta(days=1)
    return res.strftime("%Y-%m-%d") if retStr else res

getLastCompletedVintage ¶

getLastCompletedVintage(start_date=None, format='%Y-%m-%d', vintage=False)

Last Completed Vintage

源代码位于： ExcelMaster/Utility.py

def getLastCompletedVintage(start_date = None, format="%Y-%m-%d", vintage=False):
    """ Return the last day of the month preceding a reference date.

    Args:
        start_date: Reference date as a string in ``format``; today if None.
        format: ``strptime``/``strftime`` pattern used to parse ``start_date``
            and to format the result.
        vintage: If true return the month as the integer ``YYYYMM`` instead
            of a formatted date string.

    Returns:
        The formatted last day of the previous month, or the ``YYYYMM``
        integer when ``vintage`` is true.
    """
    import datetime

    if start_date is None:
        anchor = datetime.date.today()
    else:
        anchor = datetime.datetime.strptime(start_date, format).date()

    # One day before the 1st of the anchor month is the previous month's end.
    last_month_end = anchor.replace(day=1) - datetime.timedelta(days=1)
    if vintage:
        # Format YYYYMM directly. Slicing the ``format``-ed string only gave
        # the right answer for patterns that begin with "%Y-%m".
        return int(last_month_end.strftime("%Y%m"))
    return last_month_end.strftime(format)

vin2quar ¶

vin2quar(strDate)

Convert a vintage string (YYYYMM) to a quarter label when the month completes a quarter.

源代码位于： ExcelMaster/Utility.py

def vin2quar(strDate):
    """ Convert a ``YYYYMM`` vintage string to ``YYYYQn`` for quarter-end months.

    Months 3, 6, 9 and 12 map to quarters 1-4. For any other month the input
    string is returned unchanged.
    """
    year, month = int(strDate[:4]), int(strDate[4:6])
    if month not in (3, 6, 9, 12):
        return strDate
    quarter = (month - 1) // 3 + 1
    return f"{year}Q{quarter}"

list_files ¶

list_files(location, pattern)

List all files.

源代码位于： ExcelMaster/Utility.py

def list_files(location, pattern):
    """ List the names of all files under ``location`` matching ``pattern``.

    The directory tree is walked recursively. A file is included when
    ``re.search(pattern, file_name)`` succeeds. Only bare file names are
    returned, without their directory.
    """
    import re
    return [
        file
        for _root, _dirs, files in os.walk(location)
        for file in files
        if re.search(pattern, file)
    ]

getCurrentDateTime ¶

getCurrentDateTime(fmt='%Y%m%d%H%M%S')

Get Current DateTime

源代码位于： ExcelMaster/Utility.py

def getCurrentDateTime(fmt = "%Y%m%d%H%M%S"):
    """ Return the current local date and time formatted with ``fmt``. """
    from datetime import datetime
    now = datetime.now()
    return now.strftime(fmt)

input_table_proc ¶

input_table_proc(tbl)

Process Input Table.

源代码位于： ExcelMaster/Utility.py

def input_table_proc(tbl):
    """ Lower-case every column label of ``tbl`` in place and return it. """
    lowered = [label.lower() for label in tbl.columns]
    tbl.columns = lowered
    return tbl

get_file_extension ¶

get_file_extension(input_path)

Get the file extension for a given file path.

源代码位于： ExcelMaster/Utility.py

def get_file_extension(input_path):
    """ Return the extension of ``input_path``, including the leading dot.

    The result is an empty string when the path has no extension.
    """
    _stem, extension = os.path.splitext(input_path)
    return extension

input_validation ¶

input_validation(x, sep=',')

Input Validation.

源代码位于： ExcelMaster/Utility.py

def input_validation(x, sep=","):
    """ Load or accept a table and lower-case its column names.

    Args:
        x: Either a file path (str) or a pandas DataFrame. A path ending in
            ``.sas7bdat`` (any letter case) is read with ``pd.read_sas``;
            any other path is read with ``pd.read_csv``.
        sep: Field separator forwarded to ``pd.read_csv``.

    Returns:
        The DataFrame after ``input_table_proc``. A DataFrame passed in is
        modified in place, not copied.

    Raises:
        AttributeError: If ``x`` is neither a str nor a DataFrame.
    """
    if isinstance(x, str):
        # Compare case-insensitively so "DATA.SAS7BDAT" is not parsed as CSV.
        if get_file_extension(x).lower() == ".sas7bdat":
            res = pd.read_sas(x, encoding="latin-1")
        else:
            res = pd.read_csv(x, sep = sep)
        return input_table_proc(res)
    if isinstance(x, pd.DataFrame):
        return input_table_proc(x)
    raise AttributeError("Only Support csv/sas7bdat Path or Panda DataFrame as Input!!!")

val_input_condition ¶

val_input_condition(target, condition=('>', 20))

Condition Tuple Validation.

源代码位于： ExcelMaster/Utility.py

def val_input_condition(target, condition = (">", 20)):
    """ Evaluate ``target`` against a condition.

    Args:
        target: Value (or any object supporting comparison operators) to test.
        condition: Either a string, in which case the result is
            ``target == condition``, or a pair ``(operator, value)``. The
            operator is stripped and lower-cased; accepted spellings are
            ``>``/``gt``, ``<``/``lt``, ``>=``/``gte``, ``<=``/``lte``,
            ``=``/``==``/``eq`` and ``!=``/``ne``. ``value`` is converted
            with ``float``.

    Returns:
        The result of the comparison.

    Raises:
        ValueError: If the operator is not one of the accepted spellings
            (previously this silently returned None).
    """
    import operator as _operator

    if isinstance(condition, str):
        return (target == condition)

    comparators = {
        ">": _operator.gt, "gt": _operator.gt,
        "<": _operator.lt, "lt": _operator.lt,
        ">=": _operator.ge, "gte": _operator.ge,
        "<=": _operator.le, "lte": _operator.le,
        "=": _operator.eq, "==": _operator.eq, "eq": _operator.eq,
        "!=": _operator.ne, "ne": _operator.ne,
    }

    op_name = condition[0].strip().lower()
    value = float(condition[1])

    compare = comparators.get(op_name)
    if compare is None:
        raise ValueError(f"Unsupported condition operator: {condition[0]!r}")
    return compare(target, value)

tanspose_dataframe ¶

tanspose_dataframe(df, index_col)

Transpose Pandas DataFrame.

源代码位于： ExcelMaster/Utility.py

def tanspose_dataframe(df, index_col):
    """ Transpose ``df`` so the values of ``index_col`` become column headers.

    ``index_col`` is set as the index, the frame is transposed, and the
    resulting index (the former column names) is moved back into a regular
    column by ``reset_index``.
    """
    indexed = df.set_index(index_col)
    return indexed.transpose().reset_index()

convert_perc_str_to_float ¶

convert_perc_str_to_float(df, cols)

Percentage to Float.

源代码位于： ExcelMaster/Utility.py

def convert_perc_str_to_float(df, cols):
    """ Convert percentage strings such as ``"12.5%"`` to fractions in place.

    For every column in ``cols`` that holds text (object dtype or a pandas
    string dtype), a trailing ``%`` is stripped, the values are cast to float
    and divided by 100. Other columns are left untouched.

    Args:
        df: DataFrame to convert; modified in place.
        cols: Names of the columns to inspect.

    Returns:
        The same ``df`` object.
    """
    for col in cols:
        column = df[col]
        # Checking only for the dtype name 'object' skipped pandas' dedicated
        # string dtypes, leaving their percent strings unconverted.
        if column.dtype == object or pd.api.types.is_string_dtype(column):
            df[col] = column.str.rstrip('%').astype('float') / 100
    return df

color_hex2rgb ¶

color_hex2rgb(hex_code)

Convert Color Hex Code to RGB Tuple.

源代码位于： ExcelMaster/Utility.py

def color_hex2rgb(hex_code):
    """ Convert a colour hex code to an ``(r, g, b)`` tuple of ints.

    Leading ``#`` characters are optional and letter case does not matter.
    Three-digit shorthand (``"#abc"``) is expanded to ``"#aabbcc"`` first.

    Raises:
        ValueError: If the digits cannot be parsed as hexadecimal.
    """
    h = hex_code.lstrip('#')
    if len(h) == 3:
        # Shorthand form: each digit stands for a doubled pair.
        h = "".join(ch * 2 for ch in h)
    # int(..., 16) accepts both upper- and lower-case digits.
    return tuple(int(h[i:i+2], 16) for i in (0, 2, 4))

get_color_set ¶

get_color_set(n, start_num=0, step=1, retName=False, lookupName=None)

Return a set of color code without replacement.

源代码位于： ExcelMaster/Utility.py

def get_color_set(n, start_num = 0, step = 1, retName = False, lookupName = None):
    """ Return up to ``n`` XKCD colour codes from matplotlib's palette.

    Two selection modes exist:

    * Positional (``lookupName`` falsy): take the palette entries at indices
      ``start_num``, ``start_num + step``, ... (``n`` indices in total).
    * By name (``lookupName`` given): ignore ``start_num``/``step`` and take
      the first ``n`` palette entries whose full palette name (including the
      ``xkcd:`` prefix) matches the regular expression ``lookupName``.

    Args:
        n: Number of colours wanted.
        start_num: Palette index of the first colour (positional mode only).
        step: Index stride between picked colours (positional mode only).
        retName: If true return ``{name: hex_code}`` (names without the
            ``xkcd:`` prefix); otherwise return a list of hex codes.
        lookupName: Optional regular expression used to pick colours by name.

    Raises:
        IndexError: In positional mode, if an index is outside the palette.
    """
    import matplotlib.colors as mcolors
    import re

    palette = list(mcolors.XKCD_COLORS.items())
    color_set = {}

    if lookupName:
        # Name lookup replaces positional selection entirely, so the index
        # walk is skipped: its result used to be thrown away and it could
        # fail on out-of-range indices that were never needed.
        for name, color in palette:
            if len(color_set) == n:
                break
            if re.search(lookupName, name):
                color_set[name.replace("xkcd:", "")] = color
    else:
        for i in range(start_num, start_num + n * step, step):
            name, color = palette[i]
            color_set[name.replace("xkcd:", "")] = color

    if retName:
        return color_set
    return list(color_set.values())

string_proc ¶

string_proc(x)

Process a given String.

源代码位于： ExcelMaster/Utility.py

def string_proc(x):
    """ Turn an underscore-separated string into a space-separated label.

    Every piece between underscores is stripped of surrounding whitespace and
    passed through ``str.capitalize`` (first character upper-cased, the rest
    lower-cased) before the pieces are joined with single spaces.
    """
    return " ".join(piece.strip().capitalize() for piece in x.split("_"))

convert_to_boxplot_data ¶

convert_to_boxplot_data(df, x, y, y_percentage=False)

Convert dataframe to boxplot data.

源代码位于： ExcelMaster/Utility.py

def convert_to_boxplot_data(df, x, y, y_percentage = False):
    """ Group the values of column ``y`` by the sorted unique values of ``x``.

    Args:
        df: Source DataFrame.
        x: Name of the grouping column.
        y: Name of the value column.
        y_percentage: If true every value is multiplied by 100.

    Returns:
        dict mapping each unique ``x`` value (in sorted order) to the list of
        ``y`` values of the rows where ``x`` equals that value.
    """
    groups = df[x].sort_values().unique().tolist()

    def _values_for(group):
        picked = df.loc[df[x] == group, y].tolist()
        if y_percentage:
            return [num * 100 for num in picked]
        return picked

    return {group: _values_for(group) for group in groups}

color_input_validation ¶

color_input_validation(color_grp, val_n)

Perform Color Input Validation.

源代码位于： ExcelMaster/Utility.py

def color_input_validation(color_grp, val_n):
    """ Normalise a colour specification into a list of colour codes.

    Accepted forms of ``color_grp``:

    * a 2-tuple, forwarded to ``get_color_set`` as ``(start_num, step)``;
    * a single string, repeated ``val_n`` times;
    * a list of exactly ``val_n`` strings, returned unchanged.

    Raises:
        ValueError: For any other input.
    """
    if isinstance(color_grp, tuple) and len(color_grp) == 2:
        # Customised palette: pick val_n colours by position.
        return get_color_set(val_n, color_grp[0], color_grp[1], False)

    if isinstance(color_grp, str):
        return [color_grp] * val_n

    is_code_list = (
        isinstance(color_grp, list)
        and all([isinstance(code, str) for code in color_grp])
        and len(color_grp) == val_n
    )
    if is_code_list:
        return color_grp

    raise ValueError("Please give valid color_grp: tuple of two numbers, list of color code or a single color code.")

get_metric_shift ¶

get_metric_shift(data, metric_name, nvars_col='nvars')

Calculate Metric Shift for Variable Reduction.

源代码位于： ExcelMaster/Utility.py

def get_metric_shift(data, metric_name, nvars_col = "nvars"):
    """ Row-over-row change of a metric per unit change of ``nvars_col``.

    Both columns are differenced against the previous row and the metric
    difference is divided by the ``nvars_col`` difference. NaN results
    (including the first row, which has no predecessor) are replaced by 0.
    """
    metric_series = data[metric_name]
    nvars_series = data[nvars_col]
    metric_delta = metric_series.sub(metric_series.shift(1))
    nvars_delta = nvars_series.sub(nvars_series.shift(1))
    return metric_delta.div(nvars_delta).fillna(0)

get_metrics_shift ¶

get_metrics_shift(data, metric_cols)

Get Shift for List of Metrics.

源代码位于： ExcelMaster/Utility.py

def get_metrics_shift(data, metric_cols):
    """ Add a ``<metric>_shift`` column to ``data`` for each listed metric.

    Each new column is computed by ``get_metric_shift`` with its default
    ``nvars_col``. ``data`` is modified in place and also returned.

    Args:
        data: DataFrame holding the metric columns.
        metric_cols: Names of the metric columns to process.
    """
    # The former ``metric.startswith(tuple(metric_cols))`` guard was always
    # true (every metric starts with itself), so it has been dropped.
    for metric in metric_cols:
        data[metric + "_shift"] = get_metric_shift(data, metric)
    return data

compute_overfitting_shift ¶

compute_overfitting_shift(data, sample_prefix)

Calculate Overfitting Performance Shift.

源代码位于： ExcelMaster/Utility.py

def compute_overfitting_shift(data, sample_prefix):
    """ Add relative performance shift columns between two samples.

    Columns starting with ``sample_prefix[0]`` are treated as the base metrics
    and columns starting with ``sample_prefix[1]`` as the comparison metrics.
    They are paired positionally (in column order) and for every pair a column
    ``<comparison>_shift = comparison / base - 1`` is added to ``data``.

    Args:
        data: DataFrame with the metric columns; modified in place.
        sample_prefix: Two-element sequence ``(base_prefix, other_prefix)``.

    Returns:
        The same ``data`` object with the ``*_shift`` columns added.

    Raises:
        ValueError: If the two prefixes match a different number of columns.
    """
    b_metrics = [x for x in data.columns if x.startswith(sample_prefix[0])]
    o_metrics = [x for x in data.columns if x.startswith(sample_prefix[1])]

    if len(b_metrics) != len(o_metrics):
        # Message fixed: it used to claim the lengths "are the the same".
        raise ValueError("The lengths of metrics between two samples are not the same in the given dataset!")

    for b_metric, o_metric in zip(b_metrics, o_metrics):
        data[o_metric + "_shift"] = data[o_metric].div(data[b_metric]) - 1
    return data

proc_psi_raw_report ¶

proc_psi_raw_report(psi_raw_table, psi_title, keep_list=None, varname='variable', upper=True)

Processing Raw PSI Report generated from Takecopter.

源代码位于： ExcelMaster/Utility.py

def proc_psi_raw_report(psi_raw_table, psi_title, keep_list = None, varname = "variable", upper=True):
    """ Reshape a raw PSI report into a table with two-level column headers.

    The input is loaded through ``input_validation``, the ``var_for_psi``
    column is renamed to ``varname`` and used as the index, and every
    remaining column is placed under the top-level header ``psi_title``.

    Args:
        psi_raw_table: Raw PSI report, passed to ``input_validation``.
        psi_title: Top-level header put above every column.
        keep_list: Optional second-level column names to keep, in that order.
        varname: Name given to the variable column, which becomes the index.
        upper: If true the second-level column names are upper-cased.
    """
    report = input_validation(psi_raw_table)
    report = report.rename(columns={"var_for_psi": varname}).set_index(varname)

    if upper:
        report.columns = [label.upper() for label in report.columns]

    # Assigning two parallel arrays yields (psi_title, column) header pairs.
    top_level = [psi_title] * len(report.columns)
    report.columns = [top_level, report.columns]

    if not keep_list:
        return report
    wanted = [(psi_title, kept) for kept in keep_list]
    return report[wanted]

get_mean_risk ¶

get_mean_risk(bivar_single_attr, value_range_col=['min_indep', 'max_indep'], dep_col='dep')

get average risk

源代码位于： ExcelMaster/Utility.py

def get_mean_risk(bivar_single_attr, value_range_col = ['min_indep', 'max_indep'], dep_col = "dep"):
    """ Attach two constant mean-of-``dep_col`` columns to the table.

    ``mean`` is the mean of ``dep_col`` over all rows. ``mean_no_nan`` is the
    mean over the rows left after dropping those where every column in
    ``value_range_col`` is NaN. The input frame is modified in place and
    returned.
    """
    rows_with_range = bivar_single_attr.dropna(how = "all", subset=value_range_col)
    mean_excluding_empty = rows_with_range[dep_col].mean()
    mean_overall = bivar_single_attr[dep_col].mean()

    bivar_single_attr["mean"] = mean_overall
    bivar_single_attr["mean_no_nan"] = mean_excluding_empty
    return bivar_single_attr

get_pva_report ¶

get_pva_report(em, ws, gains_result, sample_list, nbins=10, varcol='variable', chart_scale=(10, 10))

Plot PVA Table by Segment across Sample.

源代码位于： ExcelMaster/Template.py

def get_pva_report(em, ws, gains_result, sample_list, nbins = 10, varcol = "variable", chart_scale = (10, 10)):
    """ Plot PVA Table by Segment across Sample.

    For every distinct ``seg_name`` in the gains table this writes a segment
    heading, one line chart per entry of ``sample_list`` (``mean_score`` and
    ``interval_bad_rate`` against ``rank``), and a table of the same numbers
    pivoted by sample and score type. It then moves the writer cursor below
    the first chart of the row.

    Args:
        em: Report writer exposing ``merge_col``, ``write_text_content``,
            ``write_chart``, ``write_dataframe``, ``set_cell_format`` and the
            ``gap_number``/``curr_row``/``curr_col`` attributes.
        ws: Worksheet handle forwarded to every ``em`` call.
        gains_result: Gains table, loaded through ``input_validation``. The
            columns ``seg_name``, ``rank``, ``sample``, ``mean_score`` and
            ``interval_bad_rate`` are read.
        sample_list: Sample names to chart, in column order. Must be
            non-empty because the first sample's chart anchors the next row.
        nbins: Added to ``chart_scale`` to derive the chart size.
        varcol: Name (lower-cased) given to the melted variable column.
        chart_scale: ``(width_extra, height_extra)`` used for the chart size.

    Returns:
        ``ws``.
    """
    gains_results = input_validation(gains_result)

    varcol = varcol.lower()
    segs = gains_results["seg_name"].unique().tolist()
    chart_size = (nbins + chart_scale[0], (nbins + chart_scale[1])/2)

    em.merge_col(worksheet=ws, ncols=5, text="PVA Charts")

    em.gap_number = 2
    for seg in segs:
        # ---- Data wrangling: one segment per row of charts ----
        example = gains_results.query(f"seg_name == '{seg}'").dropna()
        example = convert_perc_str_to_float(example, ["interval_bad_rate"])

        cols = ["rank", "sample", "mean_score", "interval_bad_rate"]

        # Name the melted column explicitly. melt() otherwise emits pandas'
        # default "variable", which only matched the default ``varcol``.
        example_fnl = example[cols].astype({"rank":int}).melt(
            id_vars = ["rank", "sample"],
            value_vars = ["mean_score", "interval_bad_rate"],
            var_name = varcol)
        example_fnl["score_type"] = example_fnl[varcol].str.replace("mean_score", "predicted").replace("interval_bad_rate", "actual")
        example_fnl = example_fnl.pivot(index = ["rank"], columns=["sample", "score_type"], values = "value")
        col_order = example_fnl.columns.get_level_values(0).unique().tolist()
        fnl_out_res = example_fnl[col_order]

        # ---- Insert charts and data into the worksheet ----
        em.write_text_content(worksheet=ws, input_text=f"{seg} [[ORANGE_H3]] \n")

        chart_loc = {}
        for sample in sample_list:
            # One chart per sample, laid out column by column.
            example_sample = example.query(f"sample == '{sample}'").dropna()
            loc = em.write_chart(ws, df = example_sample, x="rank",
                                  y_list=["mean_score", "interval_bad_rate"],
                                  title=f"PVA Chart ({sample.upper()}) {seg}",
                                  chart_size=chart_size,
                                  chart_type="line",
                                  major_gridlines=False,
                                  xy_axes_name=("Quantiles", "% of Bads"),
                                  y_axis_range=(0, 1),
                                  y_num_format="0.00%",
                                  skipby="col", retCellRange="value")
            chart_loc[sample] = loc

        loc4 = em.write_dataframe(worksheet=ws, df=fnl_out_res, title=f"PVA Chart ({seg})", index=True, skipby="row", retCellRange="value")

        # Shift the first three entries of the returned range by (2, 1, 2).
        # NOTE(review): presumably this skips the two header rows and the
        # index column of the written table; confirm against write_dataframe.
        offsets = (2, 1, 2)
        tbl_value_range = [x + offsets[i] if i < len(offsets) else x
                           for i, x in enumerate(loc4)]

        em.set_cell_format(ws, tbl_value_range, "----")
        em.set_cell_format(ws, tbl_value_range, "NUM%.3")

        # Continue below the first chart of this row, at its column.
        chart1_loc = chart_loc[sample_list[0]]
        em.curr_row = chart1_loc[2] + em.gap_number
        em.curr_col = chart1_loc[1]
    return ws

get_bivar_report ¶

get_bivar_report(em, ws, attr_info, bivar, sample_list, varcol='varname', sample_col='sample', x_cols=['min_indep', 'max_indep'], n_col='_freq_', dep_col='dep', chart_size=(20, 10), average_risk_line=False)

Get Bivar Report by Attribute across Samples.

源代码位于： ExcelMaster/Template.py

def get_bivar_report(em, ws, attr_info, bivar, sample_list, varcol = "varname", sample_col = "sample", x_cols = ["min_indep", "max_indep"], n_col = "_freq_", dep_col = "dep", chart_size = (20, 10), average_risk_line = False):
    """ Get Bivar Report by Attribute across Samples.

    For every attribute listed in ``attr_info`` this writes the attribute's
    info row, then one combined column/line chart per sample (``n_col`` as
    columns, ``dep_col`` as a line, by ``[min, max]`` range label). With
    ``average_risk_line`` the ``mean``/``mean_no_nan`` columns produced by
    ``get_mean_risk`` are drawn as extra dashed lines.

    Args:
        em: Report writer exposing ``merge_col``, ``write_dataframe``,
            ``write_duo_chart``, ``write_chart``, ``write_combined_chart``
            and the ``gap_number``/``curr_row``/``curr_col`` attributes.
        ws: Worksheet handle forwarded to every ``em`` call.
        attr_info: Attribute info table (path or DataFrame). Its ``varcol``
            and ``description`` columns are read.
        bivar: Bivariate table (path or DataFrame).
        sample_list: Sample names to chart, in column order (non-empty).
        varcol: Attribute-name column in both tables (lower-cased).
        sample_col: Sample column in ``bivar`` (lower-cased).
        x_cols: The two columns giving the lower/upper bound of each bin.
        n_col: Count column drawn as columns.
        dep_col: Rate column drawn as a line.
        chart_size: Chart size forwarded to the writer.
        average_risk_line: Whether to overlay the average-risk lines.

    Returns:
        ``ws``.
    """

    ##### Worksheet 1 ######
    em.merge_col(worksheet = ws, ncols=5, text = "Bivar Plot")

    bivar_table = input_validation(bivar)
    attr_info_table = input_validation(attr_info)

    varcol = varcol.lower()
    sample_col = sample_col.lower()
    x_cols = [x.lower() for x in x_cols]
    n_col = n_col.lower()
    dep_col = dep_col.lower()

    bivar_table.columns = [x.lower() for x in bivar_table.columns]
    attr_info_table.columns = [x.lower() for x in attr_info_table.columns]

    var_by_varimp = attr_info_table[varcol].str.lower().tolist()
    bivar_table["indep_range"] = "[" + bivar_table[x_cols[0]].astype(str) + ", " \
                                     + bivar_table[x_cols[1]].astype(str) + "]"

    for var in var_by_varimp:
        # One attribute per row of charts.

        em.gap_number = 1
        # NOTE(review): the info table is matched on the upper-cased name while
        # the bivar table below is matched on the lower-cased one; confirm
        # that this mirrors how the two inputs store attribute names.
        singe_attr_info = attr_info_table.query(f"{varcol} == '{var.upper()}'")
        em.write_dataframe(ws, df = singe_attr_info, index = False,
                           title =f"Information for Attribute {var.upper()}", skipby="row", retCellRange="value")
        var_desc = singe_attr_info["description"].values[0]

        em.gap_number = 2
        chart_loc = {}
        for sample in sample_list:
            # One chart per sample, laid out column by column.
            single_attr_bivar_sample = bivar_table.query(f"{varcol} == '{var}' and {sample_col} == '{sample}'")
            # Forward the caller's column names. The helper's defaults only
            # match when x_cols/dep_col are left at their default values.
            single_attr_bivar_sample = get_mean_risk(single_attr_bivar_sample,
                                                     value_range_col = x_cols,
                                                     dep_col = dep_col)

            loc = em.write_duo_chart(worksheet = ws,
                                       df = single_attr_bivar_sample,
                                       y1_list = [n_col],
                                       y2_list = [dep_col],
                                       x = "indep_range",
                                       c1_type = "column",
                                       c2_type = "line",
                                       y1_axis_range = None,
                                       y2_axis_range = None,
                                       title = f"Bivar Plot ({sample.upper()}) for Attribute {var.upper()}",
                                       chart_size = chart_size,
                                       xy_axes_name = (var_desc, "N", "Bad Rate"),
                                       major_gridlines=False,
                                       retChart = average_risk_line,
                                       y2_num_format = "0.00%",
                                       skipby = "col",
                                       retCellRange="value")

            if average_risk_line:
                # With retChart=True the writer returned the two chart objects
                # instead of a cell range; extend the line chart and then
                # insert the combined chart to obtain the cell range.
                column_chart = loc[0]
                line_chart = loc[1]
                fnl_line_chart = em.write_chart(worksheet = ws,
                                           df = single_attr_bivar_sample,
                                           y_list = ['mean', 'mean_no_nan'],
                                           x = "indep_range",
                                           chart_type = "line",
                                           chart_size = chart_size,
                                           y_num_format = "0.00%",
                                           line_type = "long_dash",
                                           line_marker = "triangle",
                                           xy_axes_name = (var_desc, "N", "Bad Rate"),
                                           major_gridlines=False,
                                           retChart = True,
                                           y2_axis = True,
                                           append_to_chart = line_chart)

                loc = em.write_combined_chart(ws,
                                              chart1 = column_chart,
                                              chart2 = fnl_line_chart,
                                              chart_size = chart_size,
                                              skipby = "col", retCellRange="value")
            chart_loc[sample] = loc

        # Continue below the first chart of this row, at its column.
        loc1 = chart_loc[sample_list[0]]
        em.curr_row = loc1[2] + em.gap_number
        em.curr_col = loc1[1]
    return ws

add_scr_info ¶

add_scr_info(em, ws, data, info, scr_info, sample_prefices, wTitle=True)

Add Score Info Comparison

源代码位于： ExcelMaster/Template.py

def add_scr_info(em, ws, data, info, scr_info, sample_prefices, wTitle = True):
    """ Add Score Info Comparison.

    Writes the ``info`` columns (indexed by the first three of them) and then,
    side by side, one table per sample prefix holding the ``scr_info`` columns
    with the prefix removed from their names. Each sample table is formatted
    as a two-decimal percentage.

    Args:
        em: Report writer exposing ``write_dataframe``, ``set_cell_format``
            and the ``gap_number``/``curr_row``/``curr_col`` attributes.
        ws: Worksheet handle forwarded to every ``em`` call.
        data: DataFrame holding the info and prefixed score columns.
        info: Info column names; the first three become the index.
        scr_info: Un-prefixed score-info column names.
        sample_prefices: Sample prefixes, one table each (non-empty).
        wTitle: If false every table is written with ``title=None``.

    Returns:
        ``(column, row)`` the writer cursor was moved to.
    """
    em.gap_number = 0
    info_title = " " if wTitle else None
    info_data = data[info].set_index(info[:3])
    info_loc = em.write_dataframe(ws, df=info_data, retCellRange="value", skipby="col", title=info_title, titleformat="", index = True)

    for prefix in sample_prefices:
        title = prefix.upper().strip("_")
        if not wTitle:
            title = None
        sample_tbl = data[[prefix + x for x in scr_info]]
        sample_tbl.columns = [x.replace(prefix, "") for x in sample_tbl.columns]
        loc = em.write_dataframe(ws, df=sample_tbl, retCellRange="value", skipby="col", title=title)
        # The second entry of the returned range is shifted by two.
        fmt_range = [v + 2 if pos == 1 else v for pos, v in enumerate(loc)]
        em.set_cell_format(ws, cell_range=fmt_range, cformat="NUM%.2")

    # Park the cursor under the last table, back at the info column.
    em.curr_col = info_loc[1]
    em.curr_row = loc[2] + 1
    return (info_loc[1], loc[2] + 1)

add_perf_metrics ¶

add_perf_metrics(em, ws, data, info, perf_metrics, sample_prefices, wTitle=True)

Add Perf Metrics Comparison

源代码位于： ExcelMaster/Template.py

def add_perf_metrics(em, ws, data, info, perf_metrics, sample_prefices, wTitle = True):
    """ Add Perf Metrics Comparison.

    Writes the ``info`` columns (indexed by the first three of them) and then,
    side by side, one table per sample prefix holding the ``perf_metrics``
    columns with the prefix removed from their names. Each sample table is
    formatted as a two-decimal percentage.

    Args:
        em: Report writer exposing ``write_dataframe``, ``set_cell_format``
            and the ``gap_number``/``curr_row``/``curr_col`` attributes.
        ws: Worksheet handle forwarded to every ``em`` call.
        data: DataFrame holding the info and prefixed metric columns.
        info: Info column names; the first three become the index.
        perf_metrics: Un-prefixed metric column names.
        sample_prefices: Sample prefixes, one table each (non-empty).
        wTitle: If false every table is written with ``title=None``.

    Returns:
        ``(column, row)`` the writer cursor was moved to.
    """
    em.gap_number = 0
    info_title = " " if wTitle else None
    info_data = data[info].set_index(info[:3])
    info_loc = em.write_dataframe(ws, df=info_data, retCellRange="value", skipby="col", title=info_title, titleformat="", index = True)

    for prefix in sample_prefices:
        title = prefix.upper().strip("_")
        if not wTitle:
            title = None
        sample_tbl = data[[prefix + x for x in perf_metrics]]
        sample_tbl.columns = [x.replace(prefix, "") for x in sample_tbl.columns]
        loc = em.write_dataframe(ws, df=sample_tbl, retCellRange="value", skipby="col", title=title)
        # The fourth entry of the returned range is pulled back by one.
        fmt_range = [v - 1 if pos == 3 else v for pos, v in enumerate(loc)]
        em.set_cell_format(ws, cell_range=fmt_range, cformat="NUM%.2")

    # Park the cursor under the last table, back at the info column.
    em.curr_col = info_loc[1]
    em.curr_row = loc[2] + 1
    return (info_loc[1], loc[2] + 1)

add_perf_lift ¶

add_perf_lift(em, ws, m2_data, m1_data, info, perf_metrics, sample_prefices, wTitle=True)

Add Performance Lift.

源代码位于： ExcelMaster/Template.py

def add_perf_lift(em, ws, m2_data, m1_data, info, perf_metrics, sample_prefices, wTitle = True):
    """ Add Performance Lift.

    Writes an info table whose first info column reads
    ``"<m2 value> over <m1 value>"`` and then, per sample prefix, a table of
    ``m2 / m1 - 1`` for every metric in ``perf_metrics``, formatted as a
    two-decimal percentage with data bars.

    Args:
        em: Report writer exposing ``write_dataframe``, ``set_cell_format``,
            ``set_data_bar`` and the ``gap_number``/``curr_row``/``curr_col``
            attributes.
        ws: Worksheet handle forwarded to every ``em`` call.
        m2_data: Metrics of the second model (numerator of the lift).
        m1_data: Metrics of the first model (denominator of the lift).
        info: Info column names; the first three become the index.
        perf_metrics: Un-prefixed metric column names.
        sample_prefices: Sample prefixes, one lift table each (non-empty).
        wTitle: If false every table is written with ``title=None``.

    Returns:
        The cell range returned by the writer for the info table.
    """
    em.gap_number = 0
    title=" "
    if not wTitle:
        title = None

    # Copy first so the label rewrite below acts on an independent frame
    # rather than on a selection of m2_data (avoids pandas' chained-assignment
    # warning).
    info_data = m2_data[info].copy()
    info_data[info[0]] = m2_data[info[0]] + " over " + m1_data[info[0]]
    info_data = info_data.set_index(info[:3])
    info_loc = em.write_dataframe(ws, df=info_data, retCellRange="value", skipby="col", title=title, titleformat="", index = True)

    for prefix in sample_prefices:
        title = prefix.upper().strip("_")
        if not wTitle:
            title = None

        prefixed_cols = [prefix + x for x in perf_metrics]

        m2_perf = m2_data[prefixed_cols]
        m2_perf.columns = [x.replace(prefix, "") for x in m2_perf.columns]

        m1_perf = m1_data[prefixed_cols]
        m1_perf.columns = [x.replace(prefix, "") for x in m1_perf.columns]

        # Relative lift of model 2 over model 1.
        perf_lift = (m2_perf/m1_perf) - 1

        loc = em.write_dataframe(ws, df=perf_lift, retCellRange="value", skipby="col", title=title)
        em.set_cell_format(ws, cell_range=loc, cformat="NUM%.2")
        em.set_data_bar(ws, cell_range=loc)

    # Park the cursor under the last table, back at the info column.
    em.curr_col = info_loc[1]
    em.curr_row = loc[2] + 1
    return info_loc

get_seg_perf_comparison_report ¶

get_seg_perf_comparison_report(em, ws, m1_data, bad, sample_prefices, title='Performance Comparison', m2_data=None, perf_metrics=['ks', 'top10_cap', 'top20_cap', 'top40_cap', 'auc'], nbins=100)

Get Segment Perf Eval Report.

源代码位于： ExcelMaster/Template.py

def get_seg_perf_comparison_report(em, ws, m1_data, bad, sample_prefices, 
                                   title = "Performance Comparison",
                                   m2_data = None, 
                                   perf_metrics = ["ks", "top10_cap", "top20_cap", "top40_cap", "auc"], 
                                   nbins=100):
    """ Get Segment Perf Eval Report.

    Writes, top to bottom: a merged title row, a note about the number of
    gains bins, the score-info tables, the performance-metric tables and,
    when ``m2_data`` is given, the performance-lift tables of model 2 over
    model 1.

    Args:
        em: Report writer; its ``gap_number``, ``curr_row`` and ``curr_col``
            attributes are set here, and it is passed to the ``add_*`` helpers.
        ws: Worksheet handle forwarded to every call.
        m1_data: First model's table, loaded through ``input_validation``.
        bad: Name of the bad flag; lower-cased to build the
            ``num_of_<bad>`` and ``rate_of_<bad>`` column names.
        sample_prefices: Column-name prefixes, one per sample.
        title: Text of the merged title row.
        m2_data: Optional second model's table, handled like ``m1_data``.
        perf_metrics: Un-prefixed metric column names.
        nbins: Only used in the explanatory note text.

    Returns:
        ``ws``.
    """

    # One column name per (sample prefix, metric) pair: prefix + metric.
    metric_cols = [sample + metric for sample in sample_prefices for metric in perf_metrics]

    m1_data = input_validation(m1_data)
    m1_data = convert_perc_str_to_float(m1_data, metric_cols)

    em.gap_number = 0
    em.merge_col(ws, ncols=3, text = title, cformat = "ORANGE_H4")
    em.gap_number = 2
    em.write_text_content(ws, input_text=f"(Gains in {nbins} bins is used.) \n \n")

    # Info columns come from the first sample prefix; score-info columns are
    # un-prefixed here and get each sample prefix inside add_scr_info.
    info = [f"{sample_prefices[0]}scr_name", "true_bad", "seg_name", "seg_info"]
    scr_info = ["num_of_total", f"num_of_{bad.lower()}", f"rate_of_{bad.lower()}", "mean_score"]
    fnl_loc = add_scr_info(em, ws, m1_data, info, scr_info, sample_prefices)

    if m2_data is not None:
        m2_data = input_validation(m2_data)
        m2_data = convert_perc_str_to_float(m2_data, metric_cols)
        # Second model's rows are written without titles (wTitle=False).
        fnl_loc = add_scr_info(em, ws, m2_data, info, scr_info, sample_prefices, False)

    # Start the next section three rows below the last table written.
    em.curr_col = fnl_loc[0]
    em.curr_row = fnl_loc[1] + 3

    fnl_loc = add_perf_metrics(em, ws, m1_data, info, perf_metrics, sample_prefices)

    if m2_data is not None:
        fnl_loc = add_perf_metrics(em, ws, m2_data, info, perf_metrics, sample_prefices, False)

    em.curr_col = fnl_loc[0]
    em.curr_row = fnl_loc[1] + 3

    if m2_data is not None:
        # The returned range is not used after this call.
        info_loc = add_perf_lift(em, ws, m2_data, m1_data, info, perf_metrics, sample_prefices)
    return ws

get_means_chart_report ¶

get_means_chart_report(em, ws, means_rpt, by_class, varlist, class_name=None, stats_list=['N', 'NMISS', 'MIN', 'MEAN', 'MAX'], inc_miss_rate=True, varcol='variable', attr_info=None)

Plot Means Chart and Get Report.

源代码位于： ExcelMaster/Template.py

def get_means_chart_report(em, ws, means_rpt, by_class, varlist, class_name = None, 
                           stats_list = ['N', 'NMISS', 'MIN', 'MEAN', 'MAX'], inc_miss_rate = True,
                           varcol = "variable", attr_info = None):
    """ Plot Means Chart and Get Report.

    For every attribute in ``varlist`` this optionally writes the attribute's
    info row and then one chart per statistic in ``stats_list``, placed side
    by side, with ``by_class`` on the x axis. With ``inc_miss_rate`` each
    chart also carries the missing rate as a line.

    Args:
        em: Report writer exposing ``merge_col``, ``write_dataframe``,
            ``write_text_content``, ``write_duo_chart``, ``write_chart``,
            ``reset_curr_loc`` and the ``gap_number``/``curr_row``/``curr_col``
            attributes.
        ws: Worksheet handle forwarded to every ``em`` call.
        means_rpt: Means report (path or DataFrame).
        by_class: Class column (lower-cased) used to sort and as x axis.
        varlist: Attribute names to report, matched against ``varcol``.
        class_name: X-axis label; defaults to ``by_class``.
        stats_list: Statistic columns (lower-cased when read), non-empty.
        inc_miss_rate: Whether to overlay ``missing_rate``. If that column
            is absent it is computed as ``nmiss / (n + nmiss)``.
        varcol: Attribute-name column (lower-cased).
        attr_info: Optional attribute info table (path or DataFrame).

    Returns:
        ``ws``.
    """
    from tqdm import tqdm

    means_rpt = input_validation(means_rpt)

    if class_name is None:
        class_name = by_class

    means_rpt.columns = [x.lower() for x in means_rpt.columns]
    varcol = varcol.lower()
    # Copy the filtered rows so the missing_rate column below is added to an
    # independent frame (avoids pandas' chained-assignment warning).
    means_rpt = means_rpt[means_rpt[varcol].isin(varlist)].copy()

    if inc_miss_rate:
        if "missing_rate" not in means_rpt.columns:
            means_rpt["missing_rate"] = means_rpt["nmiss"]/(means_rpt["n"] + means_rpt["nmiss"])

    # Load the attribute info once. It was previously re-validated (and, for
    # a file path, re-read) for every attribute in the loop.
    attr_info_table = None
    if attr_info is not None:
        attr_info_table = input_validation(attr_info)

    em.gap_number = 2
    em.merge_col(ws, ncols = 5, text="Means Chart by Attribute")

    for var in tqdm(varlist):
        # One attribute per row of charts.

        if attr_info_table is not None:
            em.gap_number = 0
            singe_attr_info = attr_info_table.query(f"{varcol} == '{var.upper()}'")
            em.write_dataframe(ws, df = singe_attr_info,
                               index = False,
                               title =f"Information for Attribute {var.upper()}",
                               skipby="row", retCellRange="value")

        em.gap_number = 2
        example = means_rpt[means_rpt[varcol] == var]
        chart_loc = {}
        for stat in stats_list:
            # One chart per statistic, laid out column by column.
            em.write_text_content(worksheet=ws, input_text=f"{stat} ({var}) [[ORANGE_H3]] \n")
            example = example.sort_values([by_class.lower()], ascending = True)

            if inc_miss_rate:
                loc = em.write_duo_chart(ws,
                                         df = example,
                                         x = by_class.lower(),
                                         y1_list=[stat.lower()],
                                         y2_list = ["missing_rate"],
                                         title = f"{stat.upper()} for Attribute {var}",
                                         chart_size=(20, 10),
                                         c1_type="column",
                                         c2_type="line",
                                         y2_num_format = "0.00%",
                                         major_gridlines=False,
                                         xy_axes_name=(class_name, stat, "Missing Rate (%)"),
                                         skipby="col",
                                         y1_axis_range=None,
                                         y2_axis_range=None,
                                         retCellRange="value")
            else:
                loc = em.write_chart(ws,
                                     df = example,
                                     x = by_class.lower(),
                                     y_list=[stat.lower()],
                                     title = f"{stat.upper()} for Attribute {var}",
                                     chart_size=(20, 10),
                                     chart_type="column",
                                     major_gridlines=False,
                                     xy_axes_name=(class_name, stat),
                                     skipby="col",
                                     y_axis_range=None,
                                     retCellRange="value")

            chart_loc[stat] = loc

            # Step the row back so the next statistic's heading lines up with
            # this one. NOTE(review): presumably loc[0] is the chart's first
            # row; confirm against the writer.
            em.curr_row = loc[0] - 1

        # Continue below the first chart of this row, at its column.
        chart1_loc = chart_loc[stats_list[0]]
        em.curr_row = chart1_loc[2] + em.gap_number
        em.curr_col = chart1_loc[1]

    em.reset_curr_loc()
    return ws

get_grid_boxplot_report ¶

get_grid_boxplot_report(em, ws, perf_res, hparam_list, metric_list, fontsize=12, figsize=(30, 13), transp_bg=True, color_grp=(20, 1), colored_box=True)

Plot Boxplots of Grid Search Result for Hyperparams.

源代码位于： ExcelMaster/Template.py

def get_grid_boxplot_report(em, ws, perf_res, hparam_list, metric_list, fontsize = 12, figsize = (30, 13), transp_bg = True, color_grp = (20, 1), colored_box = True):
    """Write grid-search boxplots to a worksheet, one row of plots per metric.

    A merged title cell is written first.  Then, for every metric, a heading
    line is written and one boxplot per hyper-parameter is placed with
    ``skipby="col"``.  After each metric the cursor is moved relative to the
    location returned for the first hyper-parameter's boxplot, so the next
    metric starts below it.

    Args:
        em: Report engine; this function uses its ``gap_number``,
            ``curr_row`` and ``curr_col`` attributes and its ``merge_col``,
            ``write_text_content`` and ``write_boxplot`` methods.
        ws: Worksheet handle forwarded to every ``em`` call.
        perf_res: Table supporting ``perf_res[[param, metric]]``
            (presumably a pandas DataFrame of grid-search results).
        hparam_list: Hyper-parameter column names; one boxplot each.
        metric_list: Metric column names; one group of boxplots each.
        fontsize: Forwarded to ``em.write_boxplot``.
        figsize: Forwarded to ``em.write_boxplot``.
        transp_bg: Forwarded to ``em.write_boxplot``.
        color_grp: Forwarded to ``em.write_boxplot``.
        colored_box: Forwarded to ``em.write_boxplot``.

    Returns:
        The ``ws`` object that was passed in.
    """

    em.gap_number = 2

    em.merge_col(ws, ncols = 5, text="Boxplot For Grid Search Result")

    chart_loc = {}
    for metric in metric_list:

        em.write_text_content(worksheet=ws, input_text=f"{metric} [[ORANGE_H3]] \n")

        for param in hparam_list:
            loc = em.write_boxplot(ws,
                             df = perf_res[[param, metric]],
                             x = param,
                             y = metric,
                             y_percentage = True,
                             show_fig = False,
                             # Honour the caller's choice instead of a hard-coded True.
                             colored_box = colored_box,
                             fontsize = fontsize,
                             figsize = figsize,
                             color_grp = color_grp,
                             title=  f"Boxplot for {string_proc(param)}",
                             transp_bg = transp_bg,
                             skipby = "col",
                             retCellRange = "value")

            chart_loc[param] = loc

        # Reposition the cursor from the first boxplot of this metric:
        # element 2 of its location (plus the gap) becomes the row and
        # element 1 becomes the column.
        chart1_loc = chart_loc[hparam_list[0]]
        em.curr_row = chart1_loc[2] + em.gap_number
        em.curr_col = chart1_loc[1]

    return ws

get_var_reduct_report ¶

get_var_reduct_report(em, ws, vr_perf, metric_cols, target_metrics=None, basic_info_text=None, nvars_col='nvars')

Generate Variable Reduction Excel Report.

源代码位于： ExcelMaster/Template.py

def get_var_reduct_report(em, ws, vr_perf, metric_cols, target_metrics = None, basic_info_text = None, nvars_col = "nvars"):
    """Generate the Variable Reduction report on a worksheet.

    The performance table is loaded through ``input_validation``, percentage
    strings in ``metric_cols`` are converted to floats, and a
    ``<metric>_shift`` column is added for every metric that is also listed
    in ``target_metrics``.  Three tables (information, metrics, shifts) are
    written side by side, followed by one dual-axis line chart per target
    metric, laid out two charts per row.

    Args:
        em: Report engine providing the ``write_*``, ``set_*`` and
            ``reset_curr_loc`` methods used below.
        ws: Worksheet handle forwarded to every ``em`` call.
        vr_perf: Anything accepted by ``input_validation``.
        metric_cols: List of metric column names.
        target_metrics: Metrics to chart; defaults to ``metric_cols``.
        basic_info_text: Optional text written under the report heading.
        nvars_col: Name of the variable-count column.  It is lower-cased and
            used both for the shift computation and as the chart x-axis.

    Returns:
        The ``ws`` object that was passed in.
    """

    if target_metrics is None:
        target_metrics = metric_cols

    nvars_col = nvars_col.lower()

    vr_perf = input_validation(vr_perf)
    vr_perf = convert_perc_str_to_float(vr_perf, metric_cols)

    # Compute the shifts against the caller's nvars_col.  Going through
    # get_metrics_shift would always read the hard-coded "nvars" column.
    for metric in metric_cols:
        if metric in target_metrics:
            vr_perf[metric + "_shift"] = get_metric_shift(vr_perf, metric, nvars_col)
    res = vr_perf

    shift_cols = [y + "_shift" for y in metric_cols]

    all_metric_cols = metric_cols + shift_cols
    info_table = vr_perf[[x for x in vr_perf.columns if x not in all_metric_cols]]
    metrics_table = vr_perf[[x for x in vr_perf.columns if x in metric_cols]]
    shift_table = vr_perf[[x for x in vr_perf.columns if x in shift_cols]]

    em.write_text_content(ws, input_text="{#} Variable Reduction Report \n")

    if basic_info_text:
        em.write_text_content(ws, input_text=basic_info_text)

    # Tables are written with no gap between them.
    em.gap_number = 0
    info_loc = em.write_dataframe(ws, df=info_table, title = "Model Information", header = True, index = False, retCellRange="value", skipby="col")
    perf_loc = em.write_dataframe(ws, df=metrics_table, title = "Variable Reduction Performance", header = True, index = False, retCellRange="value", skipby="col")
    shift_loc = em.write_dataframe(ws, df=shift_table, title = "Performance Shift", header = True, index = False, retCellRange="value", skipby="row")

    em.set_cell_format(ws, perf_loc, cformat="NUM%.2")
    em.set_cell_format(ws, shift_loc, cformat="NUM%.2")
    em.set_data_bar(ws, shift_loc)

    # Charts start below the information table, in its column.
    em.gap_number = 2
    chart_start_loc = (info_loc[2] + em.gap_number, info_loc[1])
    em.reset_curr_loc(loc = chart_start_loc)

    chart_loc = []
    for i, metric in enumerate(target_metrics):
        logger.info(metric)

        loc = em.write_duo_chart(worksheet = ws,
                                 df = res,
                                 y1_list = [metric],
                                 y2_list = [metric + '_shift'],
                                 x = nvars_col,
                                 c1_type = "line",
                                 c2_type = "line",
                                 y1_axis_range = None,
                                 y2_axis_range = None,
                                 title = metric,
                                 chart_size = (20, 10),
                                 xy_axes_name = ("Nvars", metric, metric + '_shift'),
                                 major_gridlines=False,
                                 retChart = False,
                                 retCellRange = "value",
                                 skipby = "col",
                                 y1_num_format = "0.00%",
                                 y2_num_format = "0.00%")

        chart_loc.append(loc)

        # After every second chart, start a new chart row under the chart
        # just written, aligned with the first chart's column.
        if i % 2 == 1:
            reset_loc = (chart_loc[i][2] + em.gap_number, chart_loc[0][1])
            em.reset_curr_loc(loc = reset_loc)

    return ws

get_grid_search_report ¶

get_grid_search_report(em, ws, rs_perf, metric_cols, sample_prefices, basic_info_text=None, sortby='hd_auc')

Get Grid Search Report.

源代码位于： ExcelMaster/Template.py

def get_grid_search_report(em, ws, rs_perf, metric_cols, sample_prefices, basic_info_text = None, sortby = "hd_auc"):
    """Write the Grid Search report tables to a worksheet.

    The performance table is loaded through ``input_validation`` and
    percentage strings in ``metric_cols`` are converted to floats.  It is
    optionally sorted in descending order by ``sortby``.  Overfitting shift
    columns are then added by comparing the first sample prefix with each of
    the remaining ones.  Three tables (information, metrics, shifts) are
    written side by side.

    Args:
        em: Report engine providing ``write_text_content``,
            ``write_dataframe``, ``set_cell_format`` and ``set_data_bar``.
        ws: Worksheet handle forwarded to every ``em`` call.
        rs_perf: Anything accepted by ``input_validation``.
        metric_cols: List of metric column names.
        sample_prefices: Sequence of column-name prefixes; the first one is
            the baseline every other prefix is compared with.
        basic_info_text: Optional text written under the report heading.
        sortby: Column to sort by (descending); a falsy value skips sorting.

    Returns:
        The ``ws`` object that was passed in.
    """
    rs_perf = convert_perc_str_to_float(input_validation(rs_perf), metric_cols)

    if sortby:
        rs_perf = rs_perf.sort_values([sortby], ascending = False)

    # Pair the baseline prefix (position 0) with every later prefix in turn.
    for other_idx in range(1, len(sample_prefices)):
        prefix_pair = (sample_prefices[0], sample_prefices[other_idx])
        rs_perf = compute_overfitting_shift(rs_perf, prefix_pair)

    shift_cols = [name for name in rs_perf.columns if name.endswith("_shift")]
    all_metric_cols = metric_cols + shift_cols

    # Split the columns into three disjoint groups, keeping their order.
    info_names = [name for name in rs_perf.columns if name not in all_metric_cols]
    metric_names = [name for name in rs_perf.columns if name in metric_cols]
    shift_names = [name for name in rs_perf.columns if name in shift_cols]
    info_table = rs_perf[info_names]
    metrics_table = rs_perf[metric_names]
    shift_table = rs_perf[shift_names]

    em.write_text_content(ws, input_text="{#} Grid Search Report \n")

    if basic_info_text:
        em.write_text_content(ws, input_text=basic_info_text)

    # Tables are written with no gap between them.
    em.gap_number = 0
    info_loc = em.write_dataframe(ws, df=info_table, title = "Model Information", header = True, index = False, retCellRange="value", skipby="col")
    perf_loc = em.write_dataframe(ws, df=metrics_table, title = "Grid Search Performance", header = True, index = False, retCellRange="value", skipby="col")
    shift_loc = em.write_dataframe(ws, df=shift_table, title = "Overfitting Performance Shift", header = True, index = False, retCellRange="value", skipby="row")

    for table_loc in (perf_loc, shift_loc):
        em.set_cell_format(ws, table_loc, cformat="NUM%.2")
    em.set_data_bar(ws, shift_loc)

    return ws

工具函数 — `Utility`¶

Utility ¶

getStartDateofLatestWeek ¶

getStartDateofLatestWeek(retStr=True)

Date of first day of last week.

源代码位于： ExcelMaster/Utility.py

def getStartDateofLatestWeek(retStr=True):
    """Return the day immediately before the Monday of the current week.

    The result is computed directly with date arithmetic rather than by
    formatting a ``%W`` week number and parsing it back.

    Args:
        retStr: When true, return the date formatted as ``"%Y-%m-%d"``;
            otherwise return a ``datetime.datetime`` at midnight.

    Returns:
        ``str`` or ``datetime.datetime``, depending on ``retStr``.
    """
    import datetime

    today = datetime.date.today()
    # date.weekday() is 0 on Monday, so this is the Monday of today's week.
    monday = today - datetime.timedelta(days=today.weekday())
    # Callers that pass retStr=False have always received a datetime.
    res = datetime.datetime.combine(monday, datetime.time.min) - datetime.timedelta(days=1)
    if retStr:
        return res.strftime("%Y-%m-%d")
    return res

getLastCompletedVintage ¶

getLastCompletedVintage(start_date=None, format='%Y-%m-%d', vintage=False)

Last Completed Vintage

源代码位于： ExcelMaster/Utility.py

def getLastCompletedVintage(start_date = None, format="%Y-%m-%d", vintage=False):
    """Return the last day of the month preceding a reference date.

    Args:
        start_date: Reference date as a string in ``format``.  When ``None``
            today's date is used.
        format: ``strftime``/``strptime`` pattern used to parse
            ``start_date`` and to format the string result.
        vintage: When true, return the month as the integer ``YYYYMM``
            instead of a formatted date string.

    Returns:
        ``int`` (``YYYYMM``) when ``vintage`` is true, otherwise ``str``.

    Raises:
        ValueError: If ``start_date`` does not match ``format``.
    """
    import datetime

    todayDate = datetime.date.today()

    if start_date is not None:
        todayDate = datetime.datetime.strptime(start_date, format).date()

    # First day of the reference month minus one day = last day of the
    # previous month.
    lastM = todayDate.replace(day=1) - datetime.timedelta(days=1)
    if vintage:
        # Format the vintage directly so it does not depend on the layout of
        # the caller-supplied ``format``.
        return int(lastM.strftime("%Y%m"))
    return lastM.strftime(format)

vin2quar ¶

vin2quar(strDate)

String Vintage to Quarter (if completed month).

源代码位于： ExcelMaster/Utility.py

def vin2quar(strDate):
    """Convert a vintage string to a quarter label when it ends a quarter.

    Characters 0-3 are read as the year and characters 4-5 as the month.
    When the month is 3, 6, 9 or 12 the result is ``"<year>Q<n>"``; for any
    other month the input is returned unchanged.
    """
    year, month = int(strDate[:4]), int(strDate[4:6])
    quarter_end_months = (3, 6, 9, 12)
    if month not in quarter_end_months:
        return strDate
    # The position in the tuple is the zero-based quarter number.
    quarter = quarter_end_months.index(month) + 1
    return f"{year}Q{quarter}"

list_files ¶

list_files(location, pattern)

List all files.

源代码位于： ExcelMaster/Utility.py

def list_files(location, pattern):
    """Collect the names of files under ``location`` that match ``pattern``.

    The directory tree is walked with ``os.walk`` and every file name (the
    bare name, without its directory) for which ``re.search(pattern, name)``
    finds a match is added to the result.

    Returns:
        A list of matching file names in walk order.
    """
    import re
    matched = []
    for _root, _dirs, names in os.walk(location):
        matched.extend(name for name in names if re.search(pattern, name))
    return matched

getCurrentDateTime ¶

getCurrentDateTime(fmt='%Y%m%d%H%M%S')

Get Current DateTime

源代码位于： ExcelMaster/Utility.py

def getCurrentDateTime(fmt = "%Y%m%d%H%M%S"):
    """Format the current local date and time with ``fmt``.

    Args:
        fmt: ``strftime`` pattern; the default yields 14 digits,
            ``YYYYMMDDHHMMSS``.

    Returns:
        The formatted timestamp string.
    """
    from datetime import datetime as _datetime
    now = _datetime.now()
    return now.strftime(fmt)

input_table_proc ¶

input_table_proc(tbl)

Process Input Table.

源代码位于： ExcelMaster/Utility.py

def input_table_proc(tbl):
    """Lower-case every column name of ``tbl`` in place.

    Returns:
        The same ``tbl`` object, after its ``columns`` were replaced.
    """
    lowered_names = [name.lower() for name in tbl.columns]
    tbl.columns = lowered_names
    return tbl

get_file_extension ¶

get_file_extension(input_path)

Get File Extension for a given file Path.

源代码位于： ExcelMaster/Utility.py

def get_file_extension(input_path):
    """Return the extension of ``input_path`` as given by ``os.path.splitext``.

    The result includes the leading dot (for example ``".csv"``) and is an
    empty string when the path has no extension.
    """
    _stem, extension = os.path.splitext(input_path)
    return extension

input_validation ¶

input_validation(x, sep=',')

Input Validation.

源代码位于： ExcelMaster/Utility.py

def input_validation(x, sep=","):
    """Load or accept an input table and lower-case its column names.

    Args:
        x: Either a file path (``str`` or ``os.PathLike``) or a pandas
            DataFrame.  A path whose extension is ``.sas7bdat`` (compared
            case-insensitively) is read with ``pd.read_sas`` using latin-1
            encoding; any other path is read with ``pd.read_csv``.
        sep: Field separator passed to ``pd.read_csv``.

    Returns:
        The table after ``input_table_proc`` was applied to it.

    Raises:
        AttributeError: If ``x`` is neither a path nor a DataFrame.
    """
    import os
    if isinstance(x, (str, os.PathLike)):
        # Lower-case the extension so "DATA.SAS7BDAT" is not parsed as CSV.
        if get_file_extension(x).lower() == ".sas7bdat":
            res = pd.read_sas(x, encoding="latin-1")
        else:
            res = pd.read_csv(x, sep = sep)
        res = input_table_proc(res)
        return res
    elif isinstance(x, pd.DataFrame):
        return input_table_proc(x)
    else:
        raise AttributeError("Only Support csv/sas7bdat Path or Panda DataFrame as Input!!!")

val_input_condition ¶

val_input_condition(target, condition=('>', 20))

Condition Tuple Validation.

源代码位于： ExcelMaster/Utility.py

def val_input_condition(target, condition = (">", 20)):
    """Evaluate ``target`` against a condition.

    Args:
        target: Value to test; it is compared with ``==`` or one of the
            ordering operators, so anything supporting those works.
        condition: Either a string, in which case the result is
            ``target == condition``, or a pair ``(operator, value)``.  The
            operator is stripped and lower-cased and may be one of
            ``>``/``gt``, ``<``/``lt``, ``=``/``==``/``eq``, ``!=``/``ne``,
            ``>=``/``gte``, ``<=``/``lte``.  The value is converted with
            ``float``.

    Returns:
        The result of the comparison.

    Raises:
        ValueError: If the operator is not recognised or the value cannot be
            converted to ``float``.
    """
    import operator as op

    if isinstance(condition, str):
        return (target == condition)

    comparators = {
        '>': op.gt, 'gt': op.gt,
        '<': op.lt, 'lt': op.lt,
        '=': op.eq, '==': op.eq, 'eq': op.eq,
        '!=': op.ne, 'ne': op.ne,
        '>=': op.ge, 'gte': op.ge,
        '<=': op.le, 'lte': op.le,
    }

    symbol = condition[0].strip().lower()
    value = float(condition[1])

    compare = comparators.get(symbol)
    if compare is None:
        # Fail loudly instead of silently returning None for a typo.
        raise ValueError(f"Unsupported condition operator: {condition[0]!r}")
    return compare(target, value)

tanspose_dataframe ¶

tanspose_dataframe(df, index_col)

Transpose Pandas DataFrame.

源代码位于： ExcelMaster/Utility.py

def tanspose_dataframe(df, index_col):
    """Transpose ``df`` around ``index_col``.

    ``index_col`` becomes the index, the frame is transposed so its values
    become the column labels, and the index is then reset into a regular
    column.

    Returns:
        The new, transposed DataFrame.
    """
    keyed = df.set_index(index_col)
    flipped = keyed.transpose()
    return flipped.reset_index()

convert_perc_str_to_float ¶

convert_perc_str_to_float(df, cols)

Percentage to Float.

源代码位于： ExcelMaster/Utility.py

def convert_perc_str_to_float(df, cols):
    """Convert percentage strings such as ``"12.5%"`` to float fractions.

    Each column in ``cols`` that holds text (object dtype or a pandas string
    dtype) has its trailing ``%`` characters stripped, is cast to float and
    divided by 100.  Columns with any other dtype are left untouched.

    Args:
        df: DataFrame to convert; it is modified in place.
        cols: Iterable of column names to inspect.

    Returns:
        The same ``df`` object.
    """
    for col in cols:
        # Check the dtype itself, not its string name, so pandas string
        # dtypes are converted as well as plain object columns.
        if pd.api.types.is_string_dtype(df[col].dtype):
            df[col] = df[col].str.rstrip('%').astype('float') / 100
    return df

color_hex2rgb ¶

color_hex2rgb(hex_code)

Convert Color Hex Code to RGB Tuple.

源代码位于： ExcelMaster/Utility.py

def color_hex2rgb(hex_code):
    """Convert a hex colour code to an ``(r, g, b)`` tuple of integers.

    Leading ``#`` characters are removed, then the first three two-character
    pairs are parsed as base-16 numbers.
    """
    digits = hex_code.lower().lstrip('#')
    channels = []
    for start in range(0, 6, 2):
        channels.append(int(digits[start:start + 2], 16))
    return tuple(channels)

get_color_set ¶

get_color_set(n, start_num=0, step=1, retName=False, lookupName=None)

Return a set of color code without replacement.

源代码位于： ExcelMaster/Utility.py

def get_color_set(n, start_num = 0, step = 1, retName = False, lookupName = None):
    """Pick ``n`` colours from matplotlib's ``XKCD_COLORS`` table.

    Two selection modes are supported:

    * By position (default): entries ``start_num``, ``start_num + step``, ...
      of the colour table are taken.
    * By name: when ``lookupName`` is given, the table is scanned in order
      and the first ``n`` entries whose name matches the regular expression
      are taken.  ``start_num`` and ``step`` are ignored in this mode.
      NOTE(review): the pattern is searched against the table key, which
      still carries the ``xkcd:`` prefix.

    Args:
        n: Number of colours wanted.
        start_num: Index of the first colour in positional mode.
        step: Index increment in positional mode.
        retName: When true, return a ``{name: colour}`` dict with the
            ``xkcd:`` prefix removed from the names; otherwise return a list
            of the colour values.
        lookupName: Optional regular expression enabling name mode.

    Returns:
        ``dict`` or ``list``, depending on ``retName``.

    Raises:
        IndexError: In positional mode, if an index is outside the table.
    """
    import matplotlib.colors as mcolors
    import re

    colors = list(mcolors.XKCD_COLORS.items())
    color_set = {}

    if lookupName:
        # Only this result is ever returned in name mode, so the positional
        # selection, and its possible IndexError, is skipped.
        for name, color in colors:
            if len(color_set) == n:
                break
            if re.search(lookupName, name):
                color_set[name.replace("xkcd:", "")] = color
    else:
        for i in range(start_num, start_num + n * step, step):
            name, color = colors[i]
            color_set[name.replace("xkcd:", "")] = color

    if retName:
        return color_set
    return list(color_set.values())

string_proc ¶

string_proc(x)

Process a given String.

源代码位于： ExcelMaster/Utility.py

def string_proc(x):
    """Turn a snake_case string into space-separated capitalised words.

    The input is split on ``_``; each piece is stripped and capitalised, and
    the pieces are joined with single spaces.
    """
    return " ".join(piece.strip().capitalize() for piece in x.split("_"))

convert_to_boxplot_data ¶

convert_to_boxplot_data(df, x, y, y_percentage=False)

Convert dataframe to boxplot data.

源代码位于： ExcelMaster/Utility.py

def convert_to_boxplot_data(df, x, y, y_percentage = False):
    """Group the values of column ``y`` by the distinct values of column ``x``.

    Args:
        df: Source DataFrame.
        x: Name of the grouping column; its sorted unique values become the
            dictionary keys.
        y: Name of the value column.
        y_percentage: When true, every value is multiplied by 100.

    Returns:
        A dict mapping each distinct ``x`` value to the list of ``y`` values
        from the rows where ``x`` equals that value.
    """
    levels = df[x].sort_values().unique().tolist()

    def _values_for(level):
        picked = df[df[x] == level][y].tolist()
        if y_percentage:
            return [value * 100 for value in picked]
        return picked

    return {level: _values_for(level) for level in levels}

color_input_validation ¶

color_input_validation(color_grp, val_n)

Perform Color Input Validation.

源代码位于： ExcelMaster/Utility.py

def color_input_validation(color_grp, val_n):
    """Normalise a colour specification into a list of colours.

    Args:
        color_grp: One of
            * a 2-tuple, whose two items are passed to ``get_color_set`` as
              ``start_num`` and ``step``;
            * a single string, which is repeated ``val_n`` times;
            * a list of exactly ``val_n`` strings, which is returned as is.
        val_n: Number of colours required.

    Returns:
        A list of colours.

    Raises:
        ValueError: If ``color_grp`` matches none of the accepted forms.
    """
    if isinstance(color_grp, tuple) and len(color_grp) == 2:
        # Pick colours from the shared palette helper.
        return get_color_set(val_n, color_grp[0], color_grp[1], False)

    if isinstance(color_grp, str):
        return [color_grp] * val_n

    is_complete_str_list = (
        isinstance(color_grp, list)
        and all([isinstance(item, str) for item in color_grp])
        and len(color_grp) == val_n
    )
    if is_complete_str_list:
        return color_grp

    raise ValueError("Please give valid color_grp: tuple of two numbers, list of color code or a single color code.")

get_metric_shift ¶

get_metric_shift(data, metric_name, nvars_col='nvars')

Calculate Metric Shift for Variable Reduction.

源代码位于： ExcelMaster/Utility.py

def get_metric_shift(data, metric_name, nvars_col = "nvars"):
    """Compute the row-to-row metric change per unit change in ``nvars_col``.

    For both columns the difference between each row and the previous row is
    taken; the metric difference is divided by the ``nvars_col`` difference
    and missing results (including the first row) are replaced with 0.

    Args:
        data: DataFrame holding both columns.
        metric_name: Name of the metric column.
        nvars_col: Name of the variable-count column.

    Returns:
        A Series aligned with ``data``.
    """
    def _change_from_previous_row(column):
        series = data[column]
        return series - series.shift(1)

    shift_per_var = _change_from_previous_row(metric_name) / _change_from_previous_row(nvars_col)
    return shift_per_var.fillna(0)

get_metrics_shift ¶

get_metrics_shift(data, metric_cols)

Get Shift for List of Metrics.

源代码位于： ExcelMaster/Utility.py

def get_metrics_shift(data, metric_cols, nvars_col = "nvars"):
    """Add a ``<metric>_shift`` column to ``data`` for every listed metric.

    Args:
        data: DataFrame holding the metric columns and ``nvars_col``; it is
            modified in place.
        metric_cols: Iterable of metric column names.
        nvars_col: Name of the variable-count column forwarded to
            ``get_metric_shift``.

    Returns:
        The same ``data`` object with the shift columns added.
    """
    # The former ``metric.startswith(tuple(metric_cols))`` guard was always
    # true, because every metric starts with itself, so it is omitted.
    for metric in metric_cols:
        data[metric + "_shift"] = get_metric_shift(data, metric, nvars_col)
    return data

compute_overfitting_shift ¶

compute_overfitting_shift(data, sample_prefix)

Calculate Overfitting Performance Shift.

源代码位于： ExcelMaster/Utility.py

def compute_overfitting_shift(data, sample_prefix):
    """Add relative-shift columns comparing two samples' metrics.

    Columns whose name starts with ``sample_prefix[0]`` form the baseline and
    columns starting with ``sample_prefix[1]`` form the comparison sample.
    The two groups are paired in column order, and for each pair a column
    ``<comparison column>_shift`` equal to ``comparison / baseline - 1`` is
    added to ``data``.

    Args:
        data: DataFrame with the metric columns; it is modified in place.
        sample_prefix: Pair ``(baseline_prefix, comparison_prefix)``.

    Returns:
        The same ``data`` object with the shift columns added.

    Raises:
        ValueError: If the two samples do not have the same number of
            matching columns.
    """
    base_prefix, other_prefix = sample_prefix[0], sample_prefix[1]
    b_metrics = [x for x in data.columns if x.startswith(base_prefix)]
    o_metrics = [x for x in data.columns if x.startswith(other_prefix)]

    if len(b_metrics) != len(o_metrics):
        # The message previously claimed the lengths "are the the same".
        raise ValueError("The lengths of metrics between two samples are not the same in the given dataset!")

    for b_metric, o_metric in zip(b_metrics, o_metrics):
        data[o_metric + "_shift"] = data[o_metric].div(data[b_metric]) - 1
    return data

proc_psi_raw_report ¶

proc_psi_raw_report(psi_raw_table, psi_title, keep_list=None, varname='variable', upper=True)

Processing Raw PSI Report generated from Takecopter.

源代码位于： ExcelMaster/Utility.py

def proc_psi_raw_report(psi_raw_table, psi_title, keep_list = None, varname = "variable", upper=True):
    """Reshape a raw PSI table into a frame with two-level column labels.

    The table is loaded through ``input_validation``, its ``var_for_psi``
    column is renamed to ``varname`` and used as the index, and every
    remaining column is placed under a single top-level label ``psi_title``.

    Args:
        psi_raw_table: Anything accepted by ``input_validation``.
        psi_title: Top-level column label.
        keep_list: Optional list of second-level labels to keep, in order.
        varname: New name for the ``var_for_psi`` column.
        upper: When true, second-level labels are upper-cased first.

    Returns:
        The reshaped DataFrame.
    """
    report = input_validation(psi_raw_table).rename(columns={"var_for_psi":varname}).set_index(varname)

    if upper:
        report.columns = [label.upper() for label in report.columns]

    # Pair every existing label with the shared title to form two levels.
    inner_labels = report.columns
    outer_labels = [psi_title]*len(inner_labels)
    report.columns = [outer_labels, inner_labels]

    if not keep_list:
        return report
    wanted = [(psi_title, label) for label in keep_list]
    return report[wanted]

get_mean_risk ¶

get_mean_risk(bivar_single_attr, value_range_col=['min_indep', 'max_indep'], dep_col='dep')

get average risk

源代码位于： ExcelMaster/Utility.py

def get_mean_risk(bivar_single_attr, value_range_col = ['min_indep', 'max_indep'], dep_col = "dep"):
    """Attach two averages of ``dep_col`` to the table as constant columns.

    Args:
        bivar_single_attr: DataFrame to annotate; it is modified in place.
        value_range_col: Columns used to detect rows without a value range;
            a row is excluded from the second average only when all of these
            columns are missing.
        dep_col: Column to average.

    Returns:
        The same DataFrame with two added columns: ``mean`` (average over all
        rows) and ``mean_no_nan`` (average over the rows that have at least
        one non-missing value in ``value_range_col``).
    """
    rows_with_range = bivar_single_attr.dropna(how = "all", subset=value_range_col)
    ranged_mean = rows_with_range[dep_col].mean()
    overall_mean = bivar_single_attr[dep_col].mean()

    bivar_single_attr["mean"] = overall_mean
    bivar_single_attr["mean_no_nan"] = ranged_mean
    return bivar_single_attr

ExcelMaster¶

格式定义 — ExcelFormatTool¶

ExcelFormatTool ¶

ExcelFormat ¶

add_new_format ¶

核心引擎 — ExcelMaster¶

ExcelMaster ¶

ExcelWorkbook ¶

to_cell_range_text ¶

cell_range_to_loc ¶

colletter_to_textloc ¶

set_color_scale ¶

set_data_bar ¶

set_cell_format ¶

set_cell_format_rbyr ¶

remove_tmp_img ¶

close_workbook ¶

ExcelMaster ¶

add_worksheet ¶

reset_curr_loc ¶

set_cell_size ¶

get_curr_loc ¶

set_border_line ¶

merge_col ¶

write_dataframe ¶

insert_image ¶

write_chart ¶

write_combined_chart ¶

write_duo_chart ¶

write_text_by_dict ¶

write_text_content ¶

plot_boxplot staticmethod ¶

write_boxplot ¶

getStartDateofLatestWeek ¶

getLastCompletedVintage ¶

vin2quar ¶

list_files ¶

getCurrentDateTime ¶

input_table_proc ¶

get_file_extension ¶

input_validation ¶

val_input_condition ¶

tanspose_dataframe ¶

convert_perc_str_to_float ¶

color_hex2rgb ¶

get_color_set ¶

string_proc ¶

convert_to_boxplot_data ¶

color_input_validation ¶

get_metric_shift ¶

get_metrics_shift ¶

compute_overfitting_shift ¶

proc_psi_raw_report ¶

get_mean_risk ¶

报告模板 — Template¶

Template ¶

getStartDateofLatestWeek ¶

getLastCompletedVintage ¶

vin2quar ¶

list_files ¶

getCurrentDateTime ¶

input_table_proc ¶

get_file_extension ¶

input_validation ¶

val_input_condition ¶

tanspose_dataframe ¶

convert_perc_str_to_float ¶

color_hex2rgb ¶

get_color_set ¶

string_proc ¶

convert_to_boxplot_data ¶

color_input_validation ¶

get_metric_shift ¶

get_metrics_shift ¶

compute_overfitting_shift ¶

proc_psi_raw_report ¶

get_mean_risk ¶

get_pva_report ¶

get_bivar_report ¶

add_scr_info ¶

格式定义 — `ExcelFormatTool`¶

核心引擎 — `ExcelMaster`¶

plot_boxplot `staticmethod` ¶

报告模板 — `Template`¶

工具函数 — `Utility`¶