如何在 for 循环内的一个窗口中制作 Pandas 数据框中列的子图

2024-04-08

* 请帮忙,这非常重要:为什么无法通过在 for 循环内使用 HeatMap 来获取 Pandas 数据框列的箱线图?

我正在尝试在 for 循环的迭代过程中,为 pandas 数据框的各列创建子图:每个周期包含 480 个值,我希望把属于 A、B、C 的全部 3 个子图并排显示在同一个窗口中。我只找到了一个相关的回答(https://stackoverflow.com/questions/27446455/pandas-subplots-in-a-loop),但它恐怕不适用于我的情况!@euri10 在该回答中使用了 flat(https://docs.scipy.org/doc/numpy/reference/generated/numpy.ndarray.flat.html#numpy.ndarray.flat)。

我的脚本如下:

# Import and call the needed libraries
import numpy as np
import pandas as pd
import os
import seaborn as sns
import matplotlib.pyplot as plt


'''
Take a list and create the formatted matrix
'''
def mkdf(ListOf480Numbers, sections=8, lines_per_section=6):
    """Split a flat sequence into sections of zig-zag ordered lines.

    The input is divided into ``sections`` chunks and every chunk into
    ``lines_per_section`` lines, both with ``np.array_split``. Lines at even
    0-based positions (0, 2, 4, ...) are reversed; lines at odd positions
    (1, 3, 5, ...) keep their order.

    Args:
        ListOf480Numbers: Sequence of values (list, ndarray or pandas Series).
            With the default arguments, 480 values give 8 sections of 6 lines
            with 10 cells each.
        sections: Number of sections the input is split into.
        lines_per_section: Number of lines each section is split into.

    Returns:
        A list with ``sections`` entries; each entry is a list of
        ``lines_per_section`` NumPy arrays in zig-zag order.
    """
    # Coerce up front so that label-indexed inputs such as a pandas Series are
    # split and reversed purely by position, as plain NumPy arrays.
    flat = np.asarray(ListOf480Numbers)
    fixMatrix = []
    for section in np.array_split(flat, sections):
        lines = np.array_split(section, lines_per_section)
        fixMatrix.append(
            [line[::-1] if pos % 2 == 0 else line for pos, line in enumerate(lines)]
        )
    return fixMatrix

'''
Print the matrix with the required format
'''
def print_df(fixMatrix):
    """Arrange the sections produced by ``mkdf`` into one DataFrame.

    Sections are placed side by side in pairs and the pairs are stacked from
    top to bottom in this order: (4, 7), (5, 6), (1, 2), (0, 3). Each pair
    contributes 6 rows, so the result has 24 rows.

    Args:
        fixMatrix: Indexable collection of at least 8 sections, each holding
            at least 6 iterable lines.

    Returns:
        A ``pandas.DataFrame`` whose rows are the concatenated left and right
        lines of each section pair.
    """
    # (left section, right section) for each band of six rows, top to bottom.
    section_pairs = ((4, 7), (5, 6), (1, 2), (0, 3))
    rows = [
        [*fixMatrix[left][line], *fixMatrix[right][line]]
        for left, right in section_pairs
        for line in range(6)
    ]
    return pd.DataFrame(rows)

'''
Normalizing Formula
'''

def normalize(value, min_value, max_value, min_norm, max_norm):
    """Linearly rescale ``value`` from one range to another.

    ``min_value`` maps to ``min_norm`` and ``max_value`` maps to ``max_norm``.

    Args:
        value: Number (or array-like supporting arithmetic) to rescale.
        min_value: Lower bound of the source range.
        max_value: Upper bound of the source range.
        min_norm: Lower bound of the target range.
        max_norm: Upper bound of the target range.

    Returns:
        The rescaled value.
    """
    target_span = max_norm - min_norm
    fraction = (value - min_value) / (max_value - min_value)
    return (target_span * fraction) + min_norm

'''
Split data in three different lists A, B and C
'''

# Raw string: '\m' is not a valid escape sequence, so the plain literal only
# works by accident and triggers a warning on newer Python versions.
dft = pd.read_csv(r'D:\me4.TXT', header=None)
# Rows are consumed in groups of four by position: 0 -> id, 1 -> A, 2 -> B, 3 -> C.
row_kind = dft.index % 4
id_set = dft[row_kind == 0].astype('int').values
A = dft[row_kind == 1].values
B = dft[row_kind == 2].values
C = dft[row_kind == 3].values
# Only the first column of each group is used.
data = {'A': A[:,0], 'B': B[:,0], 'C': C[:,0]}
#df contains all the data
df = pd.DataFrame(data, columns=['A','B','C'], index = id_set[:,0])


'''
Data generation phase

'''

# Number of complete 480-row cycles contained in df. It is only printed here;
# the inner loop below is still hard-wired to a single cycle.
cycles = int(len(df)/480)
print(cycles)
for i in df:
    # One output folder per column. The bare except silently ignores every
    # error raised by os.mkdir (e.g. the folder already existing).
    try:
        os.mkdir(i)
    except:
        pass
    # Extremes of the whole column, used as the source range for normalize().
    min_val = df[i].min()
    min_nor = -1
    max_val = df[i].max()
    max_nor = 1
    for cycle in range(1):             # only cycle 0 is processed; replace range(1) by range(int(len(df)/480)) to iterate through all cycles
        count =  '{:04}'.format(cycle)
        j = cycle * 480
        ordered_data = mkdf(df.iloc[j:j+480][i])
        csv = print_df(ordered_data)
        # Write the re-ordered matrix of this column and cycle to <column>/<column><cycle>.csv
        csv.to_csv(f'{i}/{i}{count}.csv', header=None, index=None)            
        # NOTE(review): df3 is assigned only when 'C' in i, and
        # new_value1/new_value2 only otherwise, yet the code after this
        # if/else uses all of them on every pass of the column loop. On the
        # first pass (before 'C' is reached) df3 looks undefined - confirm.
        if 'C' in i:
            min_nor = -40
            max_nor = 150
            # Apply normalization for C to the range [-40, +150]
            # NOTE(review): the trailing [i] indexes the already selected
            # column slice by the column label - looks unintended, confirm.
            new_value3 = normalize(df['C'].iloc[j:j+480][i].values, min_val, max_val, -40, 150)
            n_cbar_kws = {"ticks":[-40,150,-20,0,25,50,75,100,125]}
            df3 = print_df(mkdf(new_value3))
        else:
            # Apply normalization for A and B to the range [-1, +1]
            # NOTE(review): both A and B are rescaled with min_val/max_val of
            # the current column i, not with their own extremes - confirm.
            new_value1 = normalize(df['A'].iloc[j:j+480][i].values, min_val, max_val, -1, 1)
            new_value2 = normalize(df['B'].iloc[j:j+480][i].values, min_val, max_val, -1, 1)
            n_cbar_kws = {"ticks":[-1.0,-0.75,-0.50,-0.25,0.00,0.25,0.50,0.75,1.0]}
        df1 = print_df(mkdf(new_value1))
        df2 = print_df(mkdf(new_value2))    

        # Plot a heatmap for the current column.
        # NOTE(review): the whole DataFrame df is passed here rather than the
        # re-ordered matrix computed above - confirm this is intended.
        plt.figure()
        sns.heatmap(df, vmin=min_nor, vmax=max_nor, cmap ='coolwarm', cbar_kws=n_cbar_kws)                             
        plt.title(i, fontsize=12, color='black', loc='left', style='italic')
        plt.axis('off')
        # Save the heatmap as <column>/<column><cycle>.png
        plt.savefig(f'{i}/{i}{count}.png')  



        # Plot all columns ['A','B','C'] in one window, side by side.
        # This section sits inside the column loop, so it runs once per
        # column i rather than once per cycle.


        fig, axes = plt.subplots(nrows=1, ncols=3 , figsize=(20,10))

        # Left panel: column A
        plt.subplot(131)
        sns.heatmap(df1, vmin=-1, vmax=1, cmap ="coolwarm", linewidths=.75 , linecolor='black', cbar=True , cbar_kws={"ticks":[-1.0,-0.75,-0.5,-0.25,0.00,0.25,0.5,0.75,1.0]})
        fig.axes[-1].set_ylabel('[MPa]', size=20) # presumably fig.axes[-1] is the colorbar axes just added by heatmap - TODO confirm
        plt.title('A', fontsize=12, color='black', loc='left', style='italic')
        plt.axis('off')

        # Middle panel: column B
        plt.subplot(132)
        sns.heatmap(df2, vmin=-1, vmax=1, cmap ="coolwarm", cbar=True , cbar_kws={"ticks":[-1.0,-0.75,-0.5,-0.25,0.00,0.25,0.5,0.75,1.0]})
        fig.axes[-1].set_ylabel('[Mpa]', size=20) # NOTE(review): unit is spelled '[MPa]' on the left panel
        #sns.despine(left=True)
        plt.title('B', fontsize=12, color='black', loc='left', style='italic')
        plt.axis('off')

        # Right panel: column C
        plt.subplot(133)
        sns.heatmap(df3, vmin=-40, vmax=150, cmap ="coolwarm" , cbar=True , cbar_kws={"ticks":[-40,150,-20,0,25,50,75,100,125]}) 
        fig.axes[-1].set_ylabel('[°C]', size=20) # label of the most recently added axes
        #sns.despine(left=True)
        plt.title('C', fontsize=12, color='black', loc='left', style='italic')
        plt.axis('off')


        plt.suptitle(f'Analysis of data in cycle Nr.: {count}', color='yellow', backgroundcolor='black', fontsize=48, fontweight='bold')
        plt.subplots_adjust(top=0.7, bottom=0.3, left=0.05, right=0.95, hspace=0.2, wspace=0.2)
        #plt.subplot_tool()
        # Save the combined figure as <column>/<column><column><cycle>.png, then display it
        plt.savefig(f'{i}/{i}{i}{count}.png') 
        plt.show()

到目前为止,我无法获得正确的输出:在每个周期中,每个图都被绘制了 3 次。例如,它先把 'A' 画在左侧,然后又以 'B' 和 'C' 的名义把 'A' 画在同一窗口的中间和右侧;接着 'B' 被绘制了 3 次而不是 1 次(它本应只出现在中间);最后 'C' 也被绘制了 3 次而不是 1 次——它本应只出现在右侧,却也出现在了中间和左侧!

我的目标是:在主 for 循环中,为每个周期(每 480 个值为一组)在单个窗口中绘制 A、B、C 全部 3 列的子图!

第一个周期:0000 -----> A,B,C 的子图 ----> 将其存储为 0000.png

第二个循环:0001 -----> A、B、C 的子图 ----> 将其存储为 0001.png ...

问题在于:在 for 循环内部使用 df 时,它把 A、B 或 C 的值传递了 3 次,而每一列的值本应只传递 1 次。我提供了一张不成功输出的图片(https://i.stack.imgur.com/4gF44.jpg),这样你就可以清楚地看到问题出在哪里。

我想要的输出如下:

我还提供了 3 个周期的数据集示例文本文件:dataset https://drive.google.com/file/d/1dQD_CARJ00-olkK_h7Sl8Q2lORX-d6QF/view?usp=sharing


因此,在查看您的代码和您的要求之后,我想我知道问题是什么。 你的for循环的顺序错误。您需要为每个周期创建一个新图形,其中包含每个“A”、“B”和“C”作为子图。

这意味着你的外层(outer)循环应该遍历各个周期(cycle),内层(inner)循环再遍历 i;而按照你现在的缩进和循环顺序,在第一次遍历 i 时(i='A', cycle=1)你就已经试图绘制全部 'A'、'B'、'C' 子图,而不是等第一个周期内对所有 i 的循环(i='A','B','C', cycle=1)结束之后再绘制。

这也是你遇到 df3 未定义这一问题的原因(正如你在这个答案 https://stackoverflow.com/a/54305388/10944175 的评论中提到的)。df3 的定义位于检查 if 'C' in i 的 if 块中;在第一次循环时该条件不满足,因此 df3 尚未定义,但你仍在尝试绘制它!

此外,您再次遇到与 NaN/inf 值相同的问题。

重新排列for循环和缩进并清理 NaN/inf 值会得到以下代码:

#...
#df contains all the data
df = pd.DataFrame(data, columns=['A','B','C'], index = id_set[:,0])
# Treat +inf, -inf and NaN alike: all of them become 0. Replacing only np.inf
# would leave -inf in the data.
df = df.replace([np.inf, -np.inf], np.nan).fillna(0)

'''
Data generation phase

'''

# Each cycle is one block of 480 consecutive rows of df. Every cycle produces
# one CSV per column and one figure showing A, B and C side by side.
cycles = int(len(df)/480)
print(cycles)

# Heatmap settings per column: target range of the normalization (also used as
# vmin/vmax), colorbar ticks, colorbar unit label and extra heatmap keywords.
panel_specs = {
    'A': {'range': (-1, 1),
          'ticks': [-1.0,-0.75,-0.5,-0.25,0.00,0.25,0.5,0.75,1.0],
          'unit': '[MPa]',
          'extra': {'linewidths': .75, 'linecolor': 'black'}},
    'B': {'range': (-1, 1),
          'ticks': [-1.0,-0.75,-0.5,-0.25,0.00,0.25,0.5,0.75,1.0],
          'unit': '[MPa]',
          'extra': {}},
    'C': {'range': (-40, 150),
          'ticks': [-40,150,-20,0,25,50,75,100,125],
          'unit': '[°C]',
          'extra': {}},
}

# Extremes of each whole column (all cycles); they do not change per cycle.
col_min = df.min()
col_max = df.max()

# Create the output folders once. exist_ok tolerates an existing folder but,
# unlike a bare except, still reports real failures such as missing permissions.
for i in df:
    os.makedirs(i, exist_ok=True)

for cycle in range(cycles):
    count = '{:04}'.format(cycle)
    j = cycle * 480
    window = df.iloc[j:j+480]

    # One figure per cycle with one axes per column, filled left to right.
    fig, axes = plt.subplots(nrows=1, ncols=3, figsize=(20,10))
    for ax, i in zip(axes, df):
        spec = panel_specs[i]
        values = window[i].values

        # Write the re-ordered raw matrix of this column and cycle to
        # <column>/<column><cycle>.csv
        csv = print_df(mkdf(values))
        csv.to_csv(f'{i}/{i}{count}.csv', header=None, index=None)

        # Normalize each column with its OWN extremes; mixing in another
        # column's min/max would distort the colors of that panel.
        min_nor, max_nor = spec['range']
        scaled = normalize(values, col_min[i], col_max[i], min_nor, max_nor)

        sns.heatmap(print_df(mkdf(scaled)), vmin=min_nor, vmax=max_nor,
                    cmap="coolwarm", cbar=True,
                    cbar_kws={"ticks": spec['ticks']}, ax=ax, **spec['extra'])
        # The colorbar axes is the one heatmap has just appended to the figure.
        fig.axes[-1].set_ylabel(spec['unit'], size=20)
        ax.set_title(i, fontsize=12, color='black', loc='left', style='italic')
        ax.axis('off')

    fig.suptitle(f'Analysis of data in cycle Nr.: {count}', color='yellow', backgroundcolor='black', fontsize=48, fontweight='bold')
    fig.subplots_adjust(top=0.7, bottom=0.3, left=0.05, right=0.95, hspace=0.2, wspace=0.2)
    # The combined figure keeps the location used so far (folder of the last
    # column, column name doubled), now stated explicitly instead of relying
    # on the loop variable leaking out of the inner loop.
    last = df.columns[-1]
    fig.savefig(f'{last}/{last}{last}{count}.png')
    plt.show()
    # Release the figure so memory does not grow with the number of cycles.
    plt.close(fig)

使用你提供的数据,这段代码会生成以下三张图像,每张都是一个单独的图形(每个周期一张):

Figure 1 https://i.stack.imgur.com/GCwXP.png, Figure 2 https://i.stack.imgur.com/R9s6r.png, Figure 3 https://i.stack.imgur.com/bIkgx.png

一般来说,你的代码相当混乱。我明白了,如果您是编程新手并且只想分析数据,您可以做任何有效的事情,无论它是否漂亮。

但是,我认为凌乱的代码意味着您无法正确查看脚本的底层逻辑,这就是您遇到此问题的原因。

如果您再次遇到类似的问题,我建议您编写一些包含所有循环的“伪代码”,并尝试考虑您在每个循环中想要完成的任务。

本文内容由网友自发贡献,版权归原作者所有,本站不承担相应法律责任。如您发现有涉嫌抄袭侵权的内容,请联系:hwhale#tublm.com(使用前将#替换为@)

如何在 for 循环内的一个窗口中制作 Pandas 数据框中列的子图 的相关文章

随机推荐