# yolov7/tools/merge_results.py
import os
import warnings
from argparse import ArgumentParser

import pandas as pd
import yaml

# Silence all library warnings (e.g. pandas deprecation chatter) for this tool.
# filterwarnings('ignore') is the supported mechanism; monkeypatching
# warnings.warn would break any code that relies on the warnings API.
warnings.filterwarnings('ignore')
def process_class_stats(file_path):
    """Parse a whitespace-separated per-class stats file into a one-row frame.

    The file is expected to contain rows of
    ``class_name num_files num_objects precision recall map50 map``; it may
    hold several evaluation dumps, each starting with a row whose class name
    is ``'all'``.  Only the last dump is kept.

    :param file_path: path to a ``class_stats.txt``-style text file.
    :return: a single-row ``pd.DataFrame`` (index ``['map50']``) with one
             ``<class_name>_map50`` column per class in the final dump.
    """
    columns = ['class_name', 'num_files', 'num_objects', 'precision', 'recall', 'map50', 'map']
    # sep=r'\s+' replaces delim_whitespace=True, which is deprecated in
    # pandas 2.x and removed in 3.0; the parsing behavior is identical.
    df = pd.read_csv(file_path, sep=r'\s+', names=columns, header=None)
    # Keep only the most recent evaluation: slice from the last 'all' row on.
    last_all_index = df[df['class_name'] == 'all'].index[-1]
    sliced_df = df.iloc[last_all_index:]
    result_df = sliced_df[['class_name', 'num_objects', 'map50']].copy()
    result_df = result_df.set_index('class_name')
    # Transpose so each class becomes a column in a single 'map50' row,
    # then tag every column name with a '_map50' suffix.
    result_df = result_df[['map50']].T
    result_df.columns = [f"{col}_map50" for col in result_df.columns]
    return result_df
def main(args: list = None):
    """Aggregate YOLOv7 training runs into a single summary CSV.

    Walks ``--path`` recursively; for every subdirectory that contains
    ``results.txt``, ``hyp.yaml`` and ``opt.yaml``, takes the LAST line of
    ``results.txt`` (final-epoch metrics), merges it with the run's
    hyperparameters/options (and, when present, per-class mAP@0.5 values
    from ``class_stats.txt``), and writes one CSV row per run.

    :param args: optional argv-style list forwarded to argparse
                 (``None`` means ``sys.argv``).
    """
    parser = ArgumentParser()
    parser.add_argument('--path', type=str, default='/home/hanoch/projects/tir_od/runs/train', metavar='PATH',
                        help="if given, all output of the training will be in this folder. "
                             "The exception is the tensorboard logs.")
    parser.add_argument('--task', default='train', help='train, val, test, speed or study')
    args = parser.parse_args(args)

    # NOTE(review): the summary destination is hard-coded — TODO make it a CLI option.
    path_result = '/home/hanoch/projects/tir_od'
    # Column layout of one results.txt line, in file order.
    results_columns = ['Epoch', 'gpu_mem', 'box_loss', 'obj_loss', 'cls_loss', 'total_loss', 'labels', 'img_size',
                       'P', 'R', 'mAP@.5', 'mAP@.5:.95', 'val_box_loss', 'val_obj_loss', 'val_cls_loss']

    data = []  # one dict (future CSV row) per training run
    for subdir, dirs, files in os.walk(args.path):
        results_path = os.path.join(subdir, 'results.txt')
        hyp_path = os.path.join(subdir, 'hyp.yaml')
        opt_path = os.path.join(subdir, 'opt.yaml')
        per_class_results = os.path.join(subdir, 'class_stats.txt')

        # A directory only counts as a run if all three artifacts exist.
        if not (os.path.exists(results_path) and os.path.exists(hyp_path) and os.path.exists(opt_path)):
            continue

        with open(results_path, 'r') as results_file:
            lines = results_file.readlines()
        if not lines:
            # An empty results.txt would crash lines[-1]; skip it instead.
            print(f"Warning: {results_path} is empty, skipping.")
            continue

        results_values = lines[-1].strip().split()
        if len(results_values) != len(results_columns):
            print(f"Warning: Unexpected format in {results_path}, skipping.")
            continue
        results_data = dict(zip(results_columns, results_values))

        with open(hyp_path, 'r') as hyp_file:
            hyp_data = yaml.safe_load(hyp_file)
        with open(opt_path, 'r') as opt_file:
            opt_data = yaml.safe_load(opt_file)

        # Later updates win on key collisions: results < hyp < opt < per-class.
        row = {'subdir': subdir}
        row.update(results_data)
        row.update(hyp_data)
        row.update(opt_data)

        # Per-class mAP@0.5 columns are optional.
        if os.path.exists(per_class_results):
            df_per_class_results = process_class_stats(per_class_results)
            if not df_per_class_results.empty:
                row.update(df_per_class_results.to_dict(orient='list'))

        data.append(row)

    df = pd.DataFrame(data)
    output_csv = 'runs_' + str(args.task) + '_summary.csv'
    df.to_csv(os.path.join(path_result, output_csv), index=False)
    print(f"Data successfully written to {os.path.join(path_result, output_csv)}")
# CLI entry point: run the summary aggregation with sys.argv arguments.
if __name__ == '__main__':
    main()