#!/usr/bin/env python3

import pandas as pd
import matplotlib.pyplot as plt
import sys
import os

def extract_hops_from_filename(filename):
    """Return the hop count embedded in *filename* as a string of digits.

    Only the base name is inspected, so digits in parent directories are
    ignored. The digit run that directly precedes "hop"/"hops" (optionally
    separated by whitespace, "_" or "-") is preferred, e.g.
    "run2_5hops.csv" -> "5". When the name has no such marker, every digit
    in the base name is concatenated instead, e.g. "test_12.csv" -> "12".

    Returns:
        The digits as a string, or 'N/A' when the base name has no digits.
    """
    import re  # local import: the file does not import re at module level

    base_name = os.path.basename(filename)

    # Prefer the number attached to the "hops" marker so unrelated digits
    # elsewhere in the name (run ids, dates, ...) do not corrupt the count.
    marked = re.search(r'(\d+)[\s_-]*hops?', base_name, flags=re.IGNORECASE)
    if marked:
        return marked.group(1)

    # No marker: fall back to concatenating all digits of the base name.
    digits = ''.join(c for c in base_name if c.isdigit())
    return digits if digits else 'N/A'

def _read_last_two_numeric_columns(csv_file):
    """Return the last two columns of a header-less CSV as numeric Series.

    Values that cannot be parsed as numbers are coerced to NaN, and every row
    with NaN in either of the two columns is dropped, so both returned Series
    cover the same rows.
    """
    df = pd.read_csv(csv_file, header=None)
    # Only the last two columns are plotted, so only those are converted
    # (column-wise, rather than row by row over the whole frame).
    tail = df.iloc[:, -2:].apply(pd.to_numeric, errors='coerce').dropna()
    return tail.iloc[:, 0], tail.iloc[:, 1]


def _format_hops_label(hops):
    """Return the caption for one file's pair of boxes, e.g. '1 Hop', '3 Hops'.

    *hops* is the string returned by extract_hops_from_filename. When it is
    not an integer (such as the 'N/A' placeholder) it is returned unchanged
    instead of raising.
    """
    try:
        count = int(hops)
    except ValueError:
        return hops
    return f'{hops} Hop' if count < 2 else f'{hops} Hops'


def generate_combined_box_plot(csv_files):
    """Draw one box plot for all *csv_files* and save it as a PDF.

    For each file the last two columns are plotted as two boxes labelled
    'IF 1' and 'IF 2', and the hop count taken from the file name is written
    as a caption beneath that pair. The figure is saved next to the first
    input file as "<first file without extension>_combined.pdf", the figure
    is closed, and a confirmation line is printed.

    Args:
        csv_files: Indexable, non-empty sequence of paths to header-less CSV
            files.

    Raises:
        ValueError: If *csv_files* is empty.
    """
    if not csv_files:
        raise ValueError("at least one CSV file is required")

    all_data = []    # Two Series per file, in plotting order
    labels = []      # Tick label for every box
    hops_label = []  # One caption per file (per pair of boxes)

    for csv_file in csv_files:
        data1, data2 = _read_last_two_numeric_columns(csv_file)
        all_data.extend((data1, data2))
        labels.extend(('IF 1', 'IF 2'))
        hops_label.append(
            _format_hops_label(extract_hops_from_filename(csv_file))
        )

    fig = plt.figure(figsize=(6, 5))
    try:
        # Create a combined box plot for all datasets
        plt.boxplot(all_data, meanline=True, showmeans=True, labels=labels,
                    widths=0.3)

        plt.grid(axis='y')

        # Set plot title and labels
        plt.title('')
        plt.ylabel('Throughput (MBit/s)')

        # All captions share one fixed height: 14 data units below the
        # smallest plotted value. Series left empty after filtering are
        # skipped so they cannot break the minimum.
        lowest_values = [series.min() for series in all_data
                         if len(series) > 0]
        min_y_value = (min(lowest_values) if lowest_values else 0) - 14

        # Boxes sit at x = 1, 2, 3, ...; file i owns boxes 2i+1 and 2i+2,
        # so its caption is centred between them at 2i + 1.5.
        for i, hop in enumerate(hops_label):
            plt.text(2 * i + 1.5, min_y_value, hop, ha='center', fontsize=12)

        # Output name: first input file's name with "_combined.pdf" appended
        output_file = os.path.splitext(csv_files[0])[0] + '_combined.pdf'

        # bbox_inches='tight' crops the page to the drawn content, which
        # includes the captions placed below the axes.
        plt.savefig(output_file, format='pdf', bbox_inches='tight')
    finally:
        # Release the figure so repeated calls do not accumulate open figures.
        plt.close(fig)

    print(f"Combined box plot saved and cropped as {output_file}")

if __name__ == "__main__":
    # Every command-line argument is treated as a CSV file to plot.
    if len(sys.argv) < 2:
        # sys.exit with a string writes it to stderr and exits with status 1,
        # so a missing-argument call is reported as a failure to the shell.
        sys.exit("Usage: ./script.py <csv_file1> <csv_file2> ...")
    generate_combined_box_plot(sys.argv[1:])