from nas_201_api import NASBench201API as API
import re
import pandas as pd
import json
import numpy as np
import argparse

# Benchmark handle shared by the scoring code further down in this script.
api = API('./NAS-Bench-201-v1_1-096897.pth')

# The description and help text are what `--help` prints, so they describe
# this script rather than the argparse tutorial it was copied from.
parser = argparse.ArgumentParser(
    description='Score generated graphs against NAS-Bench-201 '
                '(CIFAR-10 test accuracy).')

parser.add_argument(
    '--file_path', type=str, default='211035.txt',
    help='name of the graph dump inside ./output_graphs/ '
         '(default: %(default)s)')
args = parser.parse_args()

def process_graph_data(text: str, row_len: int = 8) -> pd.DataFrame:
    """Parse a textual dump of graphs into a DataFrame of nodes and edges.

    The dump is split on the literal marker ``nodes:``. Inside each section
    the text before the first ``edges:`` marker is searched for the first
    run of ``tensor(<int>)`` tokens (the node labels); the text right after
    that marker supplies the flat list of edge values, which is cut into
    rows of ``row_len`` values.

    Sections that have no ``edges:`` marker, or no node tokens ahead of it,
    are skipped so that the two columns always have the same length.

    Args:
        text: Raw contents of the dump.
        row_len: Number of consecutive edge values grouped into one row.
            Must be positive; defaults to 8.

    Returns:
        A DataFrame with one row per parsed graph and two columns:
        ``nodes`` (list of ints) and ``edges`` (list of lists of ints).
        It has zero rows when nothing could be parsed.
    """
    node_run_pattern = r'(tensor\(\d+\) ?)+'
    token_pattern = r'tensor\((\d+)\)'

    nodes_list = []
    edges_list = []

    # Element 0 is whatever precedes the first 'nodes:' marker, not a graph.
    for section in text.strip().split('nodes:')[1:]:
        parts = section.split('edges:')
        if len(parts) < 2:
            # No edge list at all: nothing usable in this section.
            continue
        nodes_part, edges_part = parts[0], parts[1]

        # Search only the node part so edge tokens can never be mistaken
        # for node labels when the node part is empty.
        node_run = re.search(node_run_pattern, nodes_part)
        if node_run is None:
            continue

        flat_edges = [int(v) for v in re.findall(token_pattern, edges_part)]
        nodes_list.append(
            [int(v) for v in re.findall(token_pattern, node_run.group(0))])
        edges_list.append(
            [flat_edges[i:i + row_len]
             for i in range(0, len(flat_edges), row_len)])

    data = {
        'nodes': nodes_list,
        'edges': edges_list,
    }

    print(data)
    df = pd.DataFrame(data)
    print('df')
    # Show the first graph as a sample; an empty parse has no row 0.
    if not df.empty:
        print(df['nodes'][0], df['edges'][0])
    return df

# Reference connectivity that the strict upper triangle of a candidate
# adjacency matrix has to reproduce exactly.
_NASBENCH201_ADJ = (
    (0, 1, 1, 0, 1, 0, 0, 0),
    (0, 0, 0, 1, 0, 1, 0, 0),
    (0, 0, 0, 0, 0, 0, 1, 0),
    (0, 0, 0, 0, 0, 0, 1, 0),
    (0, 0, 0, 0, 0, 0, 0, 1),
    (0, 0, 0, 0, 0, 0, 0, 1),
    (0, 0, 0, 0, 0, 0, 0, 1),
    (0, 0, 0, 0, 0, 0, 0, 0),
)


def is_valid_nasbench201(adj, ops):
    """Check whether a graph matches the fixed 8-node cell layout.

    A graph is accepted when all of the following hold:

    * ``ops`` has exactly 8 entries, starts with 0 and ends with 6;
    * every entry in between (positions 1 through 6) is one of 1..5;
    * ``adj`` is an 8x8 matrix containing only 0 and 1;
    * every entry above the diagonal equals the reference connectivity.

    Entries on or below the diagonal only have to be 0 or 1.

    Args:
        adj: Adjacency matrix as a sequence of rows.
        ops: Sequence of integer operation labels, one per node.

    Returns:
        True if the graph passes every check, False otherwise.
    """
    # Trace output kept so the script's console log stays the same.
    print(ops)

    size = len(_NASBENCH201_ADJ)

    # Length is checked first so empty or truncated label lists are
    # rejected instead of raising on ops[0].
    if len(ops) != size or ops[0] != 0 or ops[-1] != 6:
        return False
    # Every interior label, including position 1, must be a real operation.
    if any(op not in (1, 2, 3, 4, 5) for op in ops[1:-1]):
        return False

    # A missing or ragged matrix would otherwise skip part of the comparison.
    if len(adj) != size or any(len(row) != size for row in adj):
        return False

    for i, row in enumerate(adj):
        for j, value in enumerate(row):
            if value not in (0, 1):
                return False
            if j > i and value != _NASBENCH201_ADJ[i][j]:
                return False
    return True

# Operation name for each integer node label; the list position is the label.
num_to_op = [
    'input',
    'nor_conv_1x1',
    'nor_conv_3x3',
    'avg_pool_3x3',
    'skip_connect',
    'none',
    'output',
]


def nodes_to_arch_str(nodes):
    """Render integer node labels as an architecture string.

    Every label in ``nodes`` is translated through ``num_to_op``; the names
    at positions 1 through 6 are then laid out as three ``+``-separated
    groups of ``|<name>~<k>|`` entries holding one, two and three names.

    Args:
        nodes: Sequence of integer labels with at least 7 entries.

    Returns:
        The architecture string, e.g.
        ``|a~0|+|b~0|c~1|+|d~0|e~1|f~2|``.

    Raises:
        IndexError: If a label has no entry in ``num_to_op`` or fewer than
            7 labels are given.
    """
    names = [num_to_op[label] for label in nodes]
    first_group = f"|{names[1]}~0|"
    second_group = f"|{names[2]}~0|{names[3]}~1|"
    third_group = f"|{names[4]}~0|{names[5]}~1|{names[6]}~2|"
    return "+".join((first_group, second_group, third_group))

filename = args.file_path

# Read the whole dump first so the input handle is closed before the graphs
# are scored and is not shadowed by the output handle below.
with open('./output_graphs/' + filename, 'r') as f:
    texts = f.read()

df = process_graph_data(texts)
valid = 0
not_valid = 0
scores = []  # (benchmark index, test accuracy) for every scored graph
dist = {'<90': 0, '<91': 0, '<92': 0, '<93': 0, '<94': 0, '>94': 0}
# Exclusive upper bound of each accuracy bucket, in ascending order; anything
# not below the last bound is counted under '>94'.
buckets = ((90, '<90'), (91, '<91'), (92, '<92'), (93, '<93'), (94, '<94'))

for nodes, edges in zip(df['nodes'], df['edges']):
    if not is_valid_nasbench201(edges, nodes):
        not_valid += 1
        continue

    arch_str = nodes_to_arch_str(nodes)
    index = api.query_index_by_arch(arch_str)
    if index < 0:
        # query_index_by_arch is documented to return -1 for an architecture
        # outside the search space; such a graph cannot be scored.
        not_valid += 1
        continue

    valid += 1
    res = api.get_more_info(index, 'cifar10', None, hp=200, is_random=False)
    acc = res['test-accuracy']
    scores.append((index, acc))

    for upper, label in buckets:
        if acc < upper:
            dist[label] += 1
            break
    else:
        dist['>94'] += 1

with open('./output_graphs/' + filename + '.json', 'w') as out_f:
    json.dump(scores, out_f)

print(scores)
print(valid, not_valid)
print(dist)
if scores:
    accuracies = [entry[1] for entry in scores]
    print("mean: ", np.mean(accuracies))
    print("max: ", np.max(accuracies))
    print("min: ", np.min(accuracies))
else:
    # np.max / np.min raise on an empty sequence, so report instead.
    print("no valid architectures found; skipping mean/max/min")