# Module metadata (standard dunder names)
__author__ = 'Dilum Bandara'
__version__ = "1.0"
__date__ = "03/14/2011"
__modified__ = "08/15/2012"
__license__ = 'Python'
__copyright__ = "Copyright (c) 2011 Dilum Bandara, Colorado State University"

'''
Extract time series for a given list of attributes. Uses data from extractAttributes.py.
Modify this script if additional attributes are needed.
A node's data is still considered useful if up to the given number of scattered samples
are missing, but not if multiple adjacent samples are missing. If an intermediate sample
is missing, the average of its two neighbouring samples is inserted. If the first or last
sample is missing, the next/previous one is used instead.

'''

import sys

# Parse the command line:
#   <input file> <start_time> <end_time> <sampling_interval> <tolerance>
if len(sys.argv) != 6:
    print('Command format: python extractTS.py <input file> <start_time> <end_time> <sampling_interval> <tolerance>')
    sys.exit(1)  # Wrong usage is a failure; report it through a non-zero exit status

fIn = sys.argv[1]
start_time = float(sys.argv[2])
end_time = float(sys.argv[3])
interval = float(sys.argv[4])
if interval <= 0:
    # A zero interval would raise ZeroDivisionError in the divisions by interval
    sys.exit('Sampling interval must be positive')

# Number of samples expected per node between start_time and end_time (both included)
num_samples = int((end_time - start_time)/interval + 1)
# Minimum number of samples a node must have; <tolerance> is how many may be missing
num_samples_tolerate = num_samples - int(sys.argv[5])
print(num_samples)

# Column headers (looked up in the first line of the input file) of the
# attributes to extract
att_to_extract = [
    'CFree', '1mLd', '5mLd', '15mLd',
    'MFree', 'DFree',
    'Tx', 'Rx',
    'CSp', 'NCore', 'MSize',
]
index = list()  # Column positions of att_to_extract in the data file
num_nodes = 0   # Highest node ID seen + 1 (node IDs are 0 based)

# For every attribute: the output file name, and a dict that maps a node ID
# (as a string) to that node's list of [time, value] samples.
fOut1, cpu_free = 'CPUFree.txt', {}
fOut2, one_min_load = '1MinLoad.txt', {}
fOut3, five_min_load = '5MinLoad.txt', {}
fOut4, fifteen_min_load = '15MinLoad.txt', {}
fOut5, mem_free = 'MemFree.txt', {}
fOut6, disk_free = 'DiskFree.txt', {}
fOut7, tx_rate = 'TxRate.txt', {}
fOut8, rx_rate = 'RxRate.txt', {}
# Number 9 (response time, 'RespTime.txt') is currently disabled
fOut10, cpu_speed = 'CPUSpeed.txt', {}
fOut11, num_cores = 'NumCores.txt', {}
fOut12, mem_size = 'MemSize.txt', {}

# Read the input file. The first non-blank line is a header naming the columns;
# every following line is one sample: column 0 is the node ID, column 1 the
# time, and the attribute values sit in the columns located via the header.

# Stores listed in the same order as att_to_extract, so that index[n] is the
# input column that feeds parsed_series[n]
parsed_series = [cpu_free, one_min_load, five_min_load, fifteen_min_load,
                 mem_free, disk_free, tx_rate, rx_rate,
                 cpu_speed, num_cores, mem_size]

with open(fIn, 'r') as fdIn:  # closed even if a malformed line raises
    first = True
    for line in fdIn:
        tmp = line.split()
        if not tmp:  # Blank line, nothing to parse
            continue

        if first:  # Header line, find which column holds each tracked attribute
            for att in att_to_extract:
                index.append(tmp.index(att))
            first = False
            continue

        node = tmp[0]
        # Round to nearest interval
        sample_time = int(round(float(tmp[1])/interval) * interval)
        if sample_time < start_time:
            continue
        if sample_time > end_time:
            # Stops reading the whole file; assumes lines are ordered by time
            # -- TODO confirm against extractAttributes.py output
            break

        # Values are kept as the strings read from the file
        for col, series in zip(index, parsed_series):
            series.setdefault(node, []).append([sample_time, tmp[col]])

        if num_nodes < int(node) + 1:  # 0 based index
            num_nodes = int(node) + 1

print('Number of nodes %d' % num_nodes)

#Remove nodes with missing values
# For every node ID in range(num_nodes):
#   - nodes with no data at all are counted in missing_nodes,
#   - nodes with fewer than num_samples_tolerate samples, with a gap of more
#     than one sample, or with several samples missing at the end are added
#     to skip_list,
#   - a single missing sample is filled in: the mean of its two neighbours for
#     an intermediate sample, a copy of the next/previous value for the
#     first/last sample.

# Every store receives the same insertions so all of them stay aligned
series_to_fill = [cpu_free, one_min_load, five_min_load, fifteen_min_load,
                  mem_free, disk_free, tx_rate, rx_rate,
                  cpu_speed, num_cores, mem_size]

missing_nodes = 0
skip_list = []
for j in range(num_nodes): #Check each node
    node = str(j)
    if node not in cpu_free:
        missing_nodes += 1
        continue

    # Gaps are detected on the cpu_free timestamps only. This is the very list
    # stored in cpu_free, so the insertions below are visible through it.
    samples = cpu_free[node]
    if len(samples) < num_samples_tolerate: #Too many missing samples
        skip_list.append(j)
        continue

    for i in range(num_samples): #Check each sample
        # src_a and src_b are the positions of the two existing samples whose
        # values are averaged to build the sample inserted at position i
        if i == 0: #1st sample
            if samples[0][0] - start_time > interval: #1st sample too late
                skip_list.append(j)
                break
            if samples[0][0] <= start_time: #1st sample present
                continue
            src_a = src_b = 0 #Copy the next sample
            new_time = samples[0][0] - interval
        elif i != (num_samples - 1): #Intermediate sample
            if i == len(samples): #Several samples missing at end
                skip_list.append(j)
                break
            gap = samples[i][0] - samples[i - 1][0]
            if gap > 2 * interval: #More than 1 sample missing
                skip_list.append(j)
                break
            if gap <= interval: #Sample present
                continue
            src_a = i
            src_b = i - 1
            new_time = samples[i][0] - interval
        else: #Last sample
            if len(samples) >= num_samples: #Last sample present
                continue
            src_a = src_b = i - 1 #Copy the previous sample
            new_time = samples[i - 1][0] + interval

        for series in series_to_fill:
            values = series[node]
            filled = (float(values[src_a][1]) + float(values[src_b][1]))/2
            values.insert(i, [new_time, str(filled)])

print('Skipping nodes %d' % len(skip_list))
print('Missing nodes %d' % missing_nodes)
print('Available nodes %d' % (num_nodes - len(skip_list) - missing_nodes))
 
#Write to new file. Arrange as columns
# One output file per attribute: one row per sample and one tab-terminated
# column per node that is neither skipped nor missing.
outputs = [(fOut1, cpu_free), (fOut2, one_min_load), (fOut3, five_min_load),
           (fOut4, fifteen_min_load), (fOut5, mem_free), (fOut6, disk_free),
           (fOut7, tx_rate), (fOut8, rx_rate), (fOut10, cpu_speed),
           (fOut11, num_cores), (fOut12, mem_size)]

# Work out the kept nodes once instead of scanning skip_list for every value
skipped = set(skip_list)
kept_nodes = [str(j) for j in range(num_nodes)
              if j not in skipped and str(j) in cpu_free]

open_outputs = []
try:
    for out_name, series in outputs:
        open_outputs.append((open(out_name, 'w'), series))

    for i in range(num_samples):
        for fdOut, series in open_outputs:
            row = ''.join(series[node][i][1] + '\t' for node in kept_nodes)
            fdOut.write(row + '\n')
finally:
    # Close whatever was opened, even if writing failed part-way
    for fdOut, series in open_outputs:
        fdOut.close()
