__author__ = 'Dilum Bandara'
__version__ = "1.0"
__date__ = "03/14/2011"
__updated__ = "06/06/2012"
__license__ = 'Python'
__copyright__ = "Copyright (c) 2011 Dilum Bandara, Colorado State University"

''' 
Use this tool to extract node data from CoMon dump files & then save it as a tab-separated file.
Nodes that do not respond (based on RespTime) will be skipped.
'''
import re

#TODO Edit this list so that it names exactly the attributes to be extracted.
#The node ID, IP & sample time columns are always written, so do not list them here
attbs_to_extract = [
    'CPUSpeed', 'NumCores', 'CPUFree',
    '1MinLoad', '5MinLoad', '15MinLoad',
    'MemSize', 'MemFree',
    'DiskSize', 'DiskFree',
    'TxRate', 'RxRate',
    'KernVer', 'FCNamex', 'CoresPerCPU', 'BWLimit',
    'RespTime', 'Boot',
]
#Alternative, longer selection:
#attbs_to_extract = [
#    'CPUSpeed', 'NumCores', 'CPUFree', '1MinLoad', '5MinLoad', '15MinLoad', 'MemSize', 'MemFree',
#    'DiskSize', 'DiskFree', 'TxRate', 'RxRate', 'Location', 'Latitude', 'Longitude', 'NodeType',
#    'KernVer', 'FCNamex', 'CoresPerCPU', 'BWLimit', 'RespTime', 'Uptime', 'TimerAve', 'Drift',
#    'Boot', 'TimerMax', 'SwapUsed',
#]

#Common prefix of the dump file names; the entries of file_date are appended to it
base_fIn = 'dump_comon_201102' #TODO update this if a different month

#Day suffixes of the dump files to read, in the order they are processed
#file_date = ['04'] #TODO Use this instead of the list below if data is for a single day
#file_date = ['03', '04', '05', '06', '07', '08', '09'] #Another multi-day example
file_date = [
    '01', '02', '03', '04', '05', '06', '07',
    '08', '09', '10', '11', '12', '13', '14',
]

#Output file name, derived from the first day that is processed
fOut = 'resources_raw_%s.txt' % file_date[0]

#TODO Start time of the 1st data sample; it is subtracted from every sample time.
#Change depending on the time range
START_TIME = 1296536401.0

#ID given to the next newly seen node (0 based index)
next_nid = 0

#Names that may appear in attbs_to_extract. They are also the keys under which
#the per-node values are kept while an entry is being parsed
_EXTRACTABLE = (
    'RespTime', 'SwapIn', 'SwapOut', 'DiskIn', 'DiskOut', 'DiskSvc', 'DiskUtil', 'Uptime',
    '1MinLoad', '5MinLoad', '15MinLoad', 'TimerMax', 'TimerAve', 'MemFree', 'DiskUsed',
    'DiskFree', 'DiskSize', 'MemSize', 'MemActive', 'SwapUsed', 'KernVer', 'CPUFree',
    'CPUSpeed', 'NumCores', 'CoresPerCPU', 'FCNamex', 'TxRate', 'RxRate', 'Boot',
    'NodeType', 'BWLimit', 'Latitude', 'Longitude', 'Location', 'Drift')
_EXTRACTABLE_SET = frozenset(_EXTRACTABLE)

#RespTime values that mark a node as not responding; such entries are skipped
_FAILED_RESPONSES = frozenset([
    'Connection refused', 'No route to host', 'no response',
    'Connection reset by peer', 'Network is unreachable', 'others'])

#Dump fields copied verbatim: dump field name -> record key
_PLAIN_FIELDS = {
    'RealIP': 'RealIP', 'RespTime': 'RespTime', 'Uptime': 'Uptime', 'MemPress': 'MemFree',
    'CPUSpeed': 'CPUSpeed', 'NumCores': 'NumCores', 'TxRate': 'TxRate', 'RxRate': 'RxRate',
    'BWLimit': 'BWLimit', 'Drift': 'Drift'}

#Dump fields read from whitespace separated columns (column 0 is the field name itself):
#dump field name -> ((record key, column), ...)
_COLUMN_FIELDS = {
    'Start': (('Start', 1),),
    'VMStat': (('SwapIn', 7), ('SwapOut', 8), ('DiskIn', 9), ('DiskOut', 10)),
    'IOStat': (('DiskSvc', 12), ('DiskUtil', 14)),
    'Loads': (('1MinLoad', 1), ('5MinLoad', 2), ('15MinLoad', 3)),
    'Timer': (('TimerMax', 1), ('TimerAve', 2))}

#Dump fields whose text is replaced by a numeric code:
#dump field name -> (record key, {text: code}, prefix printed for unknown text)
_CODED_FIELDS = {
    'FCNamex': ('FCNamex', {'Werewolf': '1', 'Cambridge': '2', '': '3'}, 'FCNa '),
    'BootState': ('Boot', {'boot': '1', 'safeboot': '2', 'reinstall': '3',
                           'disabled': '4', 'failboot': '5'}, 'Boot state '),
    'NodeType': ('NodeType', {'Prod': '1'}, 'Node type '),
    'Location': ('Location', {'NorthAm': '1', 'SouthAm': '2', 'Europe': '3', 'Asia': '4'},
                 'Location ')}


def _new_record():
    """Return a fresh per-node record with every field set to '-1'.

    Building the record from one list of keys guarantees that all fields are
    reset between nodes. The old chained assignment misspelled two names
    (txRtae, drif), so TxRate and Drift were never reset and a missing line
    either reused the previous node's value or raised NameError.
    """
    return dict.fromkeys(_EXTRACTABLE + ('Start', 'RealIP'), '-1')


def _parse_line(rec, line):
    """Store the value carried by one non-blank dump line into rec.

    rec  -- record dict created by _new_record(); updated in place
    line -- raw line of the form 'Name: value', including its line terminator

    Lines whose name is not handled below are ignored. Text that has no
    numeric code is reported with print and leaves the field unchanged.
    Raises IndexError when a column based field has fewer columns than expected.
    """
    name, sep, _ = line.partition(':')
    if not sep:
        return
    #Text after 'Name: ' without the last character of the line (the line terminator)
    value = line[len(name) + 2:-1]

    #Modify following to extract attributes not defined below
    if name in _PLAIN_FIELDS:
        rec[_PLAIN_FIELDS[name]] = value
    elif name in _COLUMN_FIELDS:
        cols = line.split()
        for key, pos in _COLUMN_FIELDS[name]:
            rec[key] = cols[pos]
    elif name in _CODED_FIELDS:
        key, codes, label = _CODED_FIELDS[name]
        if value in codes:
            rec[key] = codes[value]
        else:
            print(label + value)
    elif name == 'DfDot':
        cols = line.split()
        if len(cols) > 1: #the field may be empty
            rec['DiskUsed'] = cols[1][:-1] #drop the last character of the column
            rec['DiskFree'] = cols[2]
            rec['DiskSize'] = cols[3]
    elif name == 'MemInfo':
        cols = line.split()
        if len(cols) > 1: #the field may be empty
            rec['MemSize'] = cols[1]
            rec['MemActive'] = cols[2]
            rec['SwapUsed'] = cols[3]
    elif name == 'KernVer':
        if '2.6.22' in value:
            rec['KernVer'] = '1'
        elif '2.6.27' in value:
            rec['KernVer'] = '2'
        elif '2.6.32' in value:
            rec['KernVer'] = '3'
        elif value == '':
            rec['KernVer'] = '-1'
        else:
            print('kernVer ' + value)
    elif name == 'Burp':
        #drop the last character of the value; an empty result means no data
        rec['CPUFree'] = value[:-1] or '-1'
    elif name == 'CoresPerCPU':
        rec['CoresPerCPU'] = value or '1' #an empty value is taken as 1
    elif name == 'Latitude':
        #latitude is between (-90, 90), so -100.0 marks a missing value
        if value == '-1':
            value = '-100.0'
        rec['Latitude'] = value
    elif name == 'Longitude':
        #longitude is between (-180, 180), so -200.0 marks a missing value
        if value == '-1':
            value = '-200.0'
        rec['Longitude'] = value


node_IP_list = []  #IP of every responding node, in order of first appearance
node_ID_list = []  #ID given to the node at the same position of node_IP_list
resources = []     #one list of strings per sample: [nid, IP, relative time, attributes...]
count = 0          #no of node entries read so far, responding or not
_ip_index = {}     #IP -> position in node_IP_list, avoids scanning the list for every entry

for i, date_suffix in enumerate(file_date): #each file
    print('Starting file %d' % i)
    #the with block closes the file even when parsing raises
    with open(base_fIn + date_suffix, 'r') as fdIn:
        rec = _new_record()
        end = 0    #1 once a blank line has been seen since the last completed entry

        for line in fdIn:
            if not line.startswith('\n'):
                _parse_line(rec, line)
            elif end == 0:
                end = 1
            else:
                #2nd blank line since the last completed entry: the node entry is complete.
                #An entry that is not closed this way before the end of the file is dropped.
                #if valid response node is considered to be active
                if rec['RespTime'] not in _FAILED_RESPONSES:
                    ip = rec['RealIP']
                    if ip not in _ip_index: #New node. Add it
                        _ip_index[ip] = len(node_IP_list)
                        node_IP_list.append(ip)
                        node_ID_list.append(next_nid)
                        next_nid = next_nid + 1
                    index = _ip_index[ip]
                    attribs = [str(node_ID_list[index]), ip,
                               str(int(rec['Start']) - START_TIME)]
                    #NOTE(review): a name that is not in _EXTRACTABLE adds no value here,
                    #while the header of the output file still lists it - confirm whether
                    #such names should be rejected instead
                    for attb in attbs_to_extract:
                        if attb in _EXTRACTABLE_SET:
                            attribs.append(rec[attb])
                    resources.append(attribs)
                #Reset variables
                rec = _new_record()
                end = 0
                if (count % 10000) == 0:
                    print(count) #Indicate progress
                count = count + 1

print('No of nodes: %d' % len(node_ID_list)) #No of distinct nodes

#Pack nids before dumping to file: a node is written with the position of its ID
#in node_ID_list. The positions are looked up once here instead of searching the
#list for every row; setdefault keeps the first position, as list.index() does
packed_nid = {}
for position, nid in enumerate(node_ID_list):
    packed_nid.setdefault(nid, position)

#the with block closes the output file even when a write raises
with open(fOut, 'w') as fdOut:
    #write list of attributes in 1st line
    fdOut.write('\t'.join(['nid', 'IP', 'time'] + list(attbs_to_extract)) + '\n')

    #Dump attributes of nodes that were active - at least once
    for row in resources:
        nid = int(row[0])
        if nid in packed_nid: #valid node
            #1st column is the packed nid, the remaining columns are written unchanged
            fdOut.write('\t'.join([str(packed_nid[nid])] + list(row[1:])) + '\n')
