'''
Created on Sep 1, 2011
Modified on Aug 21, 2012
@author: Dilum Bandara
@version: 0.1
@license: Apache License v2.0

   Copyright 2012 H. M. N. Dilum Bandara and Anura P. Jayasumana, Colorado State
   University

   Licensed under the Apache License, Version 2.0 (the "License");
   you may not use this file except in compliance with the License.
   You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.
'''

'''
Extract queries from already processed SWORD queries then build list of
state transitions & frequencies. A state is defined by attribute name, min
range, & max range
'''

import random
import BaseConfig

#Attributes to extract: attribute name as it appears in a query -> abbreviated
#label. Several query names may share one label.
attList = {
    'resptime': 'RspT',
    'oneminload': '1mLd',
    'load_one': '1mLd',
    'freemem': 'MFree',
    'free_mem': 'MFree',
    'mem_free': 'MFree',
    'disk_free': 'DFree',
    'gbfree': 'DFree',
    'freecpu': 'CFree',
    'txrate': 'Tx',
    'cpuspeed': 'CSp',
    'memsize': 'MSize',
    'uptime': 'UpT',
    'fiveminload': '5mLd',
    'fifteenminload': '15mLd',
    'location': 'Loc',
    'latency': 'Late',
    'bwlimit': 'BWLim',
    'drift': 'Drif',
    'disksize': 'DSize',
    'kernver': 'KVer',
    'bootstate': 'Boot',
    'rxrate': 'Rx',
}
fIn = 'QueryAttributes.txt' #Input file, one query per line
fOut = 'States.txt' #Output file for the state transitions & their counts
#Order in which attributes are processed:
#1 - ascending, 2 - descending, 3 - random, 4 - as they appear in the queries
mode = 4
random.seed(111) #Fixed seed, so that the random order of mode 3 is repeatable

#Load every query line, lower-cased so that attribute names can be matched
#against the (lower-case) keys of attList. The 'with' block closes the file
#even if reading fails part way through.
with open(fIn, 'r') as fdIn:
    queries = [line.lower() for line in fdIn]

#Decide the order in which attributes are looked up within each query.
#sorted()/list() are used instead of keys().sort(), which only works where
#keys() returns a list.
if mode == 1:
    attListSorted = sorted(attList)
elif mode == 2:
    attListSorted = sorted(attList, reverse=True)
elif mode == 3:
    attListSorted = list(attList)
    random.shuffle(attListSorted)
elif mode == 4: #Attributes in the order they first appear in the queries
    attListSorted = []
    seenAtts = set() #Known attributes already added to attListSorted
    for line in queries:
        for field in line.split('\t'):
            #Candidate name: text between the 1st character of the field &
            #its 1st '>'. If there is no '>', find() returns -1 & the slice
            #drops the last character instead.
            attName = field[1:field.find('>')]
            if attName in attList and attName not in seenAtts:
                seenAtts.add(attName)
                attListSorted.append(attName)
else:
    #Raised explicitly; an assert would be skipped when running with -O
    raise ValueError('Unknown mode: %r' % (mode,))

#How a symbolic 'max' upper bound in a query is treated for an attribute
_MAX_KEEP = 0 #Leave the values untouched
_MAX_SUBST = 1 #Replace values[2] & values[3] with the configured maximum
_MAX_WARN = 2 #Only report it, so that the query can be checked by hand
#NOTE(review): with _MAX_KEEP or _MAX_WARN a literal 'max' left in values[3]
#still reaches the numeric conversion in _stateName() & raises ValueError.

#Attribute name -> (state-name prefix,
#                   name of the lower bound in BaseConfig,
#                   name of the upper bound in BaseConfig,
#                   digits given to round(), or None to format bounds as int,
#                   treatment of 'max',
#                   type used when comparing a value with its bound)
#Bounds are kept by name & resolved with getattr() when needed, so only the
#limits of attributes that actually occur in the queries are read.
_ATT_RULES = {
    'fiveminload': ('5mLd', 'MIN_5_MIN_LOAD', 'MAX_5_MIN_LOAD',
                    1, _MAX_KEEP, float),
    'fifteenminload': ('15mLd', 'MIN_15_MIN_LOAD', 'MAX_15_MIN_LOAD',
                       1, _MAX_KEEP, float),
    'oneminload': ('1mLd', 'MIN_1_MIN_LOAD', 'MAX_1_MIN_LOAD',
                   1, _MAX_KEEP, float),
    'load_one': ('1mLd', 'MIN_1_MIN_LOAD', 'MAX_1_MIN_LOAD',
                 1, _MAX_KEEP, float),
    'resptime': ('RspT', 'MIN_RESP_TIME', 'MAX_RESP_TIME',
                 1, _MAX_SUBST, float),
    'cpuspeed': ('CSp', 'MIN_CPU_SPEED', 'MAX_CPU_SPEED',
                 1, _MAX_KEEP, float),
    'drift': ('Drift', 'MIN_DRIFT', 'MAX_DRIFT',
              1, _MAX_KEEP, float),
    'latency': ('Late', 'MIN_TIMER_AVE', 'MAX_TIMER_AVE',
                1, _MAX_KEEP, float),
    #NOTE(review): the lower bound used to be replaced by
    #BaseConfig.MAX_MIN_SIZE, presumably a typo for MIN_MEM_SIZE - confirm
    #against BaseConfig.
    'memsize': ('MSize', 'MIN_MEM_SIZE', 'MAX_MEM_SIZE',
                1, _MAX_KEEP, float),
    'freemem': ('MFree', 'MIN_MEM_FREE', 'MAX_MEM_FREE',
                0, _MAX_SUBST, float),
    'mem_free': ('MFree', 'MIN_MEM_FREE', 'MAX_MEM_FREE',
                 0, _MAX_SUBST, float),
    #free_mem produces the same MFree state as freemem & mem_free, so 'max'
    #is substituted for it as well (it used to be reported only).
    'free_mem': ('MFree', 'MIN_MEM_FREE', 'MAX_MEM_FREE',
                 0, _MAX_SUBST, float),
    'freecpu': ('CFree', 'MIN_CPU_FREE', 'MAX_CPU_FREE',
                0, _MAX_WARN, float),
    'numofcores': ('NCore', 'MIN_NUM_CORES', 'MAX_NUM_CORES',
                   0, _MAX_WARN, float),
    'bootstate': ('Boot', 'MIN_BOOT', 'MAX_BOOT',
                  None, _MAX_SUBST, int),
    'kernver': ('KVer', 'MIN_KERN_VER', 'MAX_KERN_VER',
                0, _MAX_WARN, float),
    'disksize': ('DSize', 'MIN_DISK_SIZE', 'MAX_DISK_SIZE',
                 -1, _MAX_SUBST, float),
    'uptime': ('UpT', 'MIN_UPTIME', 'MAX_UPTIME',
               -1, _MAX_SUBST, float),
    'gbfree': ('DFree', 'MIN_DISK_FREE', 'MAX_DISK_FREE',
               -1, _MAX_SUBST, float),
    'disk_free': ('DFree', 'MIN_DISK_FREE', 'MAX_DISK_FREE',
                  -1, _MAX_SUBST, float),
    'txrate': ('Tx', 'MIN_TX_RATE', 'MAX_TX_RATE',
               -1, _MAX_KEEP, float),
    'rxrate': ('Rx', 'MIN_RX_RATE', 'MAX_RX_RATE',
               -1, _MAX_KEEP, float),
    'bwlimit': ('BWLim', 'MIN_BW_LIMIT', 'MAX_BW_LIMIT',
                None, _MAX_SUBST, float),
}


def _hasMax(values):
    '''Return True if values[2] or values[3] contains the text 'max'.'''
    return 'max' in values[2] or 'max' in values[3]


def _checkRange(values, numLine, att):
    '''
    Print an ERROR line (with line number & attribute) for every pair of
    bounds that is out of order. Nothing is corrected here.
    '''
    if float(values[1]) < float(values[0]):
        print('ERROR %s %s' % (numLine, att))
    if not _hasMax(values): #Upper bounds are numeric, so compare them too
        if float(values[3]) < float(values[2]):
            print('ERROR %s %s' % (numLine, att))
        if float(values[3]) < float(values[0]):
            print('ERROR %s %s' % (numLine, att))


def _fmtBound(value, digits):
    '''
    Format one bound for use in a state name: as an integer when digits is
    None, otherwise as a float rounded to the given number of digits.
    '''
    if digits is None:
        return str(int(value))
    return str(round(float(value), digits))


def _stateName(att, values):
    '''
    Build the state name '<prefix>_<min>_<max>' for attribute att from the
    comma separated values of a query. values[0] & values[3] are clamped, in
    place, to the limits in BaseConfig. Returns None if att is not known.
    '''
    if att == 'location': #Not clamped, only 'max' is substituted
        if _hasMax(values):
            values[2] = BaseConfig.MAX_LOCATION
            values[3] = BaseConfig.MAX_LOCATION
        return 'Loc_' + _fmtBound(values[0], -1) + '_' + \
            _fmtBound(values[3], -1)
    if att == 'os': #Single value, taken from values[1]
        if float(values[1]) < BaseConfig.MIN_FC_NAMEX:
            values[1] = BaseConfig.MIN_FC_NAMEX
        if float(values[1]) > BaseConfig.MAX_FC_NAMEX:
            values[1] = BaseConfig.MAX_FC_NAMEX
        return 'OS_' + _fmtBound(values[1], 0)

    rule = _ATT_RULES.get(att)
    if rule is None:
        return None
    prefix, minName, maxName, digits, onMax, cast = rule

    if onMax != _MAX_KEEP and _hasMax(values):
        if onMax == _MAX_SUBST:
            upperLimit = getattr(BaseConfig, maxName)
            values[2] = upperLimit
            values[3] = upperLimit
        else:
            print('Check max value %s' % att)

    #Clamp the range to the limits configured for this attribute
    if cast(values[0]) < cast(getattr(BaseConfig, minName)):
        values[0] = getattr(BaseConfig, minName)
    if cast(values[3]) > cast(getattr(BaseConfig, maxName)):
        values[3] = getattr(BaseConfig, maxName)
    return prefix + '_' + _fmtBound(values[0], digits) + '_' + \
        _fmtBound(values[3], digits)


def _countTransition(states, fromState, toState):
    '''Add 1 to the number of transitions seen from fromState to toState.'''
    nextStates = states.setdefault(fromState, {})
    nextStates[toState] = nextStates.get(toState, 0) + 1


states = {'START':{}} #{start state: {next state: num of queries}}
for numLine, line in enumerate(queries, 1):
    #Stays 'START' until the 1st attribute of this query has been found
    preState = 'START'
    for att in attListSorted: #Only the 1st occurrence of each is considered
        start = line.find(att)
        if start == -1: continue
        end = line.find('</', start + 1)
        #Text after the attribute name & the character following it, up to
        #(not including) the last character before the next '</'
        values = line[start + len(att) + 1:end - 1].split(',')
        _checkRange(values, numLine, att)

        stateName = _stateName(att, values)
        if stateName is None:
            #Skip it, rather than counting a stale or undefined state name
            print('Unknown line %s %s' % (line, att))
            continue

        _countTransition(states, preState, stateName)
        preState = stateName
    if preState != 'START': #There was at least 1 attribute. So add FINISH
        _countTransition(states, preState, 'FINISH')
#Dump states, one transition per line: state, next state & number of
#queries, separated by tabs. The 'with' block closes the file even if a
#write fails.
with open(fOut, 'w') as fdOut:
    for state in states:
        for nextState, numQueries in states[state].items():
            fdOut.write(state + '\t' + nextState + '\t' +
                        str(numQueries) + '\n')
