'''
Extract node data
'''

from CPU_OS import CPU_OS

class value():
    """One host record, rendered as a single tab-separated text row.

    Every attribute is stored as a string and starts out as the
    placeholder '-1'; whoever fills the record in overwrites only the
    fields it actually has data for.
    """
    # Original author's note: append 'T' to indicate tag in variable name

    def __init__(self):
        # Initialise every field to the '-1' placeholder, in a fixed order.
        for attr in ('ID', 'pVendor', 'pSpeed', 'pBits', 'pArchi',
                     'os', 'osVer', 'tZone', 'nCore', 'pFP', 'pIP',
                     'mSize', 'cache', 'mSwap', 'dSize', 'dFree',
                     'Rx', 'Tx'):
            setattr(self, attr, '-1')

    def get(self):
        """Return the record as one tab-separated line ending in a newline.

        The column order is the same as the titles from getHeader().
        """
        columns = (
            self.ID, self.pVendor, self.pSpeed,
            self.pBits, self.pArchi, self.nCore,
            self.pIP, self.pFP, self.mSize,
            self.mSwap, self.cache, self.dSize,
            self.dFree, self.os, self.osVer,
            self.Rx, self.Tx, self.tZone,
        )
        return '\t'.join(columns) + '\n'

    def getHeader(self):
        """Return the tab-separated column titles, ending in a newline."""
        titles = (
            'nid', 'cpuVen', 'cSp', 'cBit', 'cArchi', 'nCore',
            'pIops', 'pFpops', 'MSize', 'Swap', 'Cache', 'DSize',
            'DFree', 'OS', 'OSVer', 'Rx', 'Tx', 'TZone',
        )
        return '\t'.join(titles) + '\n'

#fdIn = open('tmp.xml', 'r')
# Convert the host XML dump into a tab-separated table.
#
# The input is scanned line by line (no XML parser): each line is matched
# against known tag names, the text between the first '>' and the first '</'
# is taken as the value, and one row is written for every closing </host>.

# Lines containing any of these tags are ignored.
_SKIPPED_TAGS = (
    '<host>', '<host_cpid>', '<userid>', '<total_credit>', '<expavg_credit>',
    '<expavg_time>', '<coprocs>', '<create_time>', '<p_membw>',
    '<avg_turnaround>', '<credit_per_cpu_sec>',
)

# Divisors applied to the memory/disk values before they are stored.
# NOTE(review): the tag names (e.g. m_nbytes) suggest the raw values are
# byte counts, which would make the results MiB / GiB -- confirm.
_DIV_MIB = 1024 * 1024
_DIV_GIB = 1024 * 1024 * 1024

count = 0            # number of rows written so far; also used as the row ID
convert = CPU_OS()
node = value()       # record currently being filled in
nodeIDs = {}         # <id> text -> record that carried it (duplicate check)

# 'with' guarantees both files are closed even when an assert or a float()
# conversion below raises part-way through the input.
with open('host_2012_06_02_active_2012_04_30.xml', 'r') as fdIn:
    with open('host_2012_06_02_active_2012_04_30.txt', 'w') as fdOut:
        fdOut.write(node.getHeader())
        for line in fdIn:
            if '</host>' in line:
                # End of a host element: emit the row and start a new record.
                node.ID = str(count)
                fdOut.write(node.get())
                node = value()
                count += 1
                if count % 10000 == 0:
                    print(count)  # progress indicator
                continue

            if any(tag in line for tag in _SKIPPED_TAGS):
                continue

            val = line[line.find('>') + 1: line.find('</')]
            if '<id>' in line:
                # The <id> text is only used to report repeats; the ID
                # column of the row is the running count set at </host>.
                if val in nodeIDs:
                    # Both rows already end in a newline, so they are simply
                    # appended one after the other.
                    print('Duplicate ' + node.get() + nodeIDs[val].get())
                nodeIDs[val] = node
            elif '<p_vendor>' in line:
                if 'GenuineIntel' in val:
                    val = 'Intel'
                elif 'AuthenticAMD' in val:
                    val = 'AMD'
                node.pVendor = val
            elif '<p_model>' in line:
                result = convert.convertCPU(val)
                assert len(result) > 0, line + node.pVendor
                # Elements 1..3 of the conversion result are stored as
                # architecture, bit width and speed respectively.
                node.pArchi = result[1]
                node.pBits = result[2]
                node.pSpeed = result[3]
            elif '<os_name>' in line:
                node.os = val
            elif '<os_version>' in line:
                result = convert.convertOS(val)
                assert len(result) > 0, line + node.os
                node.osVer = result[0]
            elif '<timezone>' in line:
                node.tZone = val
            elif '<ncpus>' in line:
                node.nCore = val
            elif '<p_fpops>' in line:
                node.pFP = val
            elif '<p_iops>' in line:
                node.pIP = val
            elif '<m_nbytes>' in line:
                node.mSize = str(float(val) / _DIV_GIB)
            elif '<m_cache>' in line:
                node.cache = str(float(val) / _DIV_MIB)
            elif '<m_swap>' in line:
                node.mSwap = str(float(val) / _DIV_GIB)
            elif '<d_total>' in line:
                node.dSize = str(float(val) / _DIV_GIB)
            elif '<d_free>' in line:
                node.dFree = str(float(val) / _DIV_GIB)
            elif '<n_bwup>' in line:
                node.Tx = val
            elif '<n_bwdown>' in line:
                node.Rx = val

print(count)  # total number of rows written
