'''
Created on June 22, 2012
@author: Dilum Bandara
@version: 0.1
@license: Apache License v2.0

   Copyright 2012 H. M. N. Dilum Bandara and Anura P. Jayasumana, Colorado State
   University

   Licensed under the Apache License, Version 2.0 (the "License");
   you may not use this file except in compliance with the License.
   You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.
'''

'''
Convert static attributes of SETI@home nodes to the format of PwlCopula tool
Also, print the min and max value of each attribute, together with the input
line number where it occurs, so that outliers in the data can be checked manually

Output has the following format:
#some comment lines starting with #
#NumAttributes<tab>NumNodes<tab>Att1<tab>Att2<tab>  <tab>Attn<tab>Att1<tab>...
'''

# Input: tab-separated file; 1st line holds the attribute names, every
# following line holds the numeric attribute values of one node.
fIn = 'multipleAttributes.txt'
# Output: comment header followed by one line with all nodes flattened.
fOut = 'SETI@homeStatic.txt'

# Comment header of the output file. The attribute names read from the 1st
# input line are appended to the last header line further below.
tmpStr = '#State attributes form nodes from SETI@home.\n'
tmpStr += '#Nodes that were active after #2012/04/30 are considered.\n'
tmpStr += '#Few nodes with outliers invalid nodes are removed\n'
tmpStr += '#NumAtt\tNumNodes'

attNames = []     # Attribute names taken from the 1st input line
minList = []      # Smallest value seen for each attribute
minListLine = []  # Input line number related to each min value
maxList = []      # Largest value seen for each attribute
maxListLine = []  # Input line number related to each max value
nodes = []        # One list of raw field strings per node
lineNum = 0

# Read and validate the whole input before the output file is opened, so a
# malformed input (float() raising) does not truncate an existing output.
# The with-block closes the file even when an exception is raised.
with open(fIn, 'r') as fdIn:
    for line in fdIn:
        lineNum += 1
        # Strip only the line terminator. Slicing off the last character
        # would eat a data character when the final line has no newline and
        # would leave '\r' in the last field for CRLF files.
        line = line.rstrip('\r\n')
        if lineNum == 1:  # 1st line carries the attribute names
            attNames = line.split('\t')
            # Infinite sentinels: any real value replaces them, including
            # maxima below -1 that the previous -1 start value would hide.
            minList = [float('inf')] * len(attNames)
            maxList = [float('-inf')] * len(attNames)
            minListLine = [-1] * len(attNames)
            maxListLine = [-1] * len(attNames)
            continue
        if not line:
            # A blank line (e.g. trailing empty line) is not a node; it is
            # skipped but still counted so reported line numbers stay exact.
            continue
        tmpLine = line.split('\t')  # Split tab separated line
        nodes.append(tmpLine)  # Track each node
        for i, field in enumerate(tmpLine):
            tmpValue = float(field)
            if tmpValue > maxList[i]:
                maxList[i] = tmpValue
                maxListLine[i] = lineNum
            if tmpValue < minList[i]:
                minList[i] = tmpValue
                minListLine[i] = lineNum

# Print min & max nodes as "name value line". The single-argument print(...)
# form gives the same output as a Python 2 print statement and as the
# Python 3 print function.
print('MIN nodes: value line--------------')
for attName, attValue, attLine in zip(attNames, minList, minListLine):
    print('%s %s %s' % (attName, attValue, attLine))
print('MAX nodes: value line--------------')
for attName, attValue, attLine in zip(attNames, maxList, maxListLine):
    print('%s %s %s' % (attName, attValue, attLine))

# Dump to file
tmpStr += ''.join('\t' + attName for attName in attNames)
# Attribute count comes from the 1st node when there is one; with no nodes
# fall back to the header so an empty data set does not raise IndexError.
if nodes:
    numAtt = len(nodes[0])
else:
    numAtt = len(attNames)
with open(fOut, 'w') as fdOut:
    fdOut.write(tmpStr + '\n')
    fdOut.write(str(numAtt) + '\t' + str(len(nodes)))
    for node in nodes:
        # Every field is preceded by a tab; nodes are never empty lists
        # because str.split() always returns at least one element.
        fdOut.write('\t' + '\t'.join(node))
    fdOut.write('\n')
