''' Compare 2 search clouds and identify how many terms are common to both clouds
'''

__author__ = 'Dilum Bandara'
__version__ = "0.1"
__date__ = "07/26/2010"
__license__ = 'Python'
__copyright__ = "Copyright (c) 2010 Dilum Bandara"


# Input/output locations. Each input file is expected to hold one search term
# per line as tab-separated fields: keyword first, its value second.
baseFile = 'torrenttractor.txt'
compareFile = 'youbittorrent.txt'
resultFile = 'results.txt'


def _read_terms(path, label):
    """Read a tab-separated search-cloud file into a dictionary.

    Lines without a tab (e.g. a header line) are ignored.  Keywords are
    lower-cased and stripped of surrounding whitespace so the comparison is
    case-insensitive.  If a keyword occurs more than once, the value of its
    last occurrence wins.

    path  -- name of the file to read
    label -- name printed in the warning when a blank keyword is found
             ('BASE' or 'COMPARE')

    Returns a dict mapping each normalized keyword to the text of the second
    field of its line, without any trailing line terminator.
    """
    terms = {}
    with open(path, "r") as handle:
        for line in handle:
            # Skip header/malformed lines that carry no tab separator.
            if '\t' not in line:
                continue
            fields = line.split('\t')
            keyword = fields[0].lower().strip()
            # A keyword that is empty after stripping is not a real search
            # term.  Report and skip it; if this happens in both the BASE and
            # the COMPARE file, add 1 to the printed total manually.
            if not keyword:
                print('Blank key found in ' + label)
                continue
            # When the line has exactly two fields the second one still ends
            # with the line terminator; remove it so that the output format
            # does not depend on the number of columns in the input.
            terms[keyword] = fields[1].rstrip('\r\n')
    return terms


keywords = _read_terms(baseFile, 'BASE')
compareTerms = _read_terms(compareFile, 'COMPARE')

# Terms of the COMPARE cloud that also appear in the BASE cloud, with the
# value taken from the COMPARE file.
matchingOnes = {}
for keyword in compareTerms:
    if keyword in keywords:
        matchingOnes[keyword] = compareTerms[keyword]

# Count distinct common terms so the total always agrees with the number of
# lines written to the result file, even if COMPARE repeats a keyword.
noMatching = len(matchingOnes)

# The result file is opened only after both inputs were read successfully, so
# a missing input does not leave behind a truncated result file.
with open(resultFile, "w") as fOut:
    for keyword in matchingOnes:
        fOut.write(keyword + '\t' + matchingOnes[keyword] + '\n')

print(noMatching)
