#! /usr/bin/env python
#
# Fingerprint a web server and identify its vendor/version/OS
# Copyright (C) 2003 Dustin Lee
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
#
# You can reach me at
######################################################################

import sys,pprint,glob,getopt,re,time
import socket, urlparse, select

import core.controllers.outputManager as om
from core.controllers.w3afException import w3afException
import core.data.kb.config as cf


class request:
    """Collect elements needed to send a Request to an HTTP server"""

    def __init__(self, url, method='GET', local_uri='/', version='1.0'):
        """Store the pieces of the request.

        url is passed unchanged to socket.connect() as the host, so it is
        expected to be a host name or address rather than a full URL.
        """
        self.url = url
        self.method = method
        self.local_uri = local_uri
        self.version = version
        self.headers = [['User-Agent', cf.cf.getData('User-Agent')]]
        self.line_joiner = '\r\n'
        self.body = ''
        # When non-empty this replaces the generated "METHOD URI HTTP/x.y"
        # line verbatim (used by the malformed method line probes).
        self.adhoc_method_line = ''

    def __str__(self):
        """Return the raw request text: request line, headers, blank line, body."""
        method_line = self.adhoc_method_line
        # NOTE: an empty ad-hoc line is indistinguishable from "not set", so
        # the '' probe is sent as a normal request line. Kept as-is because
        # the recorded fingerprints were produced with this behaviour.
        if not method_line:
            method_line = '%s %s HTTP/%s' % (self.method, self.local_uri,
                                             self.version)
        header_lines = ['%s: %s' % (name, data) for name, data in self.headers]
        head = self.line_joiner.join([method_line] + header_lines)
        return head + (2 * self.line_joiner) + self.body

    def submit(self):
        """Send the request to (self.url, PORT) and return a response object.

        Uses the module globals PORT, useSSL and VERBOSE.  A failure while
        reading the answer is retried (three attempts in total, with a
        doubling sleep in between); whatever was read on the last attempt is
        parsed.

        Raises w3afException if connecting, the SSL setup or sending fails.
        """
        om.out.debug('hmap is sending: ' + str(self))

        host = self.url
        tries = 3
        wait_time = 1
        data = ''

        while tries != 0:
            if tries < 3 and VERBOSE:
                print('!!! TRIES = %s' % tries)

            s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)

            # Added by Andres Riancho to get SSL support !
            try:
                s.connect((host, PORT))
            except Exception:
                s.close()
                raise w3afException('hmap: Connection failed to ' +
                                    str(host) + ':' + str(PORT))

            if useSSL:
                try:
                    s2 = socket.ssl(s)
                except Exception:
                    s.close()
                    raise w3afException('hmap: SSL Connection failed to ' +
                                        str(host) + ':' + str(PORT))
                # Route the plain socket API through the SSL wrapper so the
                # code below does not care which transport is in use.
                s.recv = s2.read
                s.send = s2.write

            try:
                s.send(str(self))
            except Exception:
                s.close()
                raise w3afException('hmap: Failed to send data to socket.')

            chunks = []
            try:
                try:
                    while 1:
                        # Give the server up to 10 seconds of silence before
                        # deciding the response is complete.
                        readable = select.select([s], [], [], 10)[0]
                        if not readable:
                            break
                        temp = readable[0].recv(1024)
                        if not temp:
                            break
                        chunks.append(temp)
                finally:
                    s.close()
            except Exception:
                # KeyboardInterrupt is not an Exception subclass, so Ctrl-C
                # still propagates to the caller.
                data = ''.join(chunks)
                tries -= 1
                time.sleep(wait_time)
                wait_time *= 2
                continue

            data = ''.join(chunks)
            break

        return response(data)

    def add_header(self, name, data):
        """Append a header; headers are sent in insertion order."""
        self.headers.append([name, data])


######################################################################
class response:
    """Read in Response from HTTP server and parse out elements of interest"""

    def __init__(self, raw_text):
        self.raw_text = raw_text
        self.headers = []
        self.body = ''
        self.__parse(raw_text)

    def __parse(self, text):
        """Fill in response_code, response_text, headers and body from text.

        response_code is 'NO_RESPONSE' for empty text and 'NO_RESPONSE_CODE'
        when the text does not start with an HTTP/1.x status line; in both
        cases response_text is 'NONE'.
        """
        if not text:
            self.response_code = 'NO_RESPONSE'
            self.response_text = 'NONE'
            return

        if not re.search(r'^HTTP/1\.[01] [0-9]{3} [A-Z]{,10}', text):
            self.response_code = 'NO_RESPONSE_CODE'  # HTTP/0.9 like
            self.response_text = 'NONE'
            self.body = text
            return

        # really parse it
        crlf_index = text.find('\r\n')
        cr_index = text.find('\r')
        line_splitter = '\r\n'  # TODO: is this sufficient???
        if crlf_index == -1 or cr_index < crlf_index:
            line_splitter = '\n'
        response_lines = text.split(line_splitter)
        self.response_line = response_lines[0]

        response_line_match = re.search(
            r'(HTTP/1\.[01]) ([0-9]{3}) ([^\r\n]*)', text)
        self.response_code, self.response_text = \
            response_line_match.groups()[1:]

        # list.index() raises ValueError (it never returns -1) when there is
        # no blank line; in that case every remaining line is a header.
        try:
            blank_index = response_lines.index('')
        except ValueError:
            blank_index = len(response_lines)

        self.headers = response_lines[1:blank_index]
        # NOTE: !! actually don't need or want body to be split but don't
        # really care at this point ...
        self.body = response_lines[blank_index:]

    def return_code(self):
        """Return the (response_code, response_text) pair."""
        return self.response_code, self.response_text

    def describe(self):
        """Print the response line, headers and body to stdout."""
        print('-' * 70)
        print('RESPONSE LINE:')
        if hasattr(self, 'response_line'):
            print(self.response_line)
        print('-' * 70)
        print('HEADERS:')
        if hasattr(self, 'headers'):
            print(self.headers)
        print('-' * 70)
        print('BODY:')
        if hasattr(self, 'body'):
            print(self.body)

    def _is_header(self, header_line, name):
        """Tell whether header_line carries exactly the field called name.

        The comparison is case sensitive on purpose: the capitalisation a
        server uses is itself part of the fingerprint.
        """
        return header_line.split(':', 1)[0].rstrip() == name

    def has_header(self, name):
        """Return 1 if a header whose field name equals name is present, else 0.

        The whole field name is compared, so 'Server' does not match a
        'Server-Timing' header.
        """
        for h in self.headers:
            if self._is_header(h, name):
                return 1
        return 0

    def header_data(self, name):
        """Return the value of the first header called name.

        The header must be present (checked with an assert).
        """
        assert(self.has_header(name))
        for h in self.headers:
            if self._is_header(h, name):
                return h.split(': ', 1)[-1]

    def header_names(self):
        """Return the header field names in the order they were received."""
        return [h.split(':', 1)[0] for h in self.headers]

    def servername(self):
        """Return the value of the Server header, or None when absent."""
        if not self.has_header('Server'):
            return None
        return self.header_data('Server')


######################################################################
# Functions for probing server and collecting characteristics

def get_fingerprint(url):
    """Run every probe against url and return the collected fingerprint.

    The module level fingerprint dict is replaced by a fresh one first, so
    characteristics gathered for a previous target are not mixed into this
    result.
    """
    global fingerprint
    fingerprint = {'LEXICAL': {}, 'SYNTACTIC': {}, 'SEMANTIC': {}}

    basic_get(url)  # TODO: this is redundant with later test...
    basic_options(url)
    unknown_method(url)
    unauthorized_activity(url)
    nonexistant_object(url)
    malformed_method_line(url)
    long_url_ranges(url)
    long_default_ranges(url)
    many_header_ranges(url)
    large_header_ranges(url)
    unavailable_accept(url)
    fake_content_length(url)

    ### TODO some more tests to add:
    # compare_get_head_header_order
    ## see if body sent back??
    # also see if get same headers in same order
    # require_host
    # unmodified_since
    # also with sending bad date

    fingerprint['SYNTACTIC']['HEADER_ORDER'] = winnow_ordered_list(
        fingerprint['SYNTACTIC'].get('HEADER_ORDER', []))

    return fingerprint


######################################################################
# Known test types for provoking characteristics
# Many tests are just "randomly" designed out of thin air
# but many come from reading the RFC and looking for things
# that implementors may have varied in implementations.

def basic_get(url):
    """Probe with a plain GET /."""
    req = request(url)
    res = req.submit()
    get_characteristics('basic_get', res)


def basic_options(url):
    """Probe with OPTIONS /."""
    req = request(url, method='OPTIONS')
    res = req.submit()
    get_characteristics('basic_options', res)


def unknown_method(url):
    """Probe with a method name no server should know."""
    req = request(url, method='QWERTY')
    res = req.submit()
    get_characteristics('unknown_method', res)


def unauthorized_activity(url):
    """Probe / with each of a list of standard and WebDAV methods."""
    unauthorized_activities = (
        'OPTIONS', 'TRACE', 'GET', 'HEAD', 'DELETE',
        'PUT', 'POST', 'COPY', 'MOVE', 'MKCOL',
        'PROPFIND', 'PROPPATCH', 'LOCK', 'UNLOCK',
        'SEARCH')
    for ua in unauthorized_activities:
        req = request(url, method=ua)
        res = req.submit()
        get_characteristics('unauthorized_activity', res)


def nonexistant_object(url):
    """Probe with a GET for a path that should not exist."""
    req = request(url, local_uri='/asdfg.hjkl')
    res = req.submit()
    get_characteristics('nonexistant_object', res)


# ways to mess up the method line
# (nothing)METHOD(space)RELATIVE-URI(space)VERSION(line-sep)
# - replace any one of these with wrong thing
# - string together variations of any of these
# - number where expects letter or vice verse
# - really LONG things
# - invalid characters
# - different file system conventions
# - illegal paths "../../../"
# - url encoding (hex, unicode, invalid of each)
# - something instead of nothing and vice versa
# - uppercase/lowercase
def malformed_method_line(url):
    """Send every malformed request line and record each response code.

    Probe number i is stored as SEMANTIC characteristic 'MALFORMED_' plus i
    zero padded to three digits, so the ORDER of the tuple below is part of
    the fingerprint format: never insert or remove entries in the middle.
    """
    malformed_methods = (
        'GET',  #0 TODO: repeat all these with HEAD and OTHER
        'GET /',  #1
        'GET / HTTP/999.99',
        'GET / HHTP/1.0',
        'GET / HTP/1.0',
        'GET / HHTP/999.99',
        #'GET / HHTP/1.0',
        'GET / hhtp/999.99',
        'GET / http/999.99',
        'GET / HTTP/Q.9',
        'GET / HTTP/9.Q',
        'GET / HTTP/Q.Q',  #10
        'GET / HTTP/1.X',
        'GET / HTTP/1.10',
        'GET / HTTP/1.1.0',
        'GET / HTTP/1.2',
        'GET / HTTP/2.1',
        'GET / HTTP/1,0',
        #r'\GET / HTTP/1.0' or '\\GET / HTTP/1.0'
        #'GET / HTTP\1.0',
        #'GET / HTTP-1.0',
        #'GET / HTTP 1.0',
        'GET / HTTP/1.0X',
        'GET / HTTP/',
        #'get / http/1.0',
        #'qwerty / HTTP/1.0'
        #'GETX / HTTP/1.0'
        #' GET/HTTP/1.0',
        'GET/HTTP/1.0',
        'GET/ HTTP/1.0',  #20
        'GET /HTTP/1.0',
        'GET/HTTP /1.0',
        'GET/HTTP/1 .0',
        'GET/HTTP/1. 0',
        'GET/HTTP/1.0 ',
        'GET / HTTP /1.0',
        #etc....
        'HEAD /.\\ HTTP/1.0',  # indicates windows??
        'HEAD /asdfasdfasdfasdfasdf/../ HTTP/1.0',
        'HEAD /asdfasdfasdfasdfasdf/.. HTTP/1.0',
        'HEAD /./././././././././././././././ HTTP/1.0',  #30
        'HEAD /././././././qwerty/.././././././././ HTTP/1.0',
        #'HEAD ../ HTTP/1.0',
        'HEAD /.. HTTP/1.0',
        'HEAD /../ HTTP/1.0',
        'HEAD /../../../../../ HTTP/1.0',
        'HEAD .. HTTP/1.0',
        #'HEAD . HTTP/1.0',
        'HEAD\t/\tHTTP/1.0',
        'HEAD ///////////// HTTP/1.0',
        'Head / HTTP/1.0',
        '\nHEAD / HTTP/1.0',
        ' \nHEAD / HTTP/1.0',  #40
        ' HEAD / HTTP/1.0',
        'HEAD / HQWERTY/1.0',
        # 'HEAD http://some.host.com/ HTTP/1.0',
        # 'HEAD hTTP://some.host.com/ HTTP/1.0',
        # 'HEAD http://some.host.com HTTP/1.0',
        'HEAD %s HTTP/1.0' % url,
        #'HEAD hTTP://$url/ HTTP/1.0',
        #'HEAD http://$url HTTP/1.0',
        'HEAD %s' % url,
        'HEAD http:// HTTP/1.0',
        'HEAD http:/ HTTP/1.0',
        'HEAD http: HTTP/1.0',
        'HEAD http HTTP/1.0',
        'HEAD h HTTP/1.0',
        # 'HEAD HTTP://some.host.com/ HTTP/1.0',
        #'HEAD HTTP://$url/ HTTP/1.0',
        'HEAD HTTP://qwerty.asdfg.com/ HTTP/1.0',  #50
        'GET GET GET',
        'HELLO',
        # 'HEAD%00 / HTTP/1.0',
        'GET \0 / HTTP/1.0',
        'GET / \0 HTTP/1.0',
        'GET / HTTP/1.0\0',
        'GET / H',
        ' GET / HTTP/1.0',
        ' '*1000 + 'GET / HTTP/1.0',
        'GET'+' '*1000+'/ HTTP/1.0',
        'GET '+'/'*1000+' HTTP/1.0',  #60
        'GET /'+' '*1000+'HTTP/1.0',
        'GET / '+'H'*1000+'TTP/1.0',
        'GET / '+'HTTP'+'/'*1000+'1.0',
        'GET / '+'HTTP/'+'1'*1000+'.0',
        'GET / '+'HTTP/1'+'.'*1000+'0',
        'GET / '+'HTTP/1.'+'0'*1000,
        'GET / HTTP/1.0' + ' ' * 1000,
        '12345 GET / HTTP/1.0',
        '12345 / HTTP/1.0',
        # check if \0 is really a null
        '\0',  #70
        '\0'*1000,
        '\0'+'GET / HTTP/1.0',
        '\0'*1000+'GET / HTTP/1.0',
        '\r\n'*1000+'GET / HTTP/1.0',
        'Get / HTTP/1.0',
        'GET\0/\0HTTP/1.0',
        'GET . HTTP/1.0',
        'GET index.html HTTP/1.0',  # is this legal?
        'GET / HTTP/1.',
        '',  #80
        ' ',
        ' '*1000,
        '/',
        '/' * 1000,
        'GET FTP://asdfasdf HTTP/1.0',
        'GET / HTTP/1.0 X',
        # any or all parts or request URL encoded
        #>>> [hex(ord(x)) for x in "GET / HTTP/1.0"]
        #['0x47', '0x45', '0x54', '0x20', '0x2f', '0x20', '0x48', '0x54', '0x54', '0x50', '0x2f', '0x31', '0x2e', '0x30']
        '%47ET / HTTP/1.0',
        '%47%45%54 / HTTP/1.0',
        'GET %2f HTTP/1.0',
        'GET %2F HTTP/1.0',  #90
        'GET%20/ HTTP/1.0',
        'GET / FTP/1.0',
        'GET \\ HTTP/1.0',  # windows style
        #'GET \./',
        #'GET \.\.\.\.\.\.\.\.\.\.\.\.\.\.\.\.\.\. HTTP/1.0'
        'GET C:\\ HTTP/1.0',
        'HTTP/1.0 / GET',  # and other permutations
        # try various escape sequences from c etal
        # \a = bell
        # \b = back space?
        'ALL YOUR BASE ARE BELONG TO US',
        'GET "/" HTTP/1.0',
        "GET '/' HTTP/1.0",
        'GET `/` HTTP/1.0',
        '"GET / HTTP/1.0"',  #100
        '"GET / HTTP/1.0',
        '"GET" / HTTP/1.0',
        '""GET / HTTP/1.0',
        'GEX\bT / HTTP/1.0',  # escape characters
    )

    for index, mm in enumerate(malformed_methods):
        req = request(url)
        req.adhoc_method_line = mm
        res = req.submit()
        get_characteristics('MALFORMED_%03d' % index, res)


def large_binary_searcher(url, large_helper, largest, guesses=None):
    """Find the request sizes at which the server's response code changes.

    large_helper(url, size) must send a probe of the given size and return
    its response code.  Sizes 1, every entry of guesses and largest are
    probed first; then the midpoint between neighbouring sizes that gave
    different codes is probed until those neighbours are adjacent.

    Returns a sorted list of (size, response_code) tuples holding the first
    and last probed size of every run of equal codes.
    """
    if guesses is None:
        guesses = []
    ranges = [(x, large_helper(url, x))
              for x in [1] + list(guesses) + [largest]]
    # find_halfways() and minimize_ranges() both rely on sorted input.
    ranges.sort()
    while 1:
        halfways = find_halfways(ranges)
        if not halfways:
            break
        for hw in halfways:
            ranges.append((hw, large_helper(url, hw)))
        ranges.sort()
    ranges = minimize_ranges(ranges)
    return ranges


def _group_ranges(ranges):
    """Split sorted (size, code) tuples into runs of consecutive equal codes."""
    grouped_ranges = []
    for r in ranges:
        if grouped_ranges and r[1] == grouped_ranges[-1][-1][1]:
            grouped_ranges[-1].append(r)
        else:
            grouped_ranges.append([r])
    return grouped_ranges


def find_halfways(ranges):
    """Return the sizes still worth probing, given sorted (size, code) tuples.

    For every pair of neighbouring runs with different codes the midpoint
    between the last size of the first run and the first size of the next
    one is returned, unless no untested size is left between them.
    """
    grouped_ranges = _group_ranges(ranges)
    halfways = []
    for i in range(len(grouped_ranges) - 1):
        largest_previous = grouped_ranges[i][-1]
        smallest_next = grouped_ranges[i + 1][0]
        gap = smallest_next[0] - largest_previous[0]
        # gap == 1: boundary pinned down.  gap == 0 only happens when the
        # same size was probed twice with different answers; probing its
        # "midpoint" again would never terminate.
        if gap <= 1:
            continue
        hw = (gap // 2) + largest_previous[0]
        if VERBOSE:
            print('%s %s %s' % (largest_previous, hw, smallest_next))
        halfways.append(hw)
    return halfways


def minimize_ranges(ranges):
    """Keep only the first and last tuple of every run of equal codes.

    ranges must be sorted.
    """
    minimized = []
    for gr in _group_ranges(ranges):
        minimized.append(gr[0])
        if len(gr) > 1:
            minimized.append(gr[-1])
    return minimized


# TODO: maybe do this recursively????
# TODO: remember that header size et all are configurable in apache
def long_url_helper(url, size):
    """Request '/' followed by size 'a' characters; return the response code."""
    req = request(url, local_uri=('/' + ('a' * size)))
    res = req.submit()
    get_characteristics('LONG_URL_RANGES', res)
    return res.response_code


# TODO: note that don't call get_characteristics
# since don't have a response to deal with here
def long_url_ranges(url):
    """Record how the response code depends on the length of a long URL."""
    # TODO: base these on "best guess" of what talking to
    # e.g. if think it's apache 1.3.9 then use those to avoid
    # so many long requests
    initial_guesses = [99, 100, 201, 202, 208, 209, 210, 211, 254, 255, 256,
                       765, 766, 8079, 8080, 8176, 8177]
    ranges = large_binary_searcher(url, long_url_helper, 10000,
                                   guesses=initial_guesses)
    add_characteristic('SEMANTIC', 'LONG_URL_RANGES', ranges)


def long_default_helper(url, size):
    """Request a URI made of size '/' characters; return the response code."""
    req = request(url, local_uri=('/' * size))
    res = req.submit()
    get_characteristics('LONG_DEFAULT_RANGES', res)
    return res.response_code


def long_default_ranges(url):
    """Record how the response code depends on the number of '/' in the URI."""
    ranges = large_binary_searcher(url, long_default_helper, 10000)
    add_characteristic('SEMANTIC', 'LONG_DEFAULT_RANGES', ranges)


def many_header_helper(url, size):
    """Send GET / with size extra headers; return the response code."""
    req = request(url)
    for i in range(size):
        # NOTE(review): the slice in the header NAME applies to str(i) only,
        # so the name is 'HEADER' + ten zeros + i rather than a zero padded
        # ten digit number like the value.  Looks like a misplaced
        # parenthesis, but it is kept because it changes the bytes on the
        # wire and therefore possibly the recorded ranges - confirm before
        # changing.
        req.add_header('HEADER' + ('0000000000' + str(i)[-10:]),
                       ('0000000000' + str(i))[-10:])
    res = req.submit()
    get_characteristics('MANY_HEADER_RANGES', res)
    return res.response_code


def many_header_ranges(url):
    """Record how the response code depends on the number of headers sent."""
    initial_guesses = [99, 100, 228, 229]
    ranges = large_binary_searcher(url, many_header_helper, 10000,
                                   guesses=initial_guesses)
    add_characteristic('SEMANTIC', 'MANY_HEADER_RANGES', ranges)


def large_header_helper(url, size):
    """Send GET / with one header of size 'a' characters; return the code."""
    req = request(url)
    req.add_header('LARGE_HEADER', 'a' * size)
    res = req.submit()
    get_characteristics('LARGE_HEADER_RANGES', res)
    return res.response_code


def large_header_ranges(url):
    """Record how the response code depends on the size of a single header."""
    initial_guesses = [8176, 8177]
    ranges = large_binary_searcher(url, large_header_helper, 10000,
                                   guesses=initial_guesses)
    add_characteristic('SEMANTIC', 'LARGE_HEADER_RANGES', ranges)


def unavailable_accept(url):
    """Probe with an Accept header naming a media type nobody serves."""
    req = request(url)
    req.add_header('Accept', 'qwer/asdf')
    res = req.submit()
    get_characteristics('unavailable_accept', res)


def fake_content_length(url):
    """Probe with a Content-Length far larger than the body actually sent."""
    req = request(url)
    req.add_header('Content-Length', '1000000000')
    req.body = 'qwerasdfzxcv'
    res = req.submit()
    get_characteristics('fake_content_length', res)


# TODO: put this global declaration somewhere easier to find....
# Characteristics collected so far, by category.  get_fingerprint() replaces
# it with a fresh dict at the start of every run.
fingerprint = {'LEXICAL': {}, 'SYNTACTIC': {}, 'SEMANTIC': {}}


def add_characteristic(category, name, value, data_type=None):
    """Record value under fingerprint[category][name] without duplicates.

    The first value is stored as-is (wrapped in a list when data_type is
    'LIST').  A later, different value turns the entry into a list of the
    values seen; a value already recorded is ignored.
    """
    entries = fingerprint[category]

    # just add if not already in there
    if name not in entries:
        # TODO: probably don't need a data type just look at data...
        if data_type == 'LIST':
            value = [value]
        entries[name] = value
        return

    # don't duplicate
    if entries[name] == value:
        return

    # create or add to list as necessary
    if not isinstance(entries[name], list):
        entries[name] = [entries[name], value]
    elif value not in entries[name]:
        entries[name].append(value)


def get_characteristics(test_name, res):
    """Extract characteristics from response res of probe test_name.

    Probes whose name ends in 'RANGES' only contribute the status text and
    the claimed server name; all others also contribute header based
    characteristics, and 'MALFORMED_*' probes their response code.
    """
    if VERBOSE:
        print('processing %s' % test_name)

    no_code = ['NO_RESPONSE_CODE', 'NO_RESPONSE']
    response_code, response_text = res.return_code()
    claimed_servername = res.servername()

    if response_code not in no_code:
        add_characteristic('LEXICAL', response_code, response_text)
        add_characteristic('LEXICAL', 'SERVER_NAME', claimed_servername)

    if test_name.endswith('RANGES'):
        return  # only need the code and text

    for header, characteristic in (('Allow', 'ALLOW_ORDER'),
                                   ('Public', 'PUBLIC_ORDER'),
                                   ('Vary', 'VARY_ORDER')):
        if res.has_header(header):
            add_characteristic('SYNTACTIC', characteristic,
                               res.header_data(header))

    if test_name.startswith('MALFORMED_'):
        add_characteristic('SEMANTIC', test_name, response_code)

    if response_code not in no_code:
        header_names = res.header_names()
        add_characteristic('SYNTACTIC', 'HEADER_ORDER', header_names,
                           data_type='LIST')
    else:
        ### Added by APR to solve a weird exception....
        add_characteristic('SYNTACTIC', 'HEADER_ORDER', [], data_type='LIST')

    # Both capitalisations are seen in the wild; 'ETag' wins when both exist.
    if res.has_header('ETag'):
        add_characteristic('SYNTACTIC', 'ETag', res.header_data('ETag'))
    elif res.has_header('Etag'):
        add_characteristic('SYNTACTIC', 'ETag', res.header_data('Etag'))


# 'HEADER_ORDER': [ [ 'Date',
#                     'Server',
#                     'Last-Modified',
#                     'ETag',
#                     'Accept-Ranges',
#                     'Content-Length',
#                     'Connection',
#                     'Content-Type'],
#                   [ 'Date',
#                     'Server',
#                     'Content-Length',
#                     'Allow',
#                     'Connection'],
#                   [ 'Date',
#                     'Server',
#                     'Allow',
#                     'Connection'],
#                   ['Date', 'Server', 'Connection'],
#                   [ 'Date',
#                     'Server',
#                     'Connection',
#                     'Transfer-Encoding'],
#                   [ 'Date',
#                     'Server',
#                     'Alternates',
#                     'Vary',
#                     'TCN',
#                     'Connection']],

# clean up redundancies in lists of lists
def winnow_ordered_list(ordered_list):
    """Drop every list that is an ordered sublist of a longer (or equal) one.

    Returns a new list, sorted by length; the argument is not modified.  A
    list with fewer than two entries is returned unchanged.
    """
    if len(ordered_list) < 2:
        return ordered_list

    by_length = sorted(ordered_list, key=len)
    result = []
    for index, elem in enumerate(by_length[:-1]):
        is_ok = 1
        for other in by_length[index + 1:]:
            if is_partial_ordered_sublist(elem, other):
                is_ok = 0
                break
        if is_ok:
            result.append(elem)
    # The last (longest) entry has nothing after it that could contain it.
    result.append(by_length[-1])
    return result


def is_partial_ordered_sublist(small, large):
    """Return 1 if all items of small occur in large in the same order, else 0.

    Positions are taken from the first occurrence of each item in large.
    """
    if len(small) > len(large):
        return 0
    if small == large:
        return 1
    try:
        presort = [large.index(x) for x in small]
    except ValueError:
        # some item of small is missing from large
        return 0
    if presort != sorted(presort):
        return 0
    return 1


######################################################################
# Functions for comparing to known profiles
#

def _tally(known_section, subject_section, key):
    """Compare one characteristic; return (matches, mismatches, unknowns).

    Exactly one of the three numbers is 1.  The comparison is "unknown" when
    either profile lacks the key.
    """
    if key not in known_section or key not in subject_section:
        return (0, 0, 1)
    if known_section[key] == subject_section[key]:
        return (1, 0, 0)
    return (0, 1, 0)


def find_most_similar(known_servers, subject):
    """Score subject against every profile in known_servers.

    Returns a list of [server, (matches, mismatches, unknowns)] entries in
    the order of known_servers.  Compared are the status texts of a fixed
    set of response codes, the Allow header order, the MALFORMED_000 to
    MALFORMED_104 response codes and the long URL / long default ranges.
    """
    codes = ('200', '207', '301', '302', '400', '401', '403', '404',
             '405', '406', '411', '413', '414', '500', '501')
    semantic_keys = ['MALFORMED_%03d' % num for num in range(105)]
    semantic_keys += ['LONG_URL_RANGES', 'LONG_DEFAULT_RANGES']

    scores = []
    #TODO: make each of these it's own function....
    for server in known_servers:
        matches = 0
        mismatches = 0
        unknowns = 0

        # LEXICAL
        for code in codes:
            known_server_text = server['LEXICAL'].get(code, '')
            subject_server_text = subject['LEXICAL'].get(code, '')
            if known_server_text == '' or subject_server_text == '':
                unknowns += 1
            elif known_server_text == subject_server_text:
                matches += 1
            else:
                mismatches += 1

        # SYNTACTIC
        # allow order
        known_server_allows = server['SYNTACTIC'].get('ALLOW_ORDER', '')
        subject_server_allows = subject['SYNTACTIC'].get('ALLOW_ORDER', '')
        if known_server_allows and subject_server_allows:
            if known_server_allows == subject_server_allows:
                matches += 1
            else:
                mismatches += 1
        else:
            unknowns += 1

        ## etag match
        #check if server has ETag and subject has ETag
        # if either not then unknown
        #if subject matches server by regex
        #    matches += 1
        #else
        #    mismatches += 1

        # SEMANTIC
        # malformed_??? probes and the long url / long default "/" ranges
        for key in semantic_keys:
            hit, miss, unknown = _tally(server['SEMANTIC'],
                                        subject['SEMANTIC'], key)
            matches += hit
            mismatches += miss
            unknowns += unknown

        # unique header exists
        # e.g. X-Pad, etc - or just do ALL known headers....

        scores.append([server, (matches, mismatches, unknowns)])
    return scores


# [a,b,c,e,f] and [a,c,d,f,g] are both ordered the same.....
# -1/0/1 = no/maybe/yes
def partial_same_order(list1, list2):
    """Compare the relative order of the items two lists have in common.

    Returns 1 when the common items appear in the same order in both lists,
    -1 when they do not and 0 when the lists share no item.
    """
    shared = set(list1) & set(list2)
    common1 = [i for i in list1 if i in shared]
    common2 = [i for i in list2 if i in shared]
    if not common1:
        return 0
    if common1 == common2:
        return 1
    return -1


def usage():
    """Print the command line help and exit."""
    print("""
hmap is a web server fingerprinter.

hmap [-hpgn] {url | filename}

e.g.
   hmap http://localhost:82
   hmap -p www.somehost.net.80

-h this info...
-n show this many of the top possible matches
-p run with a prefetched file
-g gather only (don't do comparison)
-c show this many closest matches
""")
    sys.exit()


######################################################################
# This was added by Andres Riancho to make hmap work inside w3af
# it is a "copy" of the "main" with a lot of default parameters :P
VERBOSE = 0
PORT = 80
useSSL = False


def testServer(ssl, server, port, matchCount, generateFP):
    """Fingerprint server and return the names of the closest known servers.

    ssl        -- true to wrap the connection in SSL
    server     -- host to connect to
    port       -- TCP port to connect to
    matchCount -- maximum number of names to return
    generateFP -- true to also write the fingerprint to the file
                  'hmap-fingerprint-<server>' in the current directory

    Sets the module globals VERBOSE, PORT and useSSL.  Returns the
    LEXICAL/SERVER_NAME entries of the best scoring known profiles, best
    first; profiles with the same number of matches keep the (sorted) file
    name order of the fingerprint database.

    Raises w3afException if the fingerprint file cannot be opened, or from
    the probes if the target cannot be reached.
    """
    global VERBOSE
    global PORT
    global useSSL
    VERBOSE = 0
    PORT = port
    useSSL = ssl

    MATCH_COUNT = matchCount
    fingerprintDir = 'plugins/discovery/oHmap/known.servers/'

    # Get the fingerprint
    target_url = server
    fp = get_fingerprint(target_url)

    # Read the fingerprint db
    known_servers = []
    for f in sorted(glob.glob(fingerprintDir + '*')):
        ksf = open(f)
        try:
            # SECURITY: eval() executes whatever the file contains.  The
            # known.servers directory must only ever hold trusted files.
            ks = eval(ksf.read())
        finally:
            ksf.close()
        known_servers.append(ks)

    # Generate the fingerprint file
    if generateFP:
        try:
            fd = open('hmap-fingerprint-' + server, 'w')
        except Exception:
            raise w3afException('Cannot open fingerprint file.')
        try:
            pprint.PrettyPrinter(stream=fd).pprint(fp)
        finally:
            fd.close()

    # Compare
    scores = find_most_similar(known_servers, fp)
    # Most matches first; list.sort() is stable, so ties keep db order.
    scores.sort(key=lambda score: -score[1][0])

    res = []
    for (known, (matches, mismatches, unknowns)) in scores[:MATCH_COUNT]:
        res.append(known['LEXICAL']['SERVER_NAME'])
    return res