'''
html.py

Copyright 2006 Andres Riancho

This file is part of w3af, w3af.sourceforge.net .

w3af is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation version 2 of the License.

w3af is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with w3af; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA

'''

import core.controllers.outputManager as om
from plugins.grep.passwordProfilingPlugins.basePpPlugin import basePpPlugin
from core.data.getResponseType import *
import sgmllib


class html(basePpPlugin):
    '''
    This plugin creates a map of possible passwords by reading html responses.

    @author: Andres Riancho ( andres.riancho@gmail.com )
    '''

    # Words found inside <title> are assumed to be better password material
    # than ordinary body text, so each title occurrence counts this many times.
    TITLE_WEIGHT = 5

    def __init__(self):
        basePpPlugin.__init__(self)

    def getWords(self, response):
        '''
        Get words from the body, this is a modified "strings" that filters
        out HTML tags.

        @parameter response: The response object to analyze. Its headers are
            read with getHeaders() and its body with getBody(); the body is
            in most common cases an html, but could be almost anything.
        @return: A single map of word:weight that merges the words found in
            the title (each occurrence weighted by TITLE_WEIGHT) with the
            words found in the rest of the document (each occurrence
            weighted by 1). None is returned when the response is not
            text/html or when the document could not be parsed.
        '''
        if not isTextOrHtml(response.getHeaders()):
            return None

        sp = simpleParser()
        try:
            sp.parse(response.getBody())
        except Exception:
            # If this plugin couldnt parse the document, return None. This
            # will indicate passwordProfiling.py to continue to the next pp
            # plugin. Only Exception is caught so that KeyboardInterrupt and
            # SystemExit are not silently swallowed.
            return None

        res = {}

        # I think that titles have more password material that normal data
        for word, count in sp.getTitles().items():
            res[word] = count * self.TITLE_WEIGHT

        # Join both maps. The counts are added: a plain assignment here would
        # discard the title weight of every word that also shows up in the
        # body of the document.
        for word, count in sp.getData().items():
            res[word] = res.get(word, 0) + count

        return res


class simpleParser(sgmllib.SGMLParser):
    '''
    A simple parser class.

    It collects the text chunks of a document in two separate lists: the
    chunks seen while inside a <title> tag and all the other chunks.
    '''

    def __init__(self, verbose=0):
        sgmllib.SGMLParser.__init__(self, verbose)
        # Text chunks found outside of the <title> tag
        self._data = []
        # Text chunks found inside of the <title> tag
        self._titles = []
        # True while the parser is between <title> and </title>
        self._inTitle = False

    def parse(self, s):
        "Parse the given string 's'."
        self.feed(s)
        # close() makes the parser process whatever is still buffered
        self.close()

    def handle_data(self, data):
        "Handle the textual 'data'; store it as title text or as normal text."
        if self._inTitle:
            self._titles.append(data)
        else:
            self._data.append(data)

    def start_title(self, data):
        '''
        Handle the opening title tag.

        @parameter data: The value that SGMLParser passes to start_* handlers
            (the attributes of the tag); it is not used here.
        '''
        self._inTitle = True

    def end_title(self):
        "Handle the closing title tag."
        self._inTitle = False

    def _parseStrings(self, stringList):
        '''
        Count the words that appear in a list of strings.

        Every '<' and '>' is replaced by a space and each string is split on
        whitespace; only the chunks that are alphanumeric and at least 4
        characters long are counted.

        @parameter stringList: A list of strings.
        @return: A map of string:repetitions.
        '''
        res = {}
        for d in stringList:
            cleaned = d.replace('>', ' ').replace('<', ' ')
            # split() without arguments also splits on tabs and newlines.
            # Splitting only on ' ' left chunks like "word\n", which fail the
            # isalnum() test and were silently dropped.
            for chunk in cleaned.split():
                if chunk.isalnum() and len(chunk) >= 4:
                    res[chunk] = res.get(chunk, 0) + 1
        return res

    def getData(self):
        "Return a map of string:repetitions for the text outside the title."
        return self._parseStrings(self._data)

    def getTitles(self):
        "Return a map of string:repetitions for the text inside the title."
        return self._parseStrings(self._titles)