'''
error404page.py

Copyright 2006 Andres Riancho

This file is part of w3af, w3af.sourceforge.net .

w3af is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation version 2 of the License.

w3af is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with w3af; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA

'''

import core.controllers.outputManager as om
from core.controllers.basePlugin.baseDiscoveryPlugin import baseDiscoveryPlugin
import core.data.kb.knowledgeBase as kb
import core.data.parsers.urlParser as urlParser
from core.data.fuzzer.fuzzer import *
import re
from core.controllers.w3afException import w3afRunOnce


class error404page(baseDiscoveryPlugin):
    '''
    This plugin reads the 404 page returned by the server.

    For every directory it is asked to test, it requests a randomly named
    resource, turns the body of the answer into a regular expression and
    remembers it. is404() later matches responses against that expression.

    @author: Andres Riancho ( andres.riancho@gmail.com )
    '''

    # Number of additional probes sent when a probe is not answered with a
    # 404 status code.
    _MAX_RETRIES = 3

    def __init__(self):
        baseDiscoveryPlugin.__init__(self)
        self._exec = True

        # Regex metacharacters. Escaping is now delegated to re.escape(); the
        # list is only kept so that code reading this attribute keeps working.
        self._metachars = ['\\', '.', '^', '$', '*', '+', '?', '{', '[', ']',
                           '|', '(', ')', '..', '\\d', '\\D', '\\s', '\\S',
                           '\\w', '\\W', '\\A', '\\Z', '\\b', '\\B']

        # Maps a domain path (as returned by urlParser.getDomainPath) to the
        # regular expression string that describes its "not found" page.
        self._404dirRegexMap = {}

    def discover(self, fuzzableRequest):
        '''
        Get a page that doesn't exist and generate the is404 function.

        The regular expression is generated only once per domain path; later
        calls for the same path reuse the stored one.

        @parameter fuzzableRequest: A fuzzableRequest instance that contains
                                    (among other things) the URL to test.
        @return: Always an empty list.
        '''
        url = fuzzableRequest.getURL()
        domPath = urlParser.getDomainPath(url)

        if domPath not in self._404dirRegexMap:
            self._404dirRegexMap[domPath] = self._learn404Regex(url, domPath)

        # Publish the matcher so that other plugins can fetch it from the
        # knowledge base.
        kb.kb.save(self, '404', self.is404)
        return []

    def _learn404Regex(self, url, domPath):
        '''
        Probe the server and build the "not found" regular expression.

        @parameter url: The URL whose base is used to build the probe request.
        @parameter domPath: The domain path the expression is generated for;
                            only used in the log messages.
        @return: The regular expression, as a string.
        '''
        response = self._generate404(url)

        # Keep the response of every retry: the body that is analyzed has to
        # belong to the same request as the random token in self._randAlNum.
        retries = self._MAX_RETRIES
        while response.getCode() != 404 and retries > 0:
            retries -= 1
            response = self._generate404(url)

        msg = 'The 404 page for the directory: ' + domPath + ' was identified.'
        if response.getCode() != 404:
            # Not using 404 in error pages
            om.out.information('Server uses ' + str(response.getCode()) +
                               ' as 404 code. ' + msg)
        else:
            om.out.debug(msg)

        return self._buildRegex(response.getBody(), self._randAlNum)

    def _buildRegex(self, body, randomToken):
        '''
        Turn a response body into an anchored regular expression.

        @parameter body: The body of the response to the probe request.
        @parameter randomToken: The random name that was requested. Wherever
                                the body echoes it, it is replaced by ".*?".
        @return: The regular expression, as a string.
        '''
        # re.escape() escapes each character exactly once. Replacing the
        # metacharacters one after another re-escaped the backslashes that
        # earlier replacements had inserted (a literal "\W" in the body ended
        # up as a character class in the expression).
        escapedBody = re.escape(body)

        # An empty token would make replace() insert ".*?" between all chars.
        if randomToken:
            escapedBody = escapedBody.replace(re.escape(randomToken), '.*?')

        return '^' + escapedBody + '$'

    def is404(self, httpResponse):
        '''
        Decide whether a response is a "not found" page.

        @parameter httpResponse: The response to analyze; its URL, body and
                                 code are read.
        @return: True if the body matches the expression stored for the domain
                 path of the response. When no expression was stored for that
                 path, True if the response code is 404. False otherwise.
        '''
        domPath = urlParser.getDomainPath(httpResponse.getURL())
        is404regexStr = self._404dirRegexMap.get(domPath)

        if is404regexStr is None:
            # Not generated the is404regexStr for this directory yet
            return httpResponse.getCode() == 404

        match = re.search(is404regexStr, httpResponse.getBody(), re.DOTALL)
        return match is not None

    def _generate404(self, url):
        '''
        Request a randomly named resource below the base URL of url.

        The random name is stored in self._randAlNum so that the caller can
        find it in the body of the response.

        @parameter url: The URL whose base URL is used for the request.
        @return: The response to the GET request.
        '''
        baseUrl = urlParser.baseUrl(url)
        self._randAlNum = createRandAlNum()
        url404 = urlParser.urlJoin(baseUrl, self._randAlNum)
        # NOTE(review): self._urlOpener is not set in this file; presumably it
        # is provided by the framework or the base class - confirm.
        response = self._urlOpener.GET(url404, useCache=True, grepResult=False)
        return response

    def getOptionsXML(self):
        '''
        This method returns a XML containing the Options that the plugin has.
        Using this XML the framework will build a window, a menu, or some other
        input method to retrieve the info from the user. The XML has to
        validate against the xml schema file located at :
        w3af/core/ui/userInterface.dtd

        @return: XML with the plugin options.
        '''
        # NOTE(review): the literal below consists only of backslash line
        # continuations and whitespace, no XML markup is visible in it. It is
        # kept as found; confirm the intended document against the DTD.
        return '\
        \
        \
        '

    def setOptions(self, OptionList):
        '''
        This method sets all the options that are configured using the user
        interface generated by the framework using the result of
        getOptionsXML(). This plugin ignores them.

        @parameter OptionList: A dictionary with the options for the plugin.
        @return: No value is returned.
        '''
        pass

    def getPluginDeps(self):
        '''
        @return: A list with the names of the plugins that should be run
                 before the current one; empty for this plugin.
        '''
        return []

    def getLongDesc(self):
        '''
        @return: A DETAILED description of the plugin functions and features.
        '''
        return ''' This plugin generates a regular expression that identifies 404 error pages. The regular expression is generated for every new directory that is tested. Other plugins can use this knowledge to identify 404 pages. '''