'''
googleSpider.py

Copyright 2006 Andres Riancho

This file is part of w3af, w3af.sourceforge.net .

w3af is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation version 2 of the License.

w3af is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with w3af; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA

'''

import core.controllers.outputManager as om
from core.controllers.w3afException import w3afException
from core.controllers.w3afException import w3afRunOnce
from core.data.searchEngines.google import google as google
from core.controllers.basePlugin.baseDiscoveryPlugin import baseDiscoveryPlugin
import core.data.parsers.urlParser as urlParser


class googleSpider(baseDiscoveryPlugin):
    '''
    This plugin searches google using google API to get a list of URL's

    @author: Andres Riancho ( andres.riancho@gmail.com )
    '''

    # Go here to get a API License : http://www.google.com/apis/
    #
    # Get pygoogle from : http://pygoogle.sourceforge.net/
    #
    # This plugin wont use proxy/proxy auth/auth/etc settings (for now)
    #
    # @author: Andres Riancho ( andres.riancho@gmail.com )

    def __init__(self):
        '''
        Initialise the plugin with its default user options.
        '''
        baseDiscoveryPlugin.__init__(self)

        # True until discover() has performed its single search.
        self._run = True

        # User variables
        self._key = ''
        self._resultLimit = 300

    def discover(self, fuzzableRequest):
        '''
        Search google for "site:<domain>" and request every URL found.

        The search is performed only on the first call; every later call
        raises w3afRunOnce.

        @parameter fuzzableRequest: A fuzzableRequest instance that contains
            (among other things) the URL to test.
        @return: The list of fuzzable requests collected by
            _generateFuzzableRequests() for the URLs google returned.
        @raise w3afRunOnce: If this plugin has already been run.
        @raise w3afException: If the target domain is reported as private by
            the google search engine wrapper.
        '''
        self._fuzzableRequests = []

        if not self._run:
            # This will remove the plugin from the discovery plugins to be run.
            raise w3afRunOnce()

        # I will only run this one time. All calls to googleSpider return
        # the same url's
        self._run = False

        self._google = google(self._urlOpener, self._key)
        domain = urlParser.getDomain(fuzzableRequest.getURL())

        if self._google.isPrivate(domain):
            raise w3afException('There is no point in searching google for "site:' + domain + '" . Google doesnt index private pages.')

        results = self._google.getNResults('site:' + domain, self._resultLimit)

        # One task per result URL; self._tm is presumably the framework's
        # thread manager -- confirm against baseDiscoveryPlugin.
        for res in results:
            targs = (res.URL,)
            self._tm.startFunction(target=self._generateFuzzableRequests,
                                   args=targs, ownerObj=self)

        # Wait for every task started above before returning the results.
        self._tm.join(self)

        return self._fuzzableRequests

    def _generateFuzzableRequests(self, url):
        '''
        GET the given URL and store the fuzzable requests created from the
        response in self._fuzzableRequests.

        @parameter url: The URL to request.
        @return: No value is returned.
        '''
        try:
            response = self._urlOpener.GET(url, useCache=True, getSize=True)
        except KeyboardInterrupt:
            # Bare raise keeps the original traceback.
            raise
        except w3afException:
            # Best effort: a URL that can not be fetched is skipped. Returning
            # here is required, falling through would use the unbound name
            # "response" and fail with an UnboundLocalError.
            return

        fuzzReqs = self._createFuzzableRequests(response)
        self._fuzzableRequests.extend(fuzzReqs)

    def getOptionsXML(self):
        '''
        This method returns a XML containing the Options that the plugin has.
        Using this XML the framework will build a window, a menu, or some
        other input method to retrieve the info from the user. The XML has to
        validate against the xml schema file located at :
        w3af/core/ui/userInterface.dtd

        @return: XML with the plugin options.
        '''
        # NOTE(review): this literal contains no XML markup, the option
        # definitions for "key" and "resultLimit" appear to be missing.
        # Confirm against the upstream file before relying on it.
        return '\ \ \ \ \ '

    def setOptions(self, optionsMap):
        '''
        This method sets all the options that are configured using the user
        interface generated by the framework using the result of
        getOptionsXML().

        @parameter optionsMap: A dictionary with the options for the plugin;
            the 'key' and 'resultLimit' entries are read.
        @return: No value is returned.
        '''
        self._key = optionsMap['key']
        self._resultLimit = optionsMap['resultLimit']

    def getPluginDeps(self):
        '''
        @return: A list with the names of the plugins that should be run
            before the current one.
        '''
        return []

    def getLongDesc(self):
        '''
        @return: A DETAILED description of the plugin functions and features.
        '''
        return '''
        This plugin finds new URL's using google. It will search for "site:domain.com" and requests all the
        URL's found in the result.

        Two configurable parameters exist:
            - resultLimit
            - key
        '''