'''
detectPhishing.py
Copyright 2006 Andres Riancho
This file is part of w3af, w3af.sourceforge.net .
w3af is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation version 2 of the License.
w3af is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with w3af; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
'''
import core.controllers.outputManager as om
from core.controllers.basePlugin.baseGrepPlugin import baseGrepPlugin
import core.data.kb.knowledgeBase as kb
import core.data.kb.vuln as vuln
import core.data.parsers.dpCache as dpCache
from core.data.parsers.urlParser import *
from core.data.getResponseType import *
import urllib
class detectPhishing(baseGrepPlugin):
'''
This plugin greps every page and assigns a phishing score to it.
@author: Andres Riancho ( andres.riancho@gmail.com ) ; special thanks to Matthew Hall !
'''
def __init__(self):
    '''
    Initialise the grep plugin and set the default phishing score limit.
    '''
    # Run the parent grep plugin constructor before adding our own state.
    baseGrepPlugin.__init__(self)

    # User configured variables
    # _testResponse reports a page when its total score is greater than this.
    self._phishLimit = 65
def _testResponse(self, request, response):
# Compute a phishing score for a response and report it.
#
# @parameter request: Not used by the code visible in this method.
# @parameter response: Object that provides getHeaders(), getURL(), getURI()
#                      and an "id" attribute.
# @return: None. Findings are appended to kb.kb and details go to om.out.debug.
#
# NOTE(review): the indentation of this file was lost. The nesting described
# in the comments below is inferred from variable usage - confirm it against
# the upstream copy before re-indenting.
#
# Only responses whose headers are accepted by isTextOrHtml() are scored.
if isTextOrHtml(response.getHeaders()) :
# Four partial scores. _scoreReferences, _scoreForms and _scoreBody are not
# visible in this chunk of the file.
urlScore = self._scoreURL( response )
referenceScore = self._scoreReferences( response )
formsScore = self._scoreForms( response )
bodyScore = self._scoreBody( response )
# The total is the plain sum of the partial scores.
phishingScore = urlScore + referenceScore + formsScore + bodyScore
# Strictly-greater comparison: a total equal to _phishLimit is not reported.
if phishingScore > self._phishLimit:
v = vuln.vuln()
v.setURL( response.getURL() )
v.setURI( response.getURI() )
v.setId( response.id )
v.setDesc( 'The URL : '+ response.getURL() + ' seems to be a phishing scam. The phishingScore is: ' + str(phishingScore) )
# The numeric score is also stored on the vuln object under 'phishingScore'.
v['phishingScore'] = phishingScore
# Saved in the knowledge base under the 'phishing' name for this plugin.
kb.kb.append( self, 'phishing', v )
# Debug output of the total and each partial score.
# NOTE(review): these calls use the score variables, so they presumably sit
# inside the isTextOrHtml branch; whether they are also inside the
# threshold branch above cannot be told from this copy - TODO confirm.
om.out.debug('The URL: ' + response.getURL() + ' returned a phishing score of: ' + str(phishingScore) )
om.out.debug('Detailed scoring: ' )
om.out.debug('The urlScore is: ' + str(urlScore) )
om.out.debug('The referenceScore is: ' + str(referenceScore) )
om.out.debug('The formsScore is: ' + str(formsScore) )
om.out.debug('The bodyScore is: ' + str(bodyScore) )
def _scoreURL( self, response ):
'''
Assign a phishing score to the URL.
@return: A number indicating the phishing score of this page.
'''
urlScore = 0
url = urllib.unquote( response.getURL() )
# Long paths are not common in true bank logins
if getPath( url ).count('/') >= 5:
urlScore += 5
elif getPath( url ) .count('/') >= 7:
urlScore += 8
# Bank logins are almost always https...
if not url.startswith('https://'):
urlScore += 20
# Not so many banks use php, and many scammers do...
if getPath( url ).split('.')[-1:][0].lower().count( 'php' ):
urlScore += 10
# This is a true banking scam , many dots in URL also indicate
# that something is wrong
# http://www.53.com.bankingportal.id57473179.billwartell.info/conf
if getPath( url ).count( '.' ) >= 5:
urlScore += 10
# This catches URLs like http://a0s9g019270q293710293712039.a.com/phish.com/
if len( getPath( url ) ) >= 30:
urlScore += 10
# Hidden dirs or files
if url.count('/.'):
urlScore += 10
# URLs with a lot of URL encoding are also suspicious, e.g.
# http://mal.icio.us/ becomes http://%6D%61%6C%2E%69%63%69%6F%2E%75%73/
if len ( url ) > (1,5 * len( response.getURL() ) ) :
# There is too much encoding in this URL
urlScore += 10
# http://12.34.5.67/ is http://203556163/ in decimal.
if getDomain( url ).isdigit():
urlScore += 25
# Awful banking sites with XSS errors!
# http://www.barclays.com/script.asp?foo=
if url.count('