#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# $Id: sitered,v 0.4 2009/03/21 15:47:55 st3f Exp $
#
# Site reduction is a term for telling a search engine to give all indexed
# pages for a specific domain (with the site operator) except the ones
# that start with "www". For Google and Live Search this is done with the
# query string "site:example.tld -site:www.example.tld".
#
# This program performs site reduction for one or more domains, parses the
# results and outputs them in a nice grep(1)-friendly format. It is useful
# for discovering new hosts under a domain in a covert manner.
#
# Copyright (c) Stefan Pettersson 2008-2009, http://www.bigpointyteeth.se/
#
# Permission to use, copy, modify, and distribute this software for any
# purpose with or without fee is hereby granted, provided that the above
# copyright notice and this permission notice appear in all copies.
#
# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
#
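# Example: a run against a single domain might look something like this
# (the output below is hypothetical; which hosts turn up depends entirely
# on what the search engine happens to have indexed):
#
#   $ ./sitered example.com
#   example.com     mail.example.com
#   example.com     intranet.example.com
#
# The tab-separated output is easy to post-process with standard tools,
# for instance "./sitered example.com | cut -f2 | sort -u" to get a unique
# list of discovered host names.
#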
import re, os, sys
import optparse
import httplib
import time


################################################
# functions
################################################

def live_site_reduction(domain):
    # the raw html data received from live searches
    data = ""

    verbose(2, "Connecting to Live Search to do '%s'" % domain)
    h = httplib.HTTPConnection("search.live.com")
    # we need english so that the parsing will work properly
    hdr = {"User-agent": useragent, "Accept-Language": "en-us,en;q=0.5"}

    # NOTE: the original result pattern was garbled in this copy; this is an
    # assumed pattern for the old Live Search result markup, where each hit
    # is a link wrapped in an <h3> heading
    regex = re.compile(r'<h3><a href=".*?"')

    # temp storage for search results
    live_raw_results = []

    # loop through the results pages
    next_exists = True
    page = 0
    while next_exists and page < options.maxpages:
        next_exists = False
        verbose(2, "Getting page %d on Live Search" % page)

        # search string: "site:domain.tld -site:www.domain.tld"
        url = "/results.aspx?q=site%3A" + domain + "%20-site%3Awww." + domain + "&first=" + str(10 * page + 1)
        verbose(3, "Search URL: %s" % url)

        h.request("GET", url, headers=hdr)
        data = h.getresponse().read()
        live_raw_results += regex.findall(data)

        # increment and determine if there are more pages
        if data.find("\">Next") > 0:
            page += 1
            next_exists = True

        # sleep for two seconds (polite)
        time.sleep(2)

    h.close()
    verbose(1, "Got %d raw results from a %d-page search for '%s'" % (len(live_raw_results), page, domain))

    # final storage for search results
    live_results = []

    for vhost in live_raw_results:
        # strip the characters that come with the (assumed) regex above:
        # '<h3><a href="' in front and the closing quote at the end
        name = vhost[13:-1]
        verbose(3, "Found URL %s" % name)

        # TODO maybe we should strive to keep the https:// if it is there
        if name.startswith("https://"):
            name = name[8:]
        elif name.startswith("http://"):
            name = name[7:]
        if "/" in name:
            name = name[:name.find("/")]

        if name not in live_results:
            verbose(2, "Added %s" % name)
            live_results.append(name)

    return live_results


def fatal(msg):
    print >> sys.stderr, "%s: %s" % (me, msg)
    sys.exit(1)


def error(msg):
    print >> sys.stderr, "%s: %s" % (me, msg)


def verbose(lvl, msg):
    """Take verbosity level 'lvl' and a message 'msg' as input. If 'lvl' is
    less than or equal to the configured verbosity, 'options.verbosity',
    print the message to stdout."""
    if lvl <= options.verbosity:
        print msg


def print_version(option, opt, value, parser):
    print "%s %s" % (me, version)
    sys.exit(0)


################################################
# globals
################################################

me = os.path.basename(sys.argv[0])
version = "$Revision: 0.4 $"
useragent = "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)"

# list of domains to analyse
domains = []

# dictionary that will hold the results
results = {}


################################################
# parse command line options
################################################

usage = "(1) %prog [opts] <domain>[,domain]\n" +\
        "       (2) %prog [opts] <file>"
desc = "%prog is used to find host names used under a specific domain " +\
       "by using site reduction against Microsoft Live Search."
optparser = optparse.OptionParser(description=desc, usage=usage)
optparser.add_option("-o", type="string", dest="outfile", metavar="FILE",
                     help="write output to FILE, verbose and debug output not included")
optparser.add_option("-p", type="int", dest="maxpages", metavar="PAGES", default=10,
                     help="never go beyond PAGES number of results pages (default: %default)")
optparser.add_option("-v", action="count", dest="verbosity", default=0,
                     help="print verbose output (can be used several times)")
optparser.add_option("-V", action="callback", callback=print_version,
                     help="print version information")

(options, args) = optparser.parse_args()


################################################
# prepare domain list according to mode
################################################

# only one positional argument is accepted
if len(args) == 1:
    # we first try to open a file with args[0] as file name
    try:
        # this file contains a list of domains, one per line
        verbose(2, "Trying to open '%s' as a file" % args[0])
        inputfile = open(args[0], "rU")
        for line in inputfile.readlines():
            # skip comments and blank lines
            line = line.strip()
            if line == "" or line.startswith("#"):
                # skip it
                continue
            else:
                # add it
                domains.append(line)
        inputfile.close()
    except IOError:
        # okay, it's not a file name
        verbose(2, "Failed to open '%s' as a file" % args[0])
        verbose(2, "Trying to use %s as a comma-separated list of domains" % args[0])
        # we assume this is a comma-separated list of domains, split it up
        domains = args[0].split(",")
else:
    # bad (number of) arguments
    optparser.print_help()
    sys.exit(-1)

# open handle for output file
if options.outfile:
    # die if output file already exists
    if os.access(options.outfile, os.F_OK):
        fatal("file '%s' already exists" % options.outfile)
    try:
        outputfile = open(options.outfile, "wt")
        verbose(1, "Saving output to '%s'" % options.outfile)
    except IOError:
        fatal("cannot open file '%s'" % options.outfile)
else:
    outputfile = None


################################################
# analyse the domains
################################################

verbose(1, "Doing queries for %s domain(s)" % len(domains))
for domain in domains:
    verbose(2, "    %s" % domain)

# analyse each domain
for domain in domains:
    hosts = live_site_reduction(domain)
    results[domain] = hosts

# print output from analysis
for key in results.keys():
    if len(results[key]) > 0:
        for value in results[key]:
            print "%s\t%s" % (key, value)

        # if we want to save output to a file
        if options.outfile:
            for value in results[key]:
                outputfile.write("%s\t%s\n" % (key, value))

if options.outfile:
    outputfile.close()

# eof