#!/usr/bin/env python
#-*-coding:utf-8-*-
#
# $Id: sitered,v 0.4 2009/03/21 15:47:55 st3f Exp $
#
# Site reduction is a term for telling a search engine to give all indexed
# pages for a specific domain (with the site-operator) except the ones
# that start with "www". For Google and Live Search this is done with the
# query string "site:example.tld -site:www.example.tld".
#
# This program performs site reduction for one or more domains, parses the
# results and outputs it in a nice grep(1)-friendly format. It is useful
# for discovering new hosts under a domain in a covert manner.
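#
# For example, a run against example.tld prints one tab-separated
# "domain<TAB>host" pair per discovered host (host names below are
# purely illustrative):
#
#   $ sitered example.tld
#   example.tld     mail.example.tld
#   example.tld     intranet.example.tld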
#
# Copyright (c) Stefan Pettersson 2008-2009, http://www.bigpointyteeth.se/
#
# Permission to use, copy, modify, and distribute this software for any
# purpose with or without fee is hereby granted, provided that the above
# copyright notice and this permission notice appear in all copies.
#
# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
#
import re, os, sys
import optparse
import httplib
import time
################################################
# functions
################################################
def live_site_reduction(domain):
    # the raw html data received from live searches
data = ""
verbose(2, "Connecting to Live Search to do '%s'" % domain)
h = httplib.HTTPConnection("search.live.com")
# we need english so that the parsing will work properly
hdr = {"User-agent":useragent,"Accept-Language":"en-us,en;q=0.5"}
    # result URLs are picked out of the page markup; the exact tags here are
    # an assumption, chosen so that the [10:-12] slice below strips them cleanly
    regex = re.compile(r'<h3><cite>.*?</cite></h3>')
# temp storage for search results
live_raw_results = []
# loop through the results pages
next_exists = True
page = 0
while next_exists and page < options.maxpages:
next_exists = False
verbose(2, "Getting page %d on Live Search" % page)
# search string: "site:domain.tld -site:www.domain.tld"
url = "/results.aspx?q=site%3A" + domain + "%20-site%3Awww." + domain + "&first=" + str(10 * page + 1)
verbose(3, "Search URL: %s" % url)
h.request("GET", url, headers=hdr)
data = h.getresponse().read()
live_raw_results += regex.findall(data)
# increment and determine if there are more pages
if data.find("\">Next") > 0:
page += 1
next_exists = True
# sleep for two seconds (polite)
time.sleep(2)
h.close()
verbose(1, "Got %d raw results from a %d-page search for %s'" % (len(live_raw_results), page, domain))
# final storage for search results
live_results = []
for vhost in live_raw_results:
# strip the characters that come with the regex
name = vhost[10:-12]
verbose(3, "Found URL %s" % name)
# TODO maybe we should strive to keep the https:// if it is there
if name.startswith("https://"):
name = name[8:]
if "/" in name:
name = name[:name.find("/")]
if name not in live_results:
verbose(2, "Added %s" % name)
live_results.append(name)
return live_results
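# A minimal sketch (not used by the script; function name is illustrative) of
# building the same reduction query URL with urllib.quote instead of the
# hand-written %3A/%20 escapes in live_site_reduction() above.
def build_search_url(domain, first):
    import urllib
    # urllib.quote turns ':' into %3A and ' ' into %20, giving the same
    # "site:domain -site:www.domain" query string as the code above
    query = urllib.quote("site:%s -site:www.%s" % (domain, domain))
    return "/results.aspx?q=%s&first=%d" % (query, first)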
def fatal(msg):
    print >> sys.stderr, "%s: %s" % (me, msg)
    sys.exit(1)
def error(msg):
    print >> sys.stderr, "%s: %s" % (me, msg)
def verbose(lvl, msg):
"""Take verbosity level 'lvl' and a message 'msg' as input. If the
verbosity level is higher that the desired, 'verbosity', print the
message to stdout."""
if lvl <= options.verbosity:
print msg
def print_version(option, opt, value, parser):
print "%s %s" % (me, version)
sys.exit(0)
################################################
# globals
################################################
me = os.path.basename(sys.argv[0])
version = "$Revision: 0.4 $"
useragent = "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)"
# list of domains to analyse
domains = []
# dictionary that will hold the results
results = {}
################################################
# parse command line options
################################################
usage = "(1) %prog [opts] [,domain]\n" +\
" (2) %prog [opts] "
desc = "%prog is used to find host names used under a specific domain " +\
"by using site reduction against Microsoft Live Search."
optparser = optparse.OptionParser(description=desc, usage=usage)
optparser.add_option("-o", type="string", dest="outfile", metavar="FILE",\
help="write output to FILE, verbose and debug output not included")
optparser.add_option("-p", type="int", dest="maxpages", metavar="PAGES", default=10,\
help="never go beyond PAGES number of results pages (default: %default)")
optparser.add_option("-v", action="count", dest="verbosity", default=0,\
help="print verbose output (can be used several times)")
optparser.add_option("-V", action="callback", callback=print_version,\
help="print version information")
(options, args) = optparser.parse_args()
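# Illustrative invocations (domains and file name are placeholders):
#
#   sitered -v example.tld                  one domain, verbose output
#   sitered -p 5 example.tld,example.org    comma-separated list, max 5 result pages each
#   sitered -o found.txt domains.txt        read domains from a file, save output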
################################################
# prepare domain list according to mode
################################################
# only one positional argument is accepted
if len(args) == 1:
    # we first try to open a file with args[0] as file name
try:
# this file contains a list of host names and ip addresses
verbose(2, "Trying to open '%s' as a file" % args[0])
inputfile = open(args[0], "rU")
for line in inputfile.readlines():
# skip comments and blank lines
line = line.strip()
if line == "" or line.startswith("#"):
# skip it
continue
else:
# add it
domains.append(line)
inputfile.close()
except IOError:
# okay, it's not a file name
verbose(2, "Failed to open '%s' as a file" % args[0])
verbose(2, "Trying to use %s as a comma-separated list of domains" % args[0])
        # assume args[0] is a comma-separated list of domains and split it
        domains = args[0].split(",")
else:
# bad (number of) arguments
optparser.print_help()
sys.exit(-1)
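# A domain file (usage mode 2) holds one domain per line; blank lines and
# lines starting with "#" are skipped, e.g. (illustrative contents):
#
#   # targets
#   example.tld
#   example.org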
# open handle for output file
if options.outfile:
# die if output file already exists
if os.access(options.outfile, os.F_OK):
fatal("file '%s' already exists" % options.outfile)
try:
outputfile = open(options.outfile, "wt")
verbose(1, "Saving output to '%s'" % options.outfile)
except IOError:
fatal("cannot open file '%s'" % options.outfile)
else:
outputfile = None
################################################
# analyse the addresses
################################################
verbose(1, "Doing queries for %s domain(s)" % len(domains))
for domain in domains:
verbose(2, " %s" % domain)
# analyse addresses
for domain in domains:
hosts = live_site_reduction(domain)
results[domain] = hosts
# print output from analysis
for key in results.keys():
if len(results[key]) > 0:
for value in results[key]:
print "%s\t%s" % (key, value)
# if we want to save output to a file
if options.outfile:
for value in results[key]:
outputfile.write("%s\t%s\n" % (key, value))
if options.outfile:
outputfile.close()
# eof