#!/usr/bin/env python
#-*-coding:utf-8-*-
#
# $Id: mresolve,v 1.30 2010/02/11 09:42:23 stef Exp $
#
# mresolve is yet another program to enumerate domain names. It works in
# three main modes. The first is used to resolve a list of names and IP
# addresses held in an input file. The file can contain both host names
# and IP addresses, mresolve will act properly. A list of IP addresses
# to reverse resolve can be generated with nmap. The second mode is used
# to find new host names by using a dictionary to guess for subdomain
# names. The third mode is used to find which TLDs a domain name exists
# below.
#
# What's especially nice with mresolve is that the output doesn't leave
# any information out and that quirks such as aliases (CNAME) and host
# names with multiple A records are respected. Also, if a host name is
# successfully forward-resolved to an IP address, that address can
# optionally be put back into the mix and be reverse resolved to find its
# reverse name.
#
# Copyright (c) Stefan Pettersson 2008-2010, http://www.bigpointyteeth.se/
#
# Permission to use, copy, modify, and distribute this software for any
# purpose with or without fee is hereby granted, provided that the above
# copyright notice and this permission notice appear in all copies.
#
# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
#
# TODO - when doing TLD grinding we should not do regular A lookups since
#        e.g. nti.se is not available but www.nti.se is (NS instead?)
#      - make sure an output file is not saved when no results were found
#      - implement proper IP-range parsing (IPy.py)
#      - sort after IP before results output
#

import os, sys
import optparse
import re
import thread
import time
import socket
import random


################################################
#                 functions
################################################

def printmatrix(listoflists, _output=None, _header=False):
    """Print a list of rows as a matrix with straight, space-padded columns.

    'listoflists' is a sequence of rows, each row being a sequence of
    cells. Rows may be of different length (entries without aliases are
    shorter); cells that a row lacks are simply not printed. Every cell
    is padded to the width of its column and followed by one space. If
    '_output' is set to an open, writable file object, the same text is
    written to it. If '_header' is true, the first row is treated as a
    header and a line of dashes is printed after it. An empty
    'listoflists' prints nothing."""
    # the width of a column is that of its widest cell; rows that lack
    # the column do not contribute to it
    widths = []
    for row in listoflists:
        for i, cell in enumerate(row):
            size = len(str(cell))
            if i == len(widths):
                widths.append(size)
            elif size > widths[i]:
                widths[i] = size

    # the line under the header spans all columns and the single spaces
    # that separate them
    rule = "-" * (sum(widths) + len(widths) - 1)

    for rownum, row in enumerate(listoflists):
        cells = [str(cell).ljust(width) + " "
                 for cell, width in zip(row, widths)]
        lines = ["".join(cells)]
        if _header and rownum == 0:
            lines.append(rule)
        for line in lines:
            # "\n" and not os.linesep, files opened in text mode do the
            # platform specific newline translation themselves
            sys.stdout.write(line + "\n")
            if _output:
                _output.write(line + "\n")

def printcsv(listoflists, _output=None, _sep=";"):
    """Print a list of rows as separated values, one row per line.

    Every value of a row is followed by the separator '_sep', the last
    one included. If '_output' is set to an open, writable file object,
    the same text is written to it."""
    for row in listoflists:
        line = "".join([col + _sep for col in row])
        sys.stdout.write(line)
        sys.stdout.write("\n")
        if _output:
            _output.write(line)
            _output.write("\n")


def splitlist(list, n):
    """Distribute the items of 'list' over n smaller lists.

    Return a list holding the n lists. The items are dealt out one at a
    time starting from the end of 'list', so the ordering is destroyed
    and the returned lists differ in length by at most one item. Note
    that 'list' itself is emptied in the process.

    ValueError is raised if there are items to distribute but n is less
    than one. An empty 'list' together with such an n returns []."""
    if n < 1:
        if list:
            # there is no list to put the items in
            raise ValueError("cannot distribute items over %s lists" % n)
        return []
    # deal from the end of the input, then empty it like pop() would
    items = list[::-1]
    del list[:]
    return [items[i::n] for i in range(n)]
		
def resolvelist(id, hostnames):
    """Resolve every entry of 'hostnames' and append the answers to the
    shared list 'results'.

    'id' is the number of the calling thread; it is used in verbose
    output and as index into 'exit_mutexes'. 'hostnames' is a list of
    host names and/or IP addresses which are resolved sequentially in a
    separate thread: IP addresses are reverse resolved, host names are
    forward resolved and, if 'options.reverseresolve' is set, every
    address found is reverse resolved as well. Entries that fail to
    resolve are skipped.

    One row per IP address found is appended to 'results' in the
    format: [query, name, ip, ptr, alias, ...].

    When running with more than one thread, the thread signals that it
    is finished by acquiring its lock in 'exit_mutexes'. This is done
    even if the thread dies from an unexpected exception, otherwise the
    main thread would wait for it forever."""
    try:
        verbose(2, "Thread %s taking on %s hostnames" % (id, len(hostnames)))
        for hostname in hostnames:
            if options.wait > 0:
                verbose(3, "Thread %s sleeping for %s seconds" % (id, options.wait))
                time.sleep(float(options.wait))
            verbose(2, "Thread %s resolving %s" % (id, hostname))
            is_ip = ip_re.match(hostname)
            try:
                # if it's an IP we use byaddr else byname_ex
                if is_ip:
                    tmp = socket.gethostbyaddr(hostname)
                else:
                    tmp = socket.gethostbyname_ex(hostname)
            except socket.herror:
                # this exception is raised when an ip address does not have a
                # reverse name associated with it or when the query times out
                verbose(3, "Thread %d failed to resolve %s due to socket.herror: %s" %\
                        (id, hostname, sys.exc_info()[1].args[1]))
                continue
            except socket.gaierror:
                # this exception is raised when a host name does not have an ip
                # address associated with it, this will be raised all the time
                verbose(3, "Thread %d failed to resolve %s due to socket.gaierror: %s" %\
                        (id, hostname, sys.exc_info()[1].args[1]))
                continue
            # tmp is in the format: (hostname, [alias, ...], [ipaddr, ...])
            for ip in tmp[2]:
                verbose(2, "Thread %s appending %s" % (id, ip))
                if is_ip:
                    # the query was an ip, it has already been reverse resolved
                    rev = tmp[0]
                elif options.reverseresolve:
                    # we reverse resolve the ip address since the option is active
                    try:
                        rev = socket.gethostbyaddr(ip)[0]
                    except (socket.herror, socket.gaierror):
                        # a missing reverse name is not an error, leave it blank
                        rev = ""
                else:
                    # finally, if we chose not to
                    rev = ""
                row = [hostname, tmp[0], ip, rev] + tmp[1]
                if options.threads > 1:
                    # the lock is released even if the append fails
                    append_mutex.acquire()
                    try:
                        results.append(row)
                    finally:
                        append_mutex.release()
                else:
                    results.append(row)
        verbose(2, "Thread %s exiting" % id)
    finally:
        # signal that the thread is finished
        if options.threads > 1: exit_mutexes[id].acquire()


def expandips(ips):
    """Expand an IP range string to the list of IP addresses it stands for.

    Three forms are understood and the input is assumed to be well
    formed: a single address ("1.2.3.4"), a range in the last octet
    ("1.2.3.4-6") and a list of last octets ("1.2.3.4,7,8"). A range
    where the start is not lower than the end is reported with fatal()."""

    # XXX this functionality is still in beta

    parts = ips.split(".")
    # only the last octet can hold a range or a list
    tail = parts[3]

    if "-" not in tail and "," not in tail:
        return [ips]

    prefix = ".".join(parts[:3])

    if "-" in tail:
        low, high = tail.split("-")
        if not int(low) < int(high):
            fatal("malformed IP range '%s'" % ips)
        return ["%s.%d" % (prefix, octet)
                for octet in range(int(low), int(high) + 1)]

    return ["%s.%s" % (prefix, octet) for octet in tail.split(",")]


def fatal(msg):
    """Write 'msg', prefixed with the program name, to stderr and exit
    with status 1."""
    sys.stderr.write("%s: %s\n" % (me, msg))
    sys.exit(1)


def error(msg):
    """Write 'msg', prefixed with the program name, to stderr."""
    sys.stderr.write("%s: %s\n" % (me, msg))


def verbose(lvl, msg):
    """Print the message 'msg' to stdout unless its verbosity level
    'lvl' is higher than the one asked for, 'options.verbosity'."""
    if options.verbosity < lvl:
        return
    sys.stdout.write("%s\n" % (msg,))


def print_version(option, opt, value, parser):
    """Callback for the -V option: print program name and version, then
    exit with status 0. The arguments handed over by optparse are not
    used."""
    progname = os.path.basename(sys.argv[0])
    sys.stdout.write("%s %s\n" % (progname, version))
    sys.exit(0)


################################################
#                   globals
################################################

# name the program was invoked as, used as prefix by fatal() and error()
me = os.path.basename(sys.argv[0])

# revision keyword string, printed as-is by the -V option
version = "$Revision: 1.30 $"

# this will hold the results from the queries, the first row is the
# header; resolvelist() appends one row per IP address found
results = [("QUERY", "NAME", "IP", "PTR", "CNAME")]

# this list will hold all the addresses that are to be resolved
addresses = []

################################################
#         parse command line options
################################################

# one synopsis line per mode, continuation lines are indented so that
# they line up below the first one in the help output
usage = "\n".join([
    "(1) %prog [opts] <addr file|ip range>",
    "       (2) %prog [opts] <name file>[,name file] <domain>[,domain]",
    "       (3) %prog [opts] <domain>[,domain] <name file>[,name file]",
])

desc = ("%prog works in one of three modes: (1) Resolve all IP addresses and "
        "host names found in input file (\"mass resolve\"). (2) Try to use each "
        "string in name file(s) as a subdomain name for each domain in the comma-"
        "separated list (\"subdomain guessing\"). Or (3), opposite of (2), try "
        "different topdomains for each domain name (\"TLD guessing\").")

optparser = optparse.OptionParser(usage=usage, description=desc)

optparser.add_option(
    "-c", dest="csv", action="store_true", default=False,
    help="print output as comma-separated values")
optparser.add_option(
    "-o", dest="outfile", type="string", metavar="FILE",
    help="write output to FILE, verbose and debug output not included")
optparser.add_option(
    "-p", dest="passive", action="store_true", default=False,
    help="passive mode, just output the generated list of addresses, "
         "do not send any queries")
optparser.add_option(
    "-r", dest="reverseresolve", action="store_false", default=True,
    help="do not reverse resolve IP addresses found during forward resolving")
optparser.add_option(
    "-t", dest="threads", type="int", default=2, metavar="THREADS",
    help="use THREADS threads when resolving (default: %default)")
optparser.add_option(
    "-v", dest="verbosity", action="count", default=0,
    help="print verbose output (can be used up to three times)")
optparser.add_option(
    "-V", action="callback", callback=print_version,
    help="print version information")
optparser.add_option(
    "-w", dest="wait", type="float", default=0.0, metavar="SECS",
    help="pause for SECS seconds between each query (default: %default) "
         "this overrides threads and will set THREADS=1")

(options, args) = optparser.parse_args()

# a wait time only makes sense when the queries are sent one by one,
# so it overrides the number of threads
if options.wait > 0:
    options.threads = 1


################################################
#    prepare address list according to mode
################################################

def _readentries(filenames):
    """Return all entries found in the files named in the list 'filenames'.

    An entry is a line stripped of surrounding whitespace; blank lines
    and lines starting with '#' (comments) are skipped. IOError is
    raised if any of the files cannot be opened or read."""
    entries = []
    for filename in filenames:
        inputfile = open(filename, "rU")
        try:
            for line in inputfile:
                line = line.strip()
                if line and not line.startswith("#"):
                    entries.append(line)
        finally:
            # the file is closed even if reading fails half-way
            inputfile.close()
    return entries


if len(args) == 1:
    # we got one positional argument -> mode 1
    # the positional argument is either a file name or an ip address range

    # this regular expression will match "1.2.3.4", "1.2.3.4-5" and "1.2.3.4,7,8"
    ips_re = re.compile(r"^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}(-\d{1,3}|(,\d{1,3})+){0,1}$")

    if ips_re.match(args[0]):
        # we got an address range
        addresses = expandips(args[0])
    else:
        # we got a file name, the file contains a list of host names
        # and ip addresses
        try:
            addresses = _readentries([args[0]])
        except IOError:
            fatal("cannot open file '%s'" % args[0])

elif len(args) == 2:
    # we got two positional arguments -> mode 2 or 3
    #
    # which one it is cannot be told from the arguments alone: first the
    # first positional is tried as the comma-separated list of input
    # files (mode 2), if that fails the second one is tried (mode 3)
    #
    # the address list is only assigned once all the files of an attempt
    # have been read, a failed attempt leaves nothing behind
    try:
        names = _readentries(args[0].split(","))
        domains = args[1].split(",")
        # mode 2, prepend each name to each domain name
        addresses = ["%s.%s" % (name, domain)
                     for name in names for domain in domains]
    except IOError:
        # if we end up here, we might be in mode 3 or
        # we have a misspelled file name on our hands
        try:
            tlds = _readentries(args[1].split(","))
            domains = args[0].split(",")
            # mode 3, append each name to each domain name
            addresses = ["%s.%s" % (domain, tld)
                         for tld in tlds for domain in domains]
        except IOError:
            # if both fail, we have a bad file name and should bail out
            fatal("cannot open either of '%s' or '%s' as files" % (args[0], args[1]))

else:
    # bad number of arguments
    error("bad number of arguments")
    optparser.print_help()
    sys.exit(1)

# randomize the address list in place
random.shuffle(addresses)

# open handle for output file, outputfile is None if none was asked for
if options.outfile:
    # die if output file already exists, it is never overwritten
    if os.access(options.outfile, os.F_OK):
        fatal("file '%s' already exists" % options.outfile)
    try:
        outputfile = open(options.outfile, "wt")
    except IOError:
        fatal("cannot open file '%s'" % options.outfile)
    verbose(1, "Saving output to '%s'" % options.outfile)
else:
    outputfile = None

# if passive mode, just output the generated address list and exit
if options.passive:
    for address in addresses:
        sys.stdout.write("%s\n" % address)
        if options.outfile:
            # "\n" and not os.linesep, the file is opened in text mode
            # and does the platform specific newline translation itself
            outputfile.write(address + "\n")
    if options.outfile:
        outputfile.close()
    sys.exit(0)

verbose(1, "Doing queries for %s address(es)" % len(addresses))


################################################
#           perform the guessing
################################################

# matches a single dotted quad address such as "1.2.3.4", this is how
# resolvelist() tells IP addresses from host names
ip_re = re.compile(r"^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$")

if options.threads <= 1:
    # no threads needed, resolve everything right here
    verbose(1, "Doing it with one thread")
    resolvelist(0, addresses)
else:
    verbose(1, "Going with %d threads" % options.threads)
    # guards the shared results list while a row is appended
    append_mutex = thread.allocate_lock()
    # one lock per thread, a thread acquires its lock when it is done
    exit_mutexes = []
    # every thread gets its own share of the addresses
    sublists = splitlist(addresses, options.threads)
    verbose(2, "Spawning %d threads" % options.threads)
    for i, sublist in enumerate(sublists):
        exit_mutexes.append(thread.allocate_lock())
        thread.start_new(resolvelist, (i, sublist))

    # poll the locks until every thread has signalled that it is done
    verbose(2, "Waiting for threads to finish...")
    for mutex in exit_mutexes:
        while not mutex.locked():
            time.sleep(0.1)


################################################
#             print the results
################################################

# > 1 since the headers count
if len(results) > 1:
    verbose(1, "Resolved %s address(es)" % (len(results) - 1))
    try:
        if options.csv:
            printcsv(results, outputfile)
        else:
            printmatrix(results, outputfile, True)
    finally:
        # the output file is closed even if printing fails
        if options.outfile:
            outputfile.close()
else:
    sys.stdout.write("Sorry mate, none of the addresses resolved.\n")
    if options.outfile:
        # nothing was written to the output file; it did not exist before
        # this run (we bail out early if it does), so close it and do not
        # leave an empty file behind
        outputfile.close()
        try:
            os.remove(options.outfile)
        except OSError:
            error("cannot remove empty file '%s'" % options.outfile)
# eof