#!/usr/bin/env python
"""
	ww - weaknesses walker
	----------------------
	Choose your tool among the supported ones:
		- flawfinder
		- rats
		- its4
		- vdb
	Then, select the path you want to run it on. The output format will be
	the one used by the SATE project: http://samate.nist.gov/index.php/SATE

	Disclaimer:
	This tool is not meant to be especially useful in itself; it only runs different types of tools
	and reports their answers in a common format.
	

	ww Version 0.1 - Romain Gaucher - http://rgaucher.info

"""
import os,sys,re,popen2
from xml.sax import *   # Need PyXML [http://pyxml.sourceforge.net/]

# Source files collected by get_list_files() for the tools that scan one file at a time
files_list = []
# Accepted source extensions; compared against the lower-cased extension of a file
extensions_list = ["c","cc","cpp","cxx","h","hh","hpp","hxx"]
# Values accepted by --tool
tools_list      = ["flawfinder","rats","its4","vdb"]
# Values accepted by --format
format_list     = ["plain","original","sate"]
# basic configuration (overridden from the command line by the main dispatcher)
t_name    = ""          # selected tool, or product name in --vdb mode
t_version = ""          # product version, only read by run_vdb()
file_out  = "out.xml"   # file receiving the report
format    = "both"      # output format; only "sate" triggers the XML conversion
directory = "./"        # directory (or single file) handed to the tool
is_file   = False       # True when `directory` names a single file
xml_store = []          # weakness dicts shared by the parsers and write_sate_xml_dict()

# regular expressions for matching the outputs
# (raw strings, so that \d, \[ and \( reach the regex engine untouched)
# flawfinder --html item:  file:line: <b>  [severity] </b> (type) <i> description </i>
re_ff = re.compile(r"^(.+):([\d]+): <b>  \[([\d]+)\] </b> \((.*)\) <i> (.*) </i>$", re.I)
# its4 report line:  file:line:(severity) type
re_it = re.compile(r"^(.+):([\d]+):\((.*)\) (.+)$",re.I)

def strip_characters(str_buffer):
	"""
		Return str_buffer with every newline character removed
	"""
	pieces = str_buffer.split('\n')
	return ''.join(pieces)

def good_extension(file_name):
	"""
		Tell whether file_name looks like source code: the text after its last
		dot, lower-cased, must be one of extensions_list.
		A name without any dot is never accepted.
	"""
	last_dot = file_name.rfind('.')
	if last_dot < 0:
		return False
	return file_name[last_dot+1:].lower() in extensions_list


def get_list_files(directory):
	"""
		Collect into the global files_list every file found below `directory`
		(sub-directories included) that good_extension() accepts; a path that
		is already listed is not added twice.
		A directory that cannot be listed is reported on stdout under its own
		name and skipped; the rest of the walk goes on.
	"""
	global files_list
	try:
		# listing is the call that fails on an unreadable or missing directory
		names = os.listdir(directory)
	except (IOError, os.error), error_name:
		print "get_list_files(%s) --> " % directory, error_name
		return
	for n in names:
		srcname = os.path.join(directory,n)
		try:
			if os.path.isdir(srcname):
				get_list_files(srcname)
			elif os.path.isfile(srcname) and good_extension(srcname):
				if srcname not in files_list:
					files_list.append(srcname)
		except (IOError, os.error), error_name:
			print "get_list_files(%s) --> " % directory, error_name
				

def launch_process(cmd_str):
	"""
		Run cmd_str through the shell and return what it printed as the pair
		(stderr_lines, stdout_lines). Each element is a list of lines that
		keep their trailing newline, as file.readlines() gives them.

		The two pipes are drained together by communicate(): reading one pipe
		up to its end before touching the other can block forever once the
		child has filled the pipe nobody is reading.
	"""
	import subprocess  # local import: nothing else in the file needs it

	def split_lines(text):
		# same cut as readlines(): after every '\n', terminator kept
		chunks = text.split('\n')
		lines = [chunk + '\n' for chunk in chunks[:-1]]
		if chunks[-1]:
			lines.append(chunks[-1])
		return lines

	child = subprocess.Popen(
		cmd_str,
		shell=True,
		stdin=subprocess.PIPE,
		stdout=subprocess.PIPE,
		stderr=subprocess.PIPE,
		universal_newlines=True
	)
	# communicate() also closes the child's stdin and waits for it to exit
	(out, err) = child.communicate()
	return (split_lines(err), split_lines(out))


def write_sate_xml_ff(ff_id,ff_fname,ff_nb,ff_severity,ff_type,ff_description):
	"""
		Render one flawfinder finding as a SATE <weakness> element.
		ff_id goes in the id attribute, ff_type in <name>, ff_fname / ff_nb in
		the path / line attributes of <location>, ff_severity in <grade> and
		ff_description in <textoutput>. The values are inserted as they are.
	"""
	template = "\n".join([
		'',
		'<weakness id="%s">',
		'    <name>%s</name>',
		'    <location path="%s" line="%s"/>',
		'    <grade severity="%s" />',
		'    <output>',
		'        <textoutput>%s</textoutput>',
		'    </output>',
		'</weakness>',
	])
	values = (ff_id, ff_type, ff_fname, ff_nb, ff_severity, ff_description)
	return template % values

def run_flawfinder():
	"""
		Run flawfinder on a given project directory.
		Flawfinder will do the scan of files itself
	"""
	ff_format = ""
	if format == "sate":
		ff_format = "--html"	
	cmd_str = "flawfinder %s %s" % (ff_format,directory)
	(stderr,stdout) = launch_process(cmd_str)
	if len(stdout) > 0 or len(stderr) > 0:
		if len(stdout) == 0:
			# just manage the error
			for l in stderr:
				print "",l
		else:
			# handling the normal output...
			if format == "sate":
				buffer = ''.join(stdout)
				# looking between <ul></ul>
				output = strip_characters(buffer[buffer.find('<ul>')+4 : buffer.rfind('</ul>')])
				# to convert, a vulnerability start with <li> and finish to the </i>
				output = output.split('<li>')
				out_xml_str = ""
				for l in output:
					if re_ff.match(l):
						out = re_ff.search(l)
						ff_fname       = out.group(1)
						ff_line        = out.group(2)
						ff_severity    = out.group(3)
						ff_type        = out.group(4)
						ff_description = out.group(5)
						out_xml_str += write_sate_xml_ff(hash(l), ff_fname, ff_line,ff_severity,ff_type,ff_description)
				
				(foo,get_version) = launch_process("flawfinder --version")
				ff_version = strip_characters(''.join(get_version))
				return "<?xml version=\"1.0\"?>\n<report tool_name=\"flawfinder\" tool_version=\"" + ff_version + "\">" + out_xml_str  + "</report>"					
			else:
				return '\n'.join(stdout)			
	else:
		return False
	return False


# Handle the RATS-XML file with a SAX Parser
class RATSXMLHandler(ContentHandler):
	"""
		SAX content handler collecting the vulnerabilities of a RATS XML report.

		Each time a <vulnerability> element closes, a dict is appended to the
		global xml_store holding the keys met inside that element:
		  "grade"        <severity> text converted by get_grade_from_rats()
		  "name"         <type> text
		  "description"  <message> texts, joined with newlines
		  "location"     list of (file name, line) tuples, one per <line>
		Creating a handler empties xml_store.
	"""
	def __init__(self):
		global xml_store
		# set up the state of the base handler as well
		ContentHandler.__init__(self)
		self.in_vulnerability = False
		self.in_severity      = False
		self.in_type          = False
		self.in_message       = False
		self.in_file          = False
		self.in_name          = False
		self.in_line          = False
		self.string           = ""   # text gathered for the element being read
		self.rt_dict          = {}   # vulnerability being built
		self.name             = ""   # file name the next <line> elements belong to
		xml_store             = []
	def startElement(self, name, attrs):
		# raise the flag of the element and, for the elements whose text is
		# read, start from an empty text buffer
		if name == 'vulnerability':
			self.in_vulnerability = True
		elif name == 'severity':
			self.in_severity = True
			self.string = ""
		elif name == "type":
			self.in_type = True
			self.string = ""
		elif name == "message":
			self.in_message = True
			self.string = ""
		elif name == "file":
			self.in_file = True
		elif name == "name":
			self.in_name = True
			self.string = ""
		elif name == "line":
			self.in_line = True
			self.string = ""
	def characters(self, ch):
		# the text of one element may be delivered in several pieces
		self.string += ch
	def endElement(self, name):
		global xml_store
		if name == 'vulnerability':
			# the vulnerability is complete: store it and start a new one
			self.in_vulnerability = False
			xml_store.append(self.rt_dict)
			self.rt_dict = {}
		elif name == 'severity' and self.in_vulnerability:
			self.in_severity = False
			self.rt_dict["grade"] = get_grade_from_rats(self.string)
			self.string = ""
		elif name == "type" and self.in_vulnerability:
			self.in_type = False
			self.rt_dict["name"] = self.string
			self.string = ""
		elif name == "message" and self.in_vulnerability:
			self.in_message = False
			# several messages are kept, one per line
			if "description" not in self.rt_dict:
				self.rt_dict["description"] = self.string
			else:
				self.rt_dict["description"] += "\n" + self.string
			self.string = ""
		elif name == "file" and self.in_vulnerability:
			self.in_file = False
			self.name = ""
		elif name == "name" and self.in_file:
			self.in_name = False
			if "location" not in self.rt_dict:
				self.rt_dict["location"] = []
			self.name = self.string
			self.string = ""
		elif name == "line":
			self.in_line = False
			# a <line> met before any <name> must not fail on a missing list
			self.rt_dict.setdefault("location", []).append((self.name,self.string))
			self.string = ""

def get_grade_from_rats(rt_string):
	"""
		Convert a RATS severity label into the grade written in the report:
		"high" gives "3", "medium" gives "2" and anything else gives "1".
		Spaces are dropped and the case is ignored before comparing.
	"""
	label = rt_string.lower().replace(' ','')
	return {"high": "3", "medium": "2"}.get(label, "1")

def write_sate_xml_dict(elmt):
	"""
		Render one xml_store entry as a SATE <weakness> element.
		elmt must hold the keys "name", "grade", "description" and "location",
		the last one being a sequence of (path, line) pairs; each pair gives
		one <location> line. The id is a hash computed from the dict values.
	"""
	weakness_id = hash('|'.join(str(elmt.values())))
	weakness_name = elmt["name"]
	severity = elmt["grade"]
	text = elmt["description"]
	locations = ''.join([
		"    <location path=\"%s\" line=\"%s\" />\n" % (loc[0],loc[1])
		for loc in elmt["location"]
	])
	template = "\n".join([
		'',
		'<weakness id="%s">',
		'    <name>%s</name>',
		'%s',
		'    <grade severity="%s" />',
		'    <output>',
		'        <textoutput>%s</textoutput>',
		'    </output>',
		'</weakness>',
	])
	return template % (weakness_id, weakness_name, locations, severity, text)


def run_rats():
	"""
		Run flawfinder on a given project directory.
		Flawfinder will do the scan of files itself
	"""
	rt_format = ""
	if format == "sate":
		rt_format = "--xml"	
	cmd_str = "rats --resultsonly %s %s" % (rt_format,directory)
	(stderr,stdout) = launch_process(cmd_str)
	if len(stdout) > 0 or len(stderr) > 0:
		if len(stdout) == 0:
			# just manage the error
			for l in stderr:
				print "",l
		else:
			# handling the normal output...
			if format == "sate":
				buffer = ''.join(stdout)
				# looking between <ul></ul>
				output = buffer
				parser = make_parser()
				rt_handler = RATSXMLHandler()
				# Tell the parser to use our handler
				parser.setContentHandler(rt_handler)
				parser.feed(output)				
				out_xml_str = ""
				for e in xml_store:
					out_xml_str += write_sate_xml_dict(e)
				(foo,get_version) = launch_process("rats -h")
				rt_version = strip_characters(''.join(get_version[0]))
				rt_version = rt_version.replace('RATS v','')
				rt_version = rt_version.replace(' - Rough Auditing Tool for Security','')			
				return "<?xml version=\"1.0\"?>\n<report tool_name=\"rats\" tool_version=\"" + rt_version + "\">" + out_xml_str  + "\n</report>"					
			else:
				return '\n'.join(stdout)			
	else:
		return False
	return False
	return


def get_grade_from_its(it_string):
	"""
		Convert an ITS4 severity label into the grade written in the report:
		"urgent" gives "3", "risky" gives "2" and anything else gives "1".
		Spaces are dropped and the case is ignored before comparing.
	"""
	grades = {"urgent": "3", "risky": "2"}
	key = it_string.replace(' ','').lower()
	if key in grades:
		return grades[key]
	return "1"


def run_its4():
	"""
		Run ITS4 on the configured target. ITS4 is launched on one file at a
		time: on the target itself when is_file is set, otherwise on every
		file get_list_files() finds below the directory.
		Returns the reports concatenated under a "File:" header or, for the
		"sate" format, rewritten as a SATE <report> (xml_store is replaced).
	"""
	global xml_store
	line_mark = "<br>"
	reports = {}

	# one report per file, newlines replaced by the <br> marker
	if is_file:
		(stderr,stdout) = launch_process("its4 %s" % (directory))
		reports[directory] = strip_characters(line_mark.join(stdout))
	else:
		get_list_files(directory)
		for src in files_list:
			(stderr,stdout) = launch_process("its4 %s" % (src))
			if src not in reports:
				reports[src] = strip_characters(line_mark.join(stdout))

	if format != "sate":
		# Just return the concatenated buffers
		plain = ""
		for src in reports:
			plain += "------------------------------------------\n"
			plain += "File: %s\n" % src
			plain += "\n%s" % reports[src].replace("<br>","\n")
		return plain

	# Transform the buffers into XML SATE entries
	xml_store = []
	for src in reports:
		# the findings of one report are separated by a dashed line
		for section in reports[src].split("----------------"):
			locations = []
			description = ""
			for text in section.split("<br>"):
				hit = re_it.match(text)
				if hit:
					# location line; severity and type of the last one are kept
					locations.append((hit.group(1), hit.group(2)))
					it_severity = hit.group(3)
					it_type     = hit.group(4)
				else:
					description += text
			if len(locations) > 0:
				xml_store.append({
					"name"       : it_type,
					"location"   : locations,
					"grade"      : get_grade_from_its(it_severity),
					"description": description,
				})

	out_xml_str = ""
	for entry in xml_store:
		out_xml_str += write_sate_xml_dict(entry)

	# the version is cut out of the first line printed by "its4 --version"
	(foo,get_version) = launch_process("its4 --version")
	it_version = strip_characters(''.join(get_version[0]))
	it_version = it_version.replace("It's the software, stupid! (Security Scanner) Version ",'')
	it_version = it_version[:it_version.find(',')+1]
	return "<?xml version=\"1.0\"?>\n<report tool_name=\"its4\" tool_version=\"" + it_version + "\">" + out_xml_str  + "\n</report>"

# Levenshtein distance
# stolen from M.L. Hetland
def ld(a, b):
	"""
		Levenshtein (edit) distance between the sequences a and b: the
		smallest number of single-item insertions, deletions and
		substitutions turning one into the other.
		Only two rows of the table are kept, sized on the shorter input.
	"""
	n, m = len(a), len(b)
	if n > m:
		# keep a as the shorter sequence, so that a row holds n+1 cells
		a,b = b,a
		n,m = m,n
	current = xrange(n+1)
	for i in xrange(1,m+1):
		# new row: i in the first cell, the n others are filled below
		previous, current = current, [i]+[0] * n
		for j in xrange(1, n+1):
			add, delete = previous[j] + 1, current[j-1] + 1
			change = previous[j-1]
			if a[j-1] != b[i-1]:
				change +=1
			current[j] = min(add, delete, change)
	return current[n]


def run_vdb():
	"""
		Look the product t_name / version t_version up on the NVD web site and
		return a <report> string holding one <weakness> per vulnerability
		found. The pages are scraped with plain string searches.
		When the product name is not on the product page, the closest word of
		that page (Levenshtein distance, see ld()) is searched instead; the
		global t_name is updated accordingly.
		When the product or the version is not found, the report only holds a
		comment.
		NOTE(review): relies on the project-local wwwCall helper; get() and
		post() are assumed to return file-like objects -- confirm.
	"""
	global t_name, t_version
	from wwwCall import wwwCall
	http = wwwCall('http://nvd.nist.gov')
	handler = http.get("http://nvd.nist.gov/nvd.cfm?advancedsearch&productstart=10")
	t_name = t_name.lower()
	html = handler.read().lower()
	if t_name not in html:
		# Look for the product with the closest name
		html = re.sub("<.*?>","",html)
		html_list = html.split(' ')
		min_value = 42
		min_word  = "__false__"
		for w in html_list:
			if not w:
				# consecutive spaces give empty words: never a product name
				continue
			c_value = ld(t_name,w)
			if c_value < min_value:
				min_word = w
				min_value= c_value
		if min_word == "__false__":
			print " ww cannot find such a project in the current NVD"
			return "<report tool_name=\"NVD\" tool_version=\"2.0\">\n<!-- %s not found -->\n</report>" % t_name
		print " NVD: ww couldn't find the exact project name in the NVD and choose to look for '%s'" % min_word		
		# search for the name that was just announced, not for the unknown one
		t_name = min_word
	# The product exists in the NVD
	handler = http.get("http://nvd.nist.gov/nvd.cfm?advancedsearch&product_command="+t_name)
	html = handler.read().lower()
	# keep the content of the version <select>; 23 is the length of its opening tag
	buffer = html[html.find("<select name=\"version\">")+23:]
	buffer = buffer[:buffer.find("</select>")]
	html = re.sub("<.*?>","",buffer)
	if t_version in html:	
		# Retrieve the vulnerabilities
		handler = http.post("http://nvd.nist.gov/nvd.cfm?startrow=1",{"Search":"Search","product":t_name,"version":t_version,"resources":"cve"})
		if handler:
			html = handler.read()
			# every vulnerability starts with this <div>; the footnotes end the list
			match= "<div class=\"rightbar_title\">"
			html = html[html.find(match)+len(match):]
			html = html[:html.find("<div class=\"footnotes\">")]
			html = html[:html.rfind("<p>&nbsp;</p>")]
			list_vulns = html.split(match)

			out_xml_sate = ""
			xml_fmt = """<weakness id="%s">\n\t<grade severity="%s" />\n\t<output>\n\t\t<textoutput>%s</textoutput>\n\t</output>\n</weakness>\n"""
			
			for vuln_instance in list_vulns:
				# keep the text going from the summary to the last <br>, tags removed
				vuln_instance = vuln_instance[vuln_instance.find("<em><strong>Summary:</strong></em>"):vuln_instance.rfind('<br>')]
				vuln_instance = re.sub("<.*?>","",vuln_instance)
				v_id          = hash(vuln_instance)
				v_description = vuln_instance[vuln_instance.find("Summary:")+9:vuln_instance.find("Published")]
				v_severity    = vuln_instance[vuln_instance.find("CVSS Severity:")+14:vuln_instance.rfind("CVE")]
				
				# collapse the whitespace of the summary, keep the letters of the severity
				v_description = ' '.join(v_description.split())
				v_severity = re.sub("[^a-zA-Z]+","",v_severity)
				if v_severity == "High":
					v_severity = "3"
				elif v_severity == "Medium":
					v_severity = "2"
				else:
					v_severity = "1"
				out_xml_sate += xml_fmt % (v_id,v_severity,v_description)			

			return "<report tool_name=\"NVD\" tool_version=\"2.0\">\n"+ out_xml_sate +"\n</report>"				
	else:
		print " ww cannot find such a project/version in the current NVD"
		return "<report tool_name=\"NVD\" tool_version=\"2.0\">\n<!-- %s version %s not found -->\n</report>" % (t_name,t_version)
	# reached when the search request gave no response object
	return "<report tool_name=\"NVD\" tool_version=\"2.0\">\n<!-- plouf -->\n</report>"

def help():
	print " Weaknesses Walker: "
	print " Usage: ./ww.py --tool [rats|flawfinder|its4]   --file [fname] --format [sate|original] [directory]"
	print "        ./ww.py --vdb  [project name] [version] --file [fname]"
	print " 'ww' will look at the directory for file names then, run the selected tool against the files or the"
	print " directory, depending on how the tools work. You can keep the original format or use the SATE ones, "
	print " depending on what you want."
	print " The --vdb options is sets to look at the NVD and retrieve the vulnerabilites in SATE format."

def tool_name(n):
	"""
		Return the lower-cased tool name when it is one of tools_list,
		None otherwise
	"""
	lowered = n.lower()
	if lowered not in tools_list:
		return None
	return lowered

def format_name(n):
	"""
		Return the lower-cased format name when it is one of format_list,
		None otherwise
	"""
	candidate = n.lower()
	if candidate not in format_list:
		return None
	return candidate

	
"""
	Main dispatcher
"""
if __name__ == "__main__":
	nargs     = len(sys.argv) 
	if nargs == 8:
		for i in range(nargs):
			s = sys.argv[i]
			if s == "--tool":
				t_name = tool_name(sys.argv[i+1])
			elif s == "--file":
				file_out  = sys.argv[i+1]
			elif s == "--format":
				format    = format_name(sys.argv[i+1])
		directory = sys.argv[nargs-1]
		
		# You can also test with a single file
		if not os.path.isdir(directory) and os.path.isfile(directory):
			is_file = True
			
		# print the information of the current scan
		print " Scanning %s with %s; the output (%s) will be in %s format" % (directory,t_name,file_out,format)
		
		# cleaning the listing
		files_list = []
		# basic dispatcher
		out = None
		try:
			if t_name == "flawfinder":
				out = run_flawfinder()
			elif t_name == "rats":
				out = run_rats()
			elif t_name == "its4":
				out = run_its4()
			else:
				out = run_vdb()
		except KeyboardInterrupt:
			print " Stopped by user"
		
		try:
			f_out = open(file_out,"w")
			f_out.write(out)
			f_out.close()
		except IOError:
			print " Error, cannot write the output file:",file_out
	elif nargs == 6:
		# looking at the NVD
		for i in range(nargs):
			s = sys.argv[i]
			if s == "--vdb":
				t_name    = sys.argv[i+1]
				t_version = sys.argv[i+2]
			elif s == "--file":
				file_out  = sys.argv[i+1]
		try:
			out = run_vdb()
		except KeyboardInterrupt:
			print " Stopped by user"
		
		try:
			f_out = open(file_out,"w")
			f_out.write(out)
			f_out.close()
		except IOError:
			print " Error, cannot write the output file:",file_out	
	else:
		help()
