#!/usr/bin/python
# -*- coding: utf-8 -*-
""" $Id: htmldiff,v 1.62 2016/10/06 10:46:19 dom Exp $
"""
import atexit
import cgi
import http_auth
import httplib
import os
import re
import surbl
import sys
import tempfile
import tidy
import urlparse
from subprocess import Popen, PIPE
CONTENT_TYPE = "text/html;charset=utf-8"
Page = """
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en-US">
<head><title>HTML Diff service</title>
<link rel="stylesheet" href="http://www.w3.org/StyleSheets/base" />
</head>
<body>
<p><a href="http://www.w3.org/"><img src="http://www.w3.org/Icons/w3c_home" alt="W3C"/></a> <a href="http://www.w3.org/2003/Editors">W3C Editors homepage</a></p>
<h1>Create Diff between HTML pages</h1>
"""
Page2 = """
<form method="GET">
<p>Address of reference document: <input name="doc1" type="url" value="%s" style="width:100%%"/></p>
<p>Address of new document: <input name="doc2" type="url" value="%s" style="width:100%%"/></p>
<p><input type="submit" value="Get diff"/></p>
</form>
<p><strong>Tip</strong>: if the document follows the W3C convention of linking to its previous version, you only need to give the address of the new document; the link to the previous version is detected automatically.</p>
<h2>Diff markings</h2>
<p>This service relies on <a href="https://www.gnu.org/software/diffutils/">GNU diff</a>. The differences found are roughly marked as follows:</p>
<ul>
<li>deleted text is shown in pink with down arrows (as styled for a &lt;del&gt; element),</li>
<li>replaced text is shown in green with bidirectional arrows,</li>
<li>newly inserted text is shown in yellow with up arrows (as for an &lt;ins&gt; element).</li>
</ul>
<address>
script $Revision: 1.62 $ of $Date: 2016/10/06 10:46:19 $<br />
by <a href="http://www.w3.org/People/Dom/">Dominique Hazaël-Massieux</a><br />based on <a href="https://dev.w3.org/cvsweb/2009/htmldiff/htmldiff.pl">Shane McCarron’s Perl script</a> wrapped in a <a href="http://dev.w3.org/cvsweb/2009/htmldiff/">Python CGI</a>
</address>
</body>
</html>
"""
def checkInputUrl(url):
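    """Reject URLs we refuse to fetch: file: URLs, URLs without a
    plausible scheme, and URLs listed in SURBL (a spam URI blocklist);
    responds with a 403 and exits in either case."""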
checker = surbl.SurblChecker('/usr/local/share/surbl/two-level-tlds','/afs/w3.org/pub/WWW/Systems/Server/debian/generic/usr/local/etc/surbl.whitelist')
if url[:5] == 'file:' or len(urlparse.urlparse(url)[0])<2:
print "Status: 403"
print "Content-Type: text/plain"
print
print "sorry, I decline to handle file: addresses"
sys.exit()
elif checker.isMarkedAsSpam(url):
print "Status: 403"
print "Content-Type: text/plain; charset=utf-8"
print
print "sorry, this URL matches a record known in SURBL. See http://www.surbl.org/"
sys.exit()
def copyHeader(copy_func, source, key, header_name=None):
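    """Copy the header `key` from `source` via `copy_func`, renaming it
    to `header_name` if given; returns True if the header was present."""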
value = source.get(key)
if not value:
return False
elif header_name is None:
header_name = key
copy_func(header_name, value)
return True
def setupRequest(source_headers):
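    """Create a URL opener that forwards the client's If-Modified-Since
    header and IP address (as X_Forward_IP_Addr) with outgoing requests."""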
opener = http_auth.ProxyAuthURLopener()
copyHeader(opener.addheader, source_headers, 'If-Modified-Since')
copyHeader(opener.addheader, os.environ, 'REMOTE_ADDR', 'X_Forward_IP_Addr')
return opener
def tidyFile(file):
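    """Tidy the HTML in `file` into a fresh temporary file.

    The first 4K are sniffed for an HTML5 doctype, in which case tidy is
    given extra options so it accepts the newer elements; if the first
    pass fails, the HTML5 options are retried once. Returns a tuple of
    (tidied file, tidy errors)."""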
    # default options for tidy
    options = dict(tidy_mark=0, show_warnings=0, quiet=1, char_encoding='utf8')
html5 = re.search(r"<!doctype\s+html\s*>", file.read(4096),
re.IGNORECASE)
file.seek(0)
html5_options = {"add_xml_space": "no",
"output_xhtml": "no",
"tidy_mark": "no",
"new_blocklevel_tags": 'article,aside,canvas,dialog,details,figcaption,figure,footer,header,hgroup,menu,nav,section,main,summary,math,semantics,mrow,mfenced,mtable,mtr,mtd,mi,mn,msub,mo,mfrac,munderover,mtext,svg,g,image,rect,text,desc,line,path,polygon,ellipse,tspan,defs,feoffset,fecolormatrix,filter,fegaussianblur,feblend,marker,circle',
"new_inline_tags": 'video,audio,canvas,ruby,rt,rp,time,meter,progress,track,source,emu-val,emu-nt,emu-t,mark',
"break_before_br": "no",
"vertical_space": "no",
"enclose_text": "no",
"numeric_entities": "yes",
"wrap": "1000",
"wrap_attributes": "no",
"drop_empty_paras": "no"
}
if html5:
options.update(html5_options)
newtidy = tidy.parseString(file.read(), **options)
if len(newtidy.errors) > 0:
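        # tidy reported errors; if the HTML5 options were not already
        # applied, retry once with them before giving up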
if not html5:
file.seek(0)
options.update(html5_options)
newtidy = tidy.parseString(file.read(), **options)
file.close()
file = tempfile.NamedTemporaryFile(
mode='w+', prefix='htmldiff-', suffix='.html')
atexit.register(file.close)
file.write(str(newtidy))
file.flush()
file.seek(0)
return (file, newtidy.errors)
def matchPredecessorRel(rel):
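    """Tell whether a rel attribute value contains the
    predecessor-version link relation."""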
    return rel and "predecessor-version" in rel.lower().split()
def mirrorURL(url, opener):
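    """Retrieve `url` into a local temporary file, transparently
    gunzipping and tidying it; returns (file, headers) on success, or
    (None, {}) with the failure described in opener.error."""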
try:
filename, headers = opener.retrieve(url)
except IOError, error:
opener.error = "I/O error: %s %s" % (error.errno, error.strerror)
except httplib.InvalidURL:
opener.error = "Invalid URL submitted"
except AttributeError: # ProxyAuthURLopener returned None.
pass # There's already an error set.
else:
atexit.register(os.unlink, filename)
file = open(filename)
        if headers.get("content-encoding") == "gzip":
import gzip
from StringIO import StringIO
data = StringIO(file.read())
file.close()
file = gzip.GzipFile(fileobj=data)
file,errors = tidyFile(file)
if len(errors) == 0:
return (file, headers)
else:
opener.error = "Tidy errors: %s" % (str(errors))
return (None, {})
def showPage(url1='', url2='', error_html='', **headers):
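    """Print the given HTTP headers (underscores in the keyword names
    become dashes), then the input form and an optional error, and exit."""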
for name, value in headers.items():
print "%s: %s" % (name.replace('_', '-'), value)
print
print Page
print error_html
print Page2 % (url1, url2)
sys.exit()
def serveRequest():
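    """Process one CGI request: fetch both documents, discovering the
    previous version from doc2 when doc1 is omitted, then run htmldiff
    over the tidied copies and emit the result."""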
fields = cgi.FieldStorage()
    if not fields.has_key('doc2'):
showPage(Content_Type=CONTENT_TYPE)
# if doc1 is not specified, we load doc2 to check if it has a previous version link
doc2 = fields['doc2'].value
checkInputUrl(doc2)
url_opener2 = setupRequest(fields.headers)
newdoc, newheaders = mirrorURL(doc2, url_opener2)
if fields.has_key('doc1'):
doc1 = fields['doc1'].value
elif newdoc is not None:
from BeautifulSoup import BeautifulSoup
soup = BeautifulSoup(newdoc.read())
newdoc.seek(0)
        try:
            doc1 = soup.find(text=re.compile("Previous Version", re.IGNORECASE)).findNext(name="a", attrs={"href": True})["href"]
        except (AttributeError, TypeError, KeyError):
            # no "Previous Version" text found; fall back to an explicit
            # rel="predecessor-version" link
            try:
                doc1 = soup.find(name=["a", "link"], attrs={"href": True, "rel": matchPredecessorRel})["href"]
            except (AttributeError, TypeError, KeyError):
                doc1 = None
else:
doc1 = None
    if not doc1:
showPage(Content_Type=CONTENT_TYPE)
checkInputUrl(doc1)
esc1 = cgi.escape(doc1, True)
esc2 = cgi.escape(doc2, True)
urlcomponents1 = urlparse.urlparse(doc1)
urlcomponents2 = urlparse.urlparse(doc2)
# if same domain, we can use the same urlopener
# otherwise, we create a separate one
if urlcomponents2[1] == urlcomponents1[1]:
url_opener = url_opener2
else:
url_opener = setupRequest(fields.headers)
refdoc, refheaders = mirrorURL(doc1, url_opener)
if not (refdoc and newdoc):
http_error = ""
url = ""
if not refdoc:
http_error = url_opener.error
url = esc1
else:
http_error = url_opener2.error
url = esc2
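        # when the opener's error looks like an HTTP status line
        # ("404 Not Found", ...), relay its code as our own status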
if re.match("^[1234][0-9][0-9] ", http_error):
print "Status: %s" %(http_error)
error="<p style='color:#FF0000'>An error (%s) occured trying to get <a href='%s'>%s</a>.</p>" % (cgi.escape(http_error), url, url)
showPage(esc1, esc2, error, Content_Type=CONTENT_TYPE)
print "Content-Type: text/html"
if newheaders.has_key('Content-Type'):
contentType = cgi.parse_header(newheaders["Content-Type"])
if contentType[1].has_key('charset'):
charset = contentType[1]['charset'].lower()
#if charset == "iso-8859-1":
# options["char_encoding"]='latin1'
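    # pass the origin's caching headers through to the client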
for proxy_header in ('Last-Modified', 'Expires'):
if copyHeader(lambda header, value: sys.stdout.write("%s: %s" %(header, value)), newheaders, proxy_header):
print
print
p = Popen(["/usr/local/bin/htmldiff", refdoc.name, newdoc.name],
stdin=PIPE, stdout=PIPE, stderr=PIPE)
sys.stdout.flush()
sys.stderr.flush()
    (out, err) = p.communicate()
if err:
error = "<p style='color:#FF0000'>An error occured when running <code>htmldiff</code> on the documents:</p><pre>%s</pre>" % (cgi.escape(err),)
showPage(esc1, esc2, error)
else:
print out
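# Only act when invoked as a CGI script: the web server sets SCRIPT_NAME
# in the environment.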
if __name__ == '__main__':
if os.environ.has_key('SCRIPT_NAME'):
serveRequest()
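# Illustrative local test (hypothetical path and URLs; assumes a Python 2
# environment with the W3C-specific modules -- http_auth, surbl, tidy --
# and the SURBL data files in place):
#
#   SCRIPT_NAME=/htmldiff REQUEST_METHOD=GET \
#   QUERY_STRING='doc1=http://example.org/old&doc2=http://example.org/new' \
#   python htmldiff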