# xml/impl/src/com/intellij/xml/util/documentation/html5TagTableGen.rb - platform/tools/idea - Git at Google

 require 'rexml/document'
 require 'set'

 # Load the tag and attribute names that existing property tables already
 # describe: the html4 names are skipped by the generator, the html5 names are
 # used afterwards to verify the generated data.
 known4Tags = Set.new
 known4Attributes = Set.new
 known5Tags = Set.new
 known5Attributes = Set.new

 [
   ["htmltable.xml", known4Tags, known4Attributes],   # html4: names to skip
   ["html5table.xml", known5Tags, known5Attributes]   # html5: names to verify against
 ].each do |path, tags, attributes|
   # Block form of File.open closes the handle even if REXML raises while parsing.
   File.open(path) do |table_file|
     table = REXML::Document.new(table_file)
     table.elements.each("html-property-table/tag") { |e| tags << e.attributes["name"] }
     table.elements.each("html-property-table/attribute") { |e| attributes << e.attributes["name"] }
   end
 end

 # read html5 spec
 generatedTags = Set.new
 result = "<html-property-table baseHelpRef=\"http://www.w3.org/html/wg/drafts/html/master/\">\n"
 # File.read opens, reads and closes the spec in one call, so no handle stays open.
 content = File.read("html5.html")
 # End position of the last tag row that was emitted (0 if none was); the
 # attribute scan further down starts from here.
 offset = 0
 # parse tags: every match is one table row, captured as
 # (link target, tag name, description, optional closing </td>).
 content.scan(/<tr><th><code><a href="([^"]+)">([^<]+).*<\/th>\s*<td>(?:<a href="[^"]+">)?([^<]+).*(<\/td>)?/) do |helpref, name, description, _closing_td|
   row = Regexp.last_match
   next if known4Tags.include?(name)
   startTag = true
   endTag = true
   row_start, row_end = row.offset(0)
   # A row runs up to the next "<tr>". The final row has no successor, so fall
   # back to the end of the text instead of building a range that ends in nil
   # (which raises ArgumentError before Ruby 2.6).
   next_row = content.index("<tr>", row_end) || content.length
   empty = content[row_start..next_row].include?("empty")
   dtd = ""
   result +=
        "<tag name        = \"#{name}\"\n" +
        "     helpref     = \"#{helpref}\"\n" +
        "     description = \"#{description}\"\n" +
        "     startTag    = \"#{startTag}\"\n" +
        "     endTag      = \"#{endTag}\"\n" +
        "     empty       = \"#{empty}\"\n" +
        "     dtd         = \"#{dtd}\"\n" +
        "/>\n"
   generatedTags << name
   offset = row_end
 end

 generatedAttributes = Set.new
 # Turns an HTML fragment into text that is safe inside a double-quoted XML
 # attribute: markup removed, whitespace runs collapsed to one space, double
 # quotes dropped, leading whitespace trimmed and one trailing ";" removed.
 plain_text = lambda do |html|
   html.gsub(/<[^>]*>/, "").gsub(/\s+/, " ").gsub(/"/, "").gsub(/^\s+/, "").gsub(/;$/, "")
 end
 # parse attributes: each match is one row, captured as
 # (attribute name, related-elements cell, description cell, value-type cell).
 # Scanning starts at the offset recorded by the tag pass.
 content[offset..-1].scan(/<tr><th>\s?<code(?:[^>]*)>([^<]+)\s*<\/code>\s*<td>([^;\n]*(?:;\s*[^;\n]*)*)\s*<td>\s*(.*)\s*<td>(.*)/) do |name, field_and_link, raw_description, raw_type|
   next if known4Attributes.include?(name)
   type = plain_text.call(raw_type)
   description = plain_text.call(raw_description)
   # the first link in the related-elements cell doubles as the help reference
   helpref = field_and_link[/<a href="([^"]*)"/, 1] || ""
   relatedTags = plain_text.call(field_and_link)
   dtd = ""
   default = true
   result +=
        "<attribute name        = \"#{name}\"\n" +
        "           helpref     = \"#{helpref}\"\n" +
        "           description = \"#{description}\"\n" +
        "           relatedTags = \"#{relatedTags}\"\n" +
        "           dtd         = \"#{dtd}\"\n" +
        "           type        = \"#{type}\"\n" +
        "           default     = \"#{default}\"\n" +
        "/>\n"
   generatedAttributes << name
 end
 # close the root element and emit the whole table on stdout
 result += '</html-property-table>'
 puts result


 # verify that we haven't missed tags or attributes: every name in the known
 # html5 table must be either an html4 name or one produced by this run.
 #
 # The report is written with $stderr.puts instead of printf, because printf
 # would treat the interpolated name list as a format string and fail on any
 # "%" it contains.
 missing_tags = known5Tags - known4Tags - generatedTags
 unless missing_tags.empty?
   $stderr.puts "warning! missing tags: #{missing_tags.to_a.sort}"
 end

 missing_attributes = known5Attributes - known4Attributes - generatedAttributes
 unless missing_attributes.empty?
   $stderr.puts "warning! missing attributes: #{missing_attributes.to_a.sort}"
 end
	require 'rexml/document'

	# Load the tag and attribute names that existing property tables already
	# describe: the html4 names are skipped by the generator, the html5 names are
	# used afterwards to verify the generated data.
	known4Tags = Set.new
	known4Attributes = Set.new
	known5Tags = Set.new
	known5Attributes = Set.new

	[
	  ["htmltable.xml", known4Tags, known4Attributes],   # html4: names to skip
	  ["html5table.xml", known5Tags, known5Attributes]   # html5: names to verify against
	].each do |path, tags, attributes|
	  # Block form of File.open closes the handle even if REXML raises while parsing.
	  File.open(path) do |table_file|
	    table = REXML::Document.new(table_file)
	    table.elements.each("html-property-table/tag") { |e| tags << e.attributes["name"] }
	    table.elements.each("html-property-table/attribute") { |e| attributes << e.attributes["name"] }
	  end
	end

	# read html5 spec
	generatedTags = Set.new
	result = "<html-property-table baseHelpRef=\"http://www.w3.org/html/wg/drafts/html/master/\">\n"
	# File.read opens, reads and closes the spec in one call, so no handle stays open.
	content = File.read("html5.html")
	# End position of the last tag row that was emitted (0 if none was); the
	# attribute scan further down starts from here.
	offset = 0
	# parse tags: every match is one table row, captured as
	# (link target, tag name, description, optional closing </td>).
	content.scan(/<tr><th><code><a href="([^"]+)">([^<]+).*<\/th>\s*<td>(?:<a href="[^"]+">)?([^<]+).*(<\/td>)?/) do |helpref, name, description, _closing_td|
	  row = Regexp.last_match
	  next if known4Tags.include?(name)
	  startTag = true
	  endTag = true
	  row_start, row_end = row.offset(0)
	  # A row runs up to the next "<tr>". The final row has no successor, so fall
	  # back to the end of the text instead of building a range that ends in nil
	  # (which raises ArgumentError before Ruby 2.6).
	  next_row = content.index("<tr>", row_end) || content.length
	  empty = content[row_start..next_row].include?("empty")
	  dtd = ""
	  result +=
	       "<tag name        = \"#{name}\"\n" +
	       "     helpref     = \"#{helpref}\"\n" +
	       "     description = \"#{description}\"\n" +
	       "     startTag    = \"#{startTag}\"\n" +
	       "     endTag      = \"#{endTag}\"\n" +
	       "     empty       = \"#{empty}\"\n" +
	       "     dtd         = \"#{dtd}\"\n" +
	       "/>\n"
	  generatedTags << name
	  offset = row_end
	end

	generatedAttributes = Set.new
	# Turns an HTML fragment into text that is safe inside a double-quoted XML
	# attribute: markup removed, whitespace runs collapsed to one space, double
	# quotes dropped, leading whitespace trimmed and one trailing ";" removed.
	plain_text = lambda do |html|
	  html.gsub(/<[^>]*>/, "").gsub(/\s+/, " ").gsub(/"/, "").gsub(/^\s+/, "").gsub(/;$/, "")
	end
	# parse attributes: each match is one row, captured as
	# (attribute name, related-elements cell, description cell, value-type cell).
	# Scanning starts at the offset recorded by the tag pass. The "*" quantifiers
	# are essential: without them each cell could only match a single character.
	content[offset..-1].scan(/<tr><th>\s?<code(?:[^>]*)>([^<]+)\s*<\/code>\s*<td>([^;\n]*(?:;\s*[^;\n]*)*)\s*<td>\s*(.*)\s*<td>(.*)/) do |name, field_and_link, raw_description, raw_type|
	  next if known4Attributes.include?(name)
	  type = plain_text.call(raw_type)
	  description = plain_text.call(raw_description)
	  # the first link in the related-elements cell doubles as the help reference
	  helpref = field_and_link[/<a href="([^"]*)"/, 1] || ""
	  relatedTags = plain_text.call(field_and_link)
	  dtd = ""
	  default = true
	  result +=
	       "<attribute name        = \"#{name}\"\n" +
	       "           helpref     = \"#{helpref}\"\n" +
	       "           description = \"#{description}\"\n" +
	       "           relatedTags = \"#{relatedTags}\"\n" +
	       "           dtd         = \"#{dtd}\"\n" +
	       "           type        = \"#{type}\"\n" +
	       "           default     = \"#{default}\"\n" +
	       "/>\n"
	  generatedAttributes << name
	end
	# close the root element and emit the whole table on stdout
	result += '</html-property-table>'
	puts result



	# verify that we haven't missed tags or attributes: every name in the known
	# html5 table must be either an html4 name or one produced by this run.
	#
	# The report is written with $stderr.puts instead of printf, because printf
	# would treat the interpolated name list as a format string and fail on any
	# "%" it contains.
	missing_tags = known5Tags - known4Tags - generatedTags
	unless missing_tags.empty?
	  $stderr.puts "warning! missing tags: #{missing_tags.to_a.sort}"
	end

	missing_attributes = known5Attributes - known4Attributes - generatedAttributes
	unless missing_attributes.empty?
	  $stderr.puts "warning! missing attributes: #{missing_attributes.to_a.sort}"
	end