x/net/html/charset: add NewReaderByName

This provides a CharsetReader function for xml.Decoder.

Change-Id: Id00787bbdee90d267d38c84c98a06f9e10d93336
Reviewed-on: https://go-review.googlesource.com/4420
Reviewed-by: Nigel Tao <nigeltao@golang.org>
diff --git a/html/charset/charset.go b/html/charset/charset.go
index 2e5f9ba..84e6062 100644
--- a/html/charset/charset.go
+++ b/html/charset/charset.go
@@ -10,6 +10,7 @@
 
 import (
 	"bytes"
+	"fmt"
 	"io"
 	"mime"
 	"strings"
@@ -110,6 +111,18 @@
 	return r, nil
 }
 
+// NewReaderByName wraps input in a reader that decodes the named charset
+// into UTF-8. An error is returned if charset is not a standard HTML encoding.
+// Its signature fits the CharsetReader field of encoding/xml.Decoder.
+func NewReaderByName(charset string, input io.Reader) (io.Reader, error) {
+	enc, _ := Lookup(charset) // second result is not needed here
+	if enc == nil {
+		return nil, fmt.Errorf("unsupported charset: %q", charset)
+	}
+	decoder := enc.NewDecoder()
+	return transform.NewReader(input, decoder), nil
+}
+
 func prescan(content []byte) (e encoding.Encoding, name string) {
 	z := html.NewTokenizer(bytes.NewReader(content))
 	for {
diff --git a/html/charset/charset_test.go b/html/charset/charset_test.go
index d309f75..44a1867 100644
--- a/html/charset/charset_test.go
+++ b/html/charset/charset_test.go
@@ -6,6 +6,7 @@
 
 import (
 	"bytes"
+	"encoding/xml"
 	"io/ioutil"
 	"runtime"
 	"strings"
@@ -213,3 +214,23 @@
 		}
 	}
 }
+
+func TestXML(t *testing.T) {
+	const (
+		input = "<?xml version=\"1.0\" encoding=\"windows-1252\"?><a><Word>r\xe9sum\xe9</Word></a>"
+		want  = "résumé"
+	)
+	// Hand NewReaderByName to the decoder as its charset conversion hook.
+	dec := xml.NewDecoder(strings.NewReader(input))
+	dec.CharsetReader = NewReaderByName
+
+	var doc struct {
+		Word string
+	}
+	if err := dec.Decode(&doc); err != nil {
+		t.Fatalf("Decode: %v", err)
+	}
+	if doc.Word != want {
+		t.Errorf("got %q, want %q", doc.Word, want)
+	}
+}