src/go/doc/comment/text.go - toolchain/go - Git at Google

 // Copyright 2022 The Go Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.

 package comment

 import (
 	"bytes"
 	"fmt"
 	"sort"
 	"strings"
 	"unicode/utf8"
 )

 // A textPrinter holds the state needed for printing a Doc as plain text.
 type textPrinter struct {
 	*Printer
 	long       strings.Builder
 	prefix     string
 	codePrefix string
 	width      int
 }

 // Text returns a textual formatting of the [Doc].
 // See the [Printer] documentation for ways to customize the text output.
 func (p *Printer) Text(d *Doc) []byte {
 	tp := &textPrinter{
 		Printer:    p,
 		prefix:     p.TextPrefix,
 		codePrefix: p.TextCodePrefix,
 		width:      p.TextWidth,
 	}
 	if tp.codePrefix == "" {
 		tp.codePrefix = p.TextPrefix + "\t"
 	}
 	if tp.width == 0 {
 		tp.width = 80 - utf8.RuneCountInString(tp.prefix)
 	}

 	var out bytes.Buffer
 	for i, x := range d.Content {
 		if i > 0 && blankBefore(x) {
 			out.WriteString(tp.prefix)
 			writeNL(&out)
 		}
 		tp.block(&out, x)
 	}
 	anyUsed := false
 	for _, def := range d.Links {
 		if def.Used {
 			anyUsed = true
 			break
 		}
 	}
 	if anyUsed {
 		writeNL(&out)
 		for _, def := range d.Links {
 			if def.Used {
 				fmt.Fprintf(&out, "[%s]: %s\n", def.Text, def.URL)
 			}
 		}
 	}
 	return out.Bytes()
 }

 // writeNL calls out.WriteByte('\n')
 // but first trims trailing spaces on the previous line.
 func writeNL(out *bytes.Buffer) {
 	// Trim trailing spaces.
 	data := out.Bytes()
 	n := 0
 	for n < len(data) && (data[len(data)-n-1] == ' ' || data[len(data)-n-1] == '\t') {
 		n++
 	}
 	if n > 0 {
 		out.Truncate(len(data) - n)
 	}
 	out.WriteByte('\n')
 }

 // block prints the block x to out.
 func (p *textPrinter) block(out *bytes.Buffer, x Block) {
 	switch x := x.(type) {
 	default:
 		fmt.Fprintf(out, "?%T\n", x)

 	case *Paragraph:
 		out.WriteString(p.prefix)
 		p.text(out, "", x.Text)

 	case *Heading:
 		out.WriteString(p.prefix)
 		out.WriteString("# ")
 		p.text(out, "", x.Text)

 	case *Code:
 		text := x.Text
 		for text != "" {
 			var line string
 			line, text, _ = strings.Cut(text, "\n")
 			if line != "" {
 				out.WriteString(p.codePrefix)
 				out.WriteString(line)
 			}
 			writeNL(out)
 		}

 	case *List:
 		loose := x.BlankBetween()
 		for i, item := range x.Items {
 			if i > 0 && loose {
 				out.WriteString(p.prefix)
 				writeNL(out)
 			}
 			out.WriteString(p.prefix)
 			out.WriteString(" ")
 			if item.Number == "" {
 				out.WriteString(" - ")
 			} else {
 				out.WriteString(item.Number)
 				out.WriteString(". ")
 			}
 			for i, blk := range item.Content {
 				const fourSpace = "    "
 				if i > 0 {
 					writeNL(out)
 					out.WriteString(p.prefix)
 					out.WriteString(fourSpace)
 				}
 				p.text(out, fourSpace, blk.(*Paragraph).Text)
 			}
 		}
 	}
 }

 // text prints the text sequence x to out.
 func (p *textPrinter) text(out *bytes.Buffer, indent string, x []Text) {
 	p.oneLongLine(&p.long, x)
 	words := strings.Fields(p.long.String())
 	p.long.Reset()

 	var seq []int
 	if p.width < 0 || len(words) == 0 {
 		seq = []int{0, len(words)} // one long line
 	} else {
 		seq = wrap(words, p.width-utf8.RuneCountInString(indent))
 	}
 	for i := 0; i+1 < len(seq); i++ {
 		if i > 0 {
 			out.WriteString(p.prefix)
 			out.WriteString(indent)
 		}
 		for j, w := range words[seq[i]:seq[i+1]] {
 			if j > 0 {
 				out.WriteString(" ")
 			}
 			out.WriteString(w)
 		}
 		writeNL(out)
 	}
 }

 // oneLongLine prints the text sequence x to out as one long line,
 // without worrying about line wrapping.
 // Explicit links have the [ ] dropped to improve readability.
 func (p *textPrinter) oneLongLine(out *strings.Builder, x []Text) {
 	for _, t := range x {
 		switch t := t.(type) {
 		case Plain:
 			out.WriteString(string(t))
 		case Italic:
 			out.WriteString(string(t))
 		case *Link:
 			p.oneLongLine(out, t.Text)
 		case *DocLink:
 			p.oneLongLine(out, t.Text)
 		}
 	}
 }

 // wrap wraps words into lines of at most max runes,
 // minimizing the sum of the squares of the leftover lengths
 // at the end of each line (except the last, of course),
 // with a preference for ending lines at punctuation (.,:;).
 //
 // The returned slice gives the indexes of the first words
 // on each line in the wrapped text with a final entry of len(words).
 // Thus the lines are words[seq[0]:seq[1]], words[seq[1]:seq[2]],
 // ..., words[seq[len(seq)-2]:seq[len(seq)-1]].
 //
 // The implementation runs in O(n log n) time, where n = len(words),
 // using the algorithm described in D. S. Hirschberg and L. L. Larmore,
 // “[The least weight subsequence problem],” FOCS 1985, pp. 137-143.
 //
 // [The least weight subsequence problem]: https://doi.org/10.1109/SFCS.1985.60
 func wrap(words []string, max int) (seq []int) {
 	// The algorithm requires that our scoring function be concave,
 	// meaning that for all i₀ ≤ i₁ < j₀ ≤ j₁,
 	// weight(i₀, j₀) + weight(i₁, j₁) ≤ weight(i₀, j₁) + weight(i₁, j₀).
 	//
 	// Our weights are two-element pairs [hi, lo]
 	// ordered by elementwise comparison.
 	// The hi entry counts the weight for lines that are longer than max,
 	// and the lo entry counts the weight for lines that are not.
 	// This forces the algorithm to first minimize the number of lines
 	// that are longer than max, which correspond to lines with
 	// single very long words. Having done that, it can move on to
 	// minimizing the lo score, which is more interesting.
 	//
 	// The lo score is the sum for each line of the square of the
 	// number of spaces remaining at the end of the line and a
 	// penalty of 64 given out for not ending the line in a
 	// punctuation character (.,:;).
 	// The penalty is somewhat arbitrarily chosen by trying
 	// different amounts and judging how nice the wrapped text looks.
 	// Roughly speaking, using 64 means that we are willing to
 	// end a line with eight blank spaces in order to end at a
 	// punctuation character, even if the next word would fit in
 	// those spaces.
 	//
 	// We care about ending in punctuation characters because
 	// it makes the text easier to skim if not too many sentences
 	// or phrases begin with a single word on the previous line.

 	// A score is the score (also called weight) for a given line.
 	// add and cmp add and compare scores.
 	type score struct {
 		hi int64
 		lo int64
 	}
 	add := func(s, t score) score { return score{s.hi + t.hi, s.lo + t.lo} }
 	cmp := func(s, t score) int {
 		switch {
 		case s.hi < t.hi:
 			return -1
 		case s.hi > t.hi:
 			return +1
 		case s.lo < t.lo:
 			return -1
 		case s.lo > t.lo:
 			return +1
 		}
 		return 0
 	}

 	// total[j] is the total number of runes
 	// (including separating spaces) in words[:j].
 	total := make([]int, len(words)+1)
 	total[0] = 0
 	for i, s := range words {
 		total[1+i] = total[i] + utf8.RuneCountInString(s) + 1
 	}

 	// weight returns weight(i, j).
 	weight := func(i, j int) score {
 		// On the last line, there is zero weight for being too short.
 		n := total[j] - 1 - total[i]
 		if j == len(words) && n <= max {
 			return score{0, 0}
 		}

 		// Otherwise the weight is the penalty plus the square of the number of
 		// characters remaining on the line or by which the line goes over.
 		// In the latter case, that value goes in the hi part of the score.
 		// (See note above.)
 		p := wrapPenalty(words[j-1])
 		v := int64(max-n) * int64(max-n)
 		if n > max {
 			return score{v, p}
 		}
 		return score{0, v + p}
 	}

 	// The rest of this function is “The Basic Algorithm” from
 	// Hirschberg and Larmore's conference paper,
 	// using the same names as in the paper.
 	f := []score{{0, 0}}
 	g := func(i, j int) score { return add(f[i], weight(i, j)) }

 	bridge := func(a, b, c int) bool {
 		k := c + sort.Search(len(words)+1-c, func(k int) bool {
 			k += c
 			return cmp(g(a, k), g(b, k)) > 0
 		})
 		if k > len(words) {
 			return true
 		}
 		return cmp(g(c, k), g(b, k)) <= 0
 	}

 	// d is a one-ended deque implemented as a slice.
 	d := make([]int, 1, len(words))
 	d[0] = 0
 	bestleft := make([]int, 1, len(words))
 	bestleft[0] = -1
 	for m := 1; m < len(words); m++ {
 		f = append(f, g(d[0], m))
 		bestleft = append(bestleft, d[0])
 		for len(d) > 1 && cmp(g(d[1], m+1), g(d[0], m+1)) <= 0 {
 			d = d[1:] // “Retire”
 		}
 		for len(d) > 1 && bridge(d[len(d)-2], d[len(d)-1], m) {
 			d = d[:len(d)-1] // “Fire”
 		}
 		if cmp(g(m, len(words)), g(d[len(d)-1], len(words))) < 0 {
 			d = append(d, m) // “Hire”
 			// The next few lines are not in the paper but are necessary
 			// to handle two-word inputs correctly. It appears to be
 			// just a bug in the paper's pseudocode.
 			if len(d) == 2 && cmp(g(d[1], m+1), g(d[0], m+1)) <= 0 {
 				d = d[1:]
 			}
 		}
 	}
 	bestleft = append(bestleft, d[0])

 	// Recover least weight sequence from bestleft.
 	n := 1
 	for m := len(words); m > 0; m = bestleft[m] {
 		n++
 	}
 	seq = make([]int, n)
 	for m := len(words); m > 0; m = bestleft[m] {
 		n--
 		seq[n] = m
 	}
 	return seq
 }

 // wrapPenalty is the penalty for inserting a line break after word s.
 func wrapPenalty(s string) int64 {
 	switch s[len(s)-1] {
 	case '.', ',', ':', ';':
 		return 0
 	}
 	return 64
 }
	// Copyright 2022 The Go Authors. All rights reserved.
	// Use of this source code is governed by a BSD-style
	// license that can be found in the LICENSE file.

	package comment

	import (
	"bytes"
	"fmt"
	"sort"
	"strings"
	"unicode/utf8"
	)

	// A textPrinter holds the state needed for printing a Doc as plain text.
	type textPrinter struct {
	*Printer
	long strings.Builder
	prefix string
	codePrefix string
	width int
	}

	// Text returns a textual formatting of the [Doc].
	// See the [Printer] documentation for ways to customize the text output.
	func (p Printer) Text(d Doc) []byte {
	tp := &textPrinter{
	Printer: p,
	prefix: p.TextPrefix,
	codePrefix: p.TextCodePrefix,
	width: p.TextWidth,
	}
	if tp.codePrefix == "" {
	tp.codePrefix = p.TextPrefix + "\t"
	}
	if tp.width == 0 {
	tp.width = 80 - utf8.RuneCountInString(tp.prefix)
	}

	var out bytes.Buffer
	for i, x := range d.Content {
	if i > 0 && blankBefore(x) {
	out.WriteString(tp.prefix)
	writeNL(&out)
	}
	tp.block(&out, x)
	}
	anyUsed := false
	for _, def := range d.Links {
	if def.Used {
	anyUsed = true
	break
	}
	}
	if anyUsed {
	writeNL(&out)
	for _, def := range d.Links {
	if def.Used {
	fmt.Fprintf(&out, "[%s]: %s\n", def.Text, def.URL)
	}
	}
	}
	return out.Bytes()
	}

	// writeNL calls out.WriteByte('\n')
	// but first trims trailing spaces on the previous line.
	func writeNL(out *bytes.Buffer) {
	// Trim trailing spaces.
	data := out.Bytes()
	n := 0
	for n < len(data) && (data[len(data)-n-1] == ' ' \|\| data[len(data)-n-1] == '\t') {
	n++
	}
	if n > 0 {
	out.Truncate(len(data) - n)
	}
	out.WriteByte('\n')
	}

	// block prints the block x to out.
	func (p textPrinter) block(out bytes.Buffer, x Block) {
	switch x := x.(type) {
	default:
	fmt.Fprintf(out, "?%T\n", x)

	case *Paragraph:
	out.WriteString(p.prefix)
	p.text(out, "", x.Text)

	case *Heading:
	out.WriteString(p.prefix)
	out.WriteString("# ")
	p.text(out, "", x.Text)

	case *Code:
	text := x.Text
	for text != "" {
	var line string
	line, text, _ = strings.Cut(text, "\n")
	if line != "" {
	out.WriteString(p.codePrefix)
	out.WriteString(line)
	}
	writeNL(out)
	}

	case *List:
	loose := x.BlankBetween()
	for i, item := range x.Items {
	if i > 0 && loose {
	out.WriteString(p.prefix)
	writeNL(out)
	}
	out.WriteString(p.prefix)
	out.WriteString(" ")
	if item.Number == "" {
	out.WriteString(" - ")
	} else {
	out.WriteString(item.Number)
	out.WriteString(". ")
	}
	for i, blk := range item.Content {
	const fourSpace = " "
	if i > 0 {
	writeNL(out)
	out.WriteString(p.prefix)
	out.WriteString(fourSpace)
	}
	p.text(out, fourSpace, blk.(*Paragraph).Text)
	}
	}
	}
	}

	// text prints the text sequence x to out.
	func (p textPrinter) text(out bytes.Buffer, indent string, x []Text) {
	p.oneLongLine(&p.long, x)
	words := strings.Fields(p.long.String())
	p.long.Reset()

	var seq []int
	if p.width < 0 \|\| len(words) == 0 {
	seq = []int{0, len(words)} // one long line
	} else {
	seq = wrap(words, p.width-utf8.RuneCountInString(indent))
	}
	for i := 0; i+1 < len(seq); i++ {
	if i > 0 {
	out.WriteString(p.prefix)
	out.WriteString(indent)
	}
	for j, w := range words[seq[i]:seq[i+1]] {
	if j > 0 {
	out.WriteString(" ")
	}
	out.WriteString(w)
	}
	writeNL(out)
	}
	}

	// oneLongLine prints the text sequence x to out as one long line,
	// without worrying about line wrapping.
	// Explicit links have the [ ] dropped to improve readability.
	func (p textPrinter) oneLongLine(out strings.Builder, x []Text) {
	for _, t := range x {
	switch t := t.(type) {
	case Plain:
	out.WriteString(string(t))
	case Italic:
	out.WriteString(string(t))
	case *Link:
	p.oneLongLine(out, t.Text)
	case *DocLink:
	p.oneLongLine(out, t.Text)
	}
	}
	}

	// wrap wraps words into lines of at most max runes,
	// minimizing the sum of the squares of the leftover lengths
	// at the end of each line (except the last, of course),
	// with a preference for ending lines at punctuation (.,:;).
	//
	// The returned slice gives the indexes of the first words
	// on each line in the wrapped text with a final entry of len(words).
	// Thus the lines are words[seq[0]:seq[1]], words[seq[1]:seq[2]],
	// ..., words[seq[len(seq)-2]:seq[len(seq)-1]].
	//
	// The implementation runs in O(n log n) time, where n = len(words),
	// using the algorithm described in D. S. Hirschberg and L. L. Larmore,
	// “[The least weight subsequence problem],” FOCS 1985, pp. 137-143.
	//
	// [The least weight subsequence problem]: https://doi.org/10.1109/SFCS.1985.60
	func wrap(words []string, max int) (seq []int) {
	// The algorithm requires that our scoring function be concave,
	// meaning that for all i₀ ≤ i₁ < j₀ ≤ j₁,
	// weight(i₀, j₀) + weight(i₁, j₁) ≤ weight(i₀, j₁) + weight(i₁, j₀).
	//
	// Our weights are two-element pairs [hi, lo]
	// ordered by elementwise comparison.
	// The hi entry counts the weight for lines that are longer than max,
	// and the lo entry counts the weight for lines that are not.
	// This forces the algorithm to first minimize the number of lines
	// that are longer than max, which correspond to lines with
	// single very long words. Having done that, it can move on to
	// minimizing the lo score, which is more interesting.
	//
	// The lo score is the sum for each line of the square of the
	// number of spaces remaining at the end of the line and a
	// penalty of 64 given out for not ending the line in a
	// punctuation character (.,:;).
	// The penalty is somewhat arbitrarily chosen by trying
	// different amounts and judging how nice the wrapped text looks.
	// Roughly speaking, using 64 means that we are willing to
	// end a line with eight blank spaces in order to end at a
	// punctuation character, even if the next word would fit in
	// those spaces.
	//
	// We care about ending in punctuation characters because
	// it makes the text easier to skim if not too many sentences
	// or phrases begin with a single word on the previous line.

	// A score is the score (also called weight) for a given line.
	// add and cmp add and compare scores.
	type score struct {
	hi int64
	lo int64
	}
	add := func(s, t score) score { return score{s.hi + t.hi, s.lo + t.lo} }
	cmp := func(s, t score) int {
	switch {
	case s.hi < t.hi:
	return -1
	case s.hi > t.hi:
	return +1
	case s.lo < t.lo:
	return -1
	case s.lo > t.lo:
	return +1
	}
	return 0
	}

	// total[j] is the total number of runes
	// (including separating spaces) in words[:j].
	total := make([]int, len(words)+1)
	total[0] = 0
	for i, s := range words {
	total[1+i] = total[i] + utf8.RuneCountInString(s) + 1
	}

	// weight returns weight(i, j).
	weight := func(i, j int) score {
	// On the last line, there is zero weight for being too short.
	n := total[j] - 1 - total[i]
	if j == len(words) && n <= max {
	return score{0, 0}
	}

	// Otherwise the weight is the penalty plus the square of the number of
	// characters remaining on the line or by which the line goes over.
	// In the latter case, that value goes in the hi part of the score.
	// (See note above.)
	p := wrapPenalty(words[j-1])
	v := int64(max-n) * int64(max-n)
	if n > max {
	return score{v, p}
	}
	return score{0, v + p}
	}

	// The rest of this function is “The Basic Algorithm” from
	// Hirschberg and Larmore's conference paper,
	// using the same names as in the paper.
	f := []score{{0, 0}}
	g := func(i, j int) score { return add(f[i], weight(i, j)) }

	bridge := func(a, b, c int) bool {
	k := c + sort.Search(len(words)+1-c, func(k int) bool {
	k += c
	return cmp(g(a, k), g(b, k)) > 0
	})
	if k > len(words) {
	return true
	}
	return cmp(g(c, k), g(b, k)) <= 0
	}

	// d is a one-ended deque implemented as a slice.
	d := make([]int, 1, len(words))
	d[0] = 0
	bestleft := make([]int, 1, len(words))
	bestleft[0] = -1
	for m := 1; m < len(words); m++ {
	f = append(f, g(d[0], m))
	bestleft = append(bestleft, d[0])
	for len(d) > 1 && cmp(g(d[1], m+1), g(d[0], m+1)) <= 0 {
	d = d[1:] // “Retire”
	}
	for len(d) > 1 && bridge(d[len(d)-2], d[len(d)-1], m) {
	d = d[:len(d)-1] // “Fire”
	}
	if cmp(g(m, len(words)), g(d[len(d)-1], len(words))) < 0 {
	d = append(d, m) // “Hire”
	// The next few lines are not in the paper but are necessary
	// to handle two-word inputs correctly. It appears to be
	// just a bug in the paper's pseudocode.
	if len(d) == 2 && cmp(g(d[1], m+1), g(d[0], m+1)) <= 0 {
	d = d[1:]
	}
	}
	}
	bestleft = append(bestleft, d[0])

	// Recover least weight sequence from bestleft.
	n := 1
	for m := len(words); m > 0; m = bestleft[m] {
	n++
	}
	seq = make([]int, n)
	for m := len(words); m > 0; m = bestleft[m] {
	n--
	seq[n] = m
	}
	return seq
	}

	// wrapPenalty is the penalty for inserting a line break after word s.
	func wrapPenalty(s string) int64 {
	switch s[len(s)-1] {
	case '.', ',', ':', ';':
	return 0
	}
	return 64
	}