| // Copyright 2022 The Go Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style |
| // license that can be found in the LICENSE file. |
| |
| package comment |
| |
| import ( |
| "bytes" |
| "fmt" |
| "sort" |
| "strings" |
| "unicode/utf8" |
| ) |
| |
| // A textPrinter holds the state needed for printing a Doc as plain text. |
| type textPrinter struct { |
| *Printer |
| long strings.Builder |
| prefix string |
| codePrefix string |
| width int |
| } |
| |
| // Text returns a textual formatting of the Doc. |
| // See the [Printer] documentation for ways to customize the text output. |
| func (p *Printer) Text(d *Doc) []byte { |
| tp := &textPrinter{ |
| Printer: p, |
| prefix: p.TextPrefix, |
| codePrefix: p.TextCodePrefix, |
| width: p.TextWidth, |
| } |
| if tp.codePrefix == "" { |
| tp.codePrefix = p.TextPrefix + "\t" |
| } |
| if tp.width == 0 { |
| tp.width = 80 - utf8.RuneCountInString(tp.prefix) |
| } |
| |
| var out bytes.Buffer |
| for i, x := range d.Content { |
| if i > 0 && blankBefore(x) { |
| out.WriteString(tp.prefix) |
| writeNL(&out) |
| } |
| tp.block(&out, x) |
| } |
| anyUsed := false |
| for _, def := range d.Links { |
| if def.Used { |
| anyUsed = true |
| break |
| } |
| } |
| if anyUsed { |
| writeNL(&out) |
| for _, def := range d.Links { |
| if def.Used { |
| fmt.Fprintf(&out, "[%s]: %s\n", def.Text, def.URL) |
| } |
| } |
| } |
| return out.Bytes() |
| } |
| |
| // writeNL calls out.WriteByte('\n') |
| // but first trims trailing spaces on the previous line. |
| func writeNL(out *bytes.Buffer) { |
| // Trim trailing spaces. |
| data := out.Bytes() |
| n := 0 |
| for n < len(data) && (data[len(data)-n-1] == ' ' || data[len(data)-n-1] == '\t') { |
| n++ |
| } |
| if n > 0 { |
| out.Truncate(len(data) - n) |
| } |
| out.WriteByte('\n') |
| } |
| |
| // block prints the block x to out. |
| func (p *textPrinter) block(out *bytes.Buffer, x Block) { |
| switch x := x.(type) { |
| default: |
| fmt.Fprintf(out, "?%T\n", x) |
| |
| case *Paragraph: |
| out.WriteString(p.prefix) |
| p.text(out, "", x.Text) |
| |
| case *Heading: |
| out.WriteString(p.prefix) |
| out.WriteString("# ") |
| p.text(out, "", x.Text) |
| |
| case *Code: |
| text := x.Text |
| for text != "" { |
| var line string |
| line, text, _ = strings.Cut(text, "\n") |
| if line != "" { |
| out.WriteString(p.codePrefix) |
| out.WriteString(line) |
| } |
| writeNL(out) |
| } |
| |
| case *List: |
| loose := x.BlankBetween() |
| for i, item := range x.Items { |
| if i > 0 && loose { |
| out.WriteString(p.prefix) |
| writeNL(out) |
| } |
| out.WriteString(p.prefix) |
| out.WriteString(" ") |
| if item.Number == "" { |
| out.WriteString(" - ") |
| } else { |
| out.WriteString(item.Number) |
| out.WriteString(". ") |
| } |
| for i, blk := range item.Content { |
| const fourSpace = " " |
| if i > 0 { |
| writeNL(out) |
| out.WriteString(p.prefix) |
| out.WriteString(fourSpace) |
| } |
| p.text(out, fourSpace, blk.(*Paragraph).Text) |
| } |
| } |
| } |
| } |
| |
| // text prints the text sequence x to out. |
| func (p *textPrinter) text(out *bytes.Buffer, indent string, x []Text) { |
| p.oneLongLine(&p.long, x) |
| words := strings.Fields(p.long.String()) |
| p.long.Reset() |
| |
| var seq []int |
| if p.width < 0 || len(words) == 0 { |
| seq = []int{0, len(words)} // one long line |
| } else { |
| seq = wrap(words, p.width-utf8.RuneCountInString(indent)) |
| } |
| for i := 0; i+1 < len(seq); i++ { |
| if i > 0 { |
| out.WriteString(p.prefix) |
| out.WriteString(indent) |
| } |
| for j, w := range words[seq[i]:seq[i+1]] { |
| if j > 0 { |
| out.WriteString(" ") |
| } |
| out.WriteString(w) |
| } |
| writeNL(out) |
| } |
| } |
| |
| // oneLongLine prints the text sequence x to out as one long line, |
| // without worrying about line wrapping. |
| // Explicit links have the [ ] dropped to improve readability. |
| func (p *textPrinter) oneLongLine(out *strings.Builder, x []Text) { |
| for _, t := range x { |
| switch t := t.(type) { |
| case Plain: |
| out.WriteString(string(t)) |
| case Italic: |
| out.WriteString(string(t)) |
| case *Link: |
| p.oneLongLine(out, t.Text) |
| case *DocLink: |
| p.oneLongLine(out, t.Text) |
| } |
| } |
| } |
| |
| // wrap wraps words into lines of at most max runes, |
| // minimizing the sum of the squares of the leftover lengths |
| // at the end of each line (except the last, of course), |
| // with a preference for ending lines at punctuation (.,:;). |
| // |
| // The returned slice gives the indexes of the first words |
| // on each line in the wrapped text with a final entry of len(words). |
| // Thus the lines are words[seq[0]:seq[1]], words[seq[1]:seq[2]], |
| // ..., words[seq[len(seq)-2]:seq[len(seq)-1]]. |
| // |
| // The implementation runs in O(n log n) time, where n = len(words), |
| // using the algorithm described in D. S. Hirschberg and L. L. Larmore, |
| // “[The least weight subsequence problem],” FOCS 1985, pp. 137-143. |
| // |
| // [The least weight subsequence problem]: https://doi.org/10.1109/SFCS.1985.60 |
| func wrap(words []string, max int) (seq []int) { |
| // The algorithm requires that our scoring function be concave, |
| // meaning that for all i₀ ≤ i₁ < j₀ ≤ j₁, |
| // weight(i₀, j₀) + weight(i₁, j₁) ≤ weight(i₀, j₁) + weight(i₁, j₀). |
| // |
| // Our weights are two-element pairs [hi, lo] |
| // ordered by elementwise comparison. |
| // The hi entry counts the weight for lines that are longer than max, |
| // and the lo entry counts the weight for lines that are not. |
| // This forces the algorithm to first minimize the number of lines |
| // that are longer than max, which correspond to lines with |
| // single very long words. Having done that, it can move on to |
| // minimizing the lo score, which is more interesting. |
| // |
| // The lo score is the sum for each line of the square of the |
| // number of spaces remaining at the end of the line and a |
| // penalty of 64 given out for not ending the line in a |
| // punctuation character (.,:;). |
| // The penalty is somewhat arbitrarily chosen by trying |
| // different amounts and judging how nice the wrapped text looks. |
| // Roughly speaking, using 64 means that we are willing to |
| // end a line with eight blank spaces in order to end at a |
| // punctuation character, even if the next word would fit in |
| // those spaces. |
| // |
| // We care about ending in punctuation characters because |
| // it makes the text easier to skim if not too many sentences |
| // or phrases begin with a single word on the previous line. |
| |
| // A score is the score (also called weight) for a given line. |
| // add and cmp add and compare scores. |
| type score struct { |
| hi int64 |
| lo int64 |
| } |
| add := func(s, t score) score { return score{s.hi + t.hi, s.lo + t.lo} } |
| cmp := func(s, t score) int { |
| switch { |
| case s.hi < t.hi: |
| return -1 |
| case s.hi > t.hi: |
| return +1 |
| case s.lo < t.lo: |
| return -1 |
| case s.lo > t.lo: |
| return +1 |
| } |
| return 0 |
| } |
| |
| // total[j] is the total number of runes |
| // (including separating spaces) in words[:j]. |
| total := make([]int, len(words)+1) |
| total[0] = 0 |
| for i, s := range words { |
| total[1+i] = total[i] + utf8.RuneCountInString(s) + 1 |
| } |
| |
| // weight returns weight(i, j). |
| weight := func(i, j int) score { |
| // On the last line, there is zero weight for being too short. |
| n := total[j] - 1 - total[i] |
| if j == len(words) && n <= max { |
| return score{0, 0} |
| } |
| |
| // Otherwise the weight is the penalty plus the square of the number of |
| // characters remaining on the line or by which the line goes over. |
| // In the latter case, that value goes in the hi part of the score. |
| // (See note above.) |
| p := wrapPenalty(words[j-1]) |
| v := int64(max-n) * int64(max-n) |
| if n > max { |
| return score{v, p} |
| } |
| return score{0, v + p} |
| } |
| |
| // The rest of this function is “The Basic Algorithm” from |
| // Hirschberg and Larmore's conference paper, |
| // using the same names as in the paper. |
| f := []score{{0, 0}} |
| g := func(i, j int) score { return add(f[i], weight(i, j)) } |
| |
| bridge := func(a, b, c int) bool { |
| k := c + sort.Search(len(words)+1-c, func(k int) bool { |
| k += c |
| return cmp(g(a, k), g(b, k)) > 0 |
| }) |
| if k > len(words) { |
| return true |
| } |
| return cmp(g(c, k), g(b, k)) <= 0 |
| } |
| |
| // d is a one-ended deque implemented as a slice. |
| d := make([]int, 1, len(words)) |
| d[0] = 0 |
| bestleft := make([]int, 1, len(words)) |
| bestleft[0] = -1 |
| for m := 1; m < len(words); m++ { |
| f = append(f, g(d[0], m)) |
| bestleft = append(bestleft, d[0]) |
| for len(d) > 1 && cmp(g(d[1], m+1), g(d[0], m+1)) <= 0 { |
| d = d[1:] // “Retire” |
| } |
| for len(d) > 1 && bridge(d[len(d)-2], d[len(d)-1], m) { |
| d = d[:len(d)-1] // “Fire” |
| } |
| if cmp(g(m, len(words)), g(d[len(d)-1], len(words))) < 0 { |
| d = append(d, m) // “Hire” |
| // The next few lines are not in the paper but are necessary |
| // to handle two-word inputs correctly. It appears to be |
| // just a bug in the paper's pseudocode. |
| if len(d) == 2 && cmp(g(d[1], m+1), g(d[0], m+1)) <= 0 { |
| d = d[1:] |
| } |
| } |
| } |
| bestleft = append(bestleft, d[0]) |
| |
| // Recover least weight sequence from bestleft. |
| n := 1 |
| for m := len(words); m > 0; m = bestleft[m] { |
| n++ |
| } |
| seq = make([]int, n) |
| for m := len(words); m > 0; m = bestleft[m] { |
| n-- |
| seq[n] = m |
| } |
| return seq |
| } |
| |
| // wrapPenalty is the penalty for inserting a line break after word s. |
| func wrapPenalty(s string) int64 { |
| switch s[len(s)-1] { |
| case '.', ',', ':', ';': |
| return 0 |
| } |
| return 64 |
| } |