| // Copyright 2017 syzkaller project authors. All rights reserved. |
| // Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. |
| |
| package email |
| |
| import ( |
| "bytes" |
| "encoding/base64" |
| "fmt" |
| "io" |
| "io/ioutil" |
| "mime" |
| "mime/multipart" |
| "mime/quotedprintable" |
| "net/mail" |
| "regexp" |
| "sort" |
| "strings" |
| ) |
| |
// Email holds the pieces of an incoming message that Parse extracts.
type Email struct {
	// BugID is the '+context' part taken from the first of our own addresses
	// found among the Cc, To and From headers (empty if none carried context).
	BugID string
	// MessageID is the value of the Message-ID header.
	MessageID string
	// Link is the Google Groups discussion URL found in the body, if any.
	Link string
	// Subject is the value of the Subject header.
	Subject string
	// From is the first sender from the From header, formatted as an address.
	From string
	// Cc lists all addresses from Cc, To and From that are not our own,
	// deduplicated and sorted.
	Cc []string
	// Body is the text/plain part of the message.
	Body string
	// Patch is the attached (or inlined) patch, if any.
	Patch string
	// Command is the command addressed to the bot (the "#syz" prefix is stripped).
	Command string
	// CommandArgs holds the arguments of Command.
	CommandArgs string
}
| |
// commandPrefix marks the beginning of a bot command line in an email body.
const commandPrefix = "#syz "

// groupsLinkRe matches the Google Groups footer line and captures the
// discussion URL (without the trailing period) as its only submatch.
var groupsLinkRe = regexp.MustCompile(
	"\nTo view this discussion on the web visit (https://groups\\.google\\.com/.*?)\\.(?:\r)?\n")
| |
| func Parse(r io.Reader, ownEmails []string) (*Email, error) { |
| msg, err := mail.ReadMessage(r) |
| if err != nil { |
| return nil, fmt.Errorf("failed to read email: %v", err) |
| } |
| from, err := msg.Header.AddressList("From") |
| if err != nil { |
| return nil, fmt.Errorf("failed to parse email header 'From': %v", err) |
| } |
| if len(from) == 0 { |
| return nil, fmt.Errorf("failed to parse email header 'To': no senders") |
| } |
| // Ignore errors since To: header may not be present (we've seen such case). |
| to, _ := msg.Header.AddressList("To") |
| // AddressList fails if the header is not present. |
| cc, _ := msg.Header.AddressList("Cc") |
| bugID := "" |
| var ccList []string |
| ownAddrs := make(map[string]bool) |
| for _, email := range ownEmails { |
| ownAddrs[email] = true |
| if addr, err := mail.ParseAddress(email); err == nil { |
| ownAddrs[addr.Address] = true |
| } |
| } |
| fromMe := false |
| for _, addr := range from { |
| cleaned, _, _ := RemoveAddrContext(addr.Address) |
| if addr, err := mail.ParseAddress(cleaned); err == nil && ownAddrs[addr.Address] { |
| fromMe = true |
| } |
| } |
| for _, addr := range append(append(cc, to...), from...) { |
| cleaned, context, _ := RemoveAddrContext(addr.Address) |
| if addr, err := mail.ParseAddress(cleaned); err == nil { |
| cleaned = addr.Address |
| } |
| if ownAddrs[cleaned] { |
| if bugID == "" { |
| bugID = context |
| } |
| } else { |
| ccList = append(ccList, cleaned) |
| } |
| } |
| ccList = MergeEmailLists(ccList) |
| body, attachments, err := parseBody(msg.Body, msg.Header) |
| if err != nil { |
| return nil, err |
| } |
| bodyStr := string(body) |
| patch, cmd, cmdArgs := "", "", "" |
| if !fromMe { |
| for _, a := range attachments { |
| _, patch, _ = ParsePatch(string(a)) |
| if patch != "" { |
| break |
| } |
| } |
| if patch == "" { |
| _, patch, _ = ParsePatch(bodyStr) |
| } |
| cmd, cmdArgs = extractCommand(body) |
| } |
| link := "" |
| if match := groupsLinkRe.FindStringSubmatchIndex(bodyStr); match != nil { |
| link = bodyStr[match[2]:match[3]] |
| } |
| email := &Email{ |
| BugID: bugID, |
| MessageID: msg.Header.Get("Message-ID"), |
| Link: link, |
| Subject: msg.Header.Get("Subject"), |
| From: from[0].String(), |
| Cc: ccList, |
| Body: string(body), |
| Patch: patch, |
| Command: cmd, |
| CommandArgs: cmdArgs, |
| } |
| return email, nil |
| } |
| |
// AddAddrContext inserts "+context" into the local part of email, right before '@'.
// When email carries a display name the result is rendered with that name,
// otherwise the bare address is returned.
// An error is returned if email cannot be parsed as an address.
func AddAddrContext(email, context string) (string, error) {
	parsed, err := mail.ParseAddress(email)
	if err != nil {
		return "", fmt.Errorf("failed to parse %q as email: %v", email, err)
	}
	sep := strings.IndexByte(parsed.Address, '@')
	if sep < 0 {
		return "", fmt.Errorf("failed to parse %q as email: no @", email)
	}
	local, domain := parsed.Address[:sep], parsed.Address[sep:]
	tagged := local + "+" + context + domain
	if parsed.Name == "" {
		return tagged, nil
	}
	parsed.Address = tagged
	return parsed.String(), nil
}
| |
// RemoveAddrContext strips the context that follows the last '+' in the local
// part of email. It returns the address without the context, and the context.
//
// If the address has no '+' in its local part, email is returned unchanged with
// an empty context. Otherwise the address is re-rendered with
// mail.Address.String, so a bare input comes back wrapped in angle brackets.
// An error is returned if email cannot be parsed as an address.
func RemoveAddrContext(email string) (string, string, error) {
	parsed, err := mail.ParseAddress(email)
	if err != nil {
		return "", "", fmt.Errorf("failed to parse %q as email: %v", email, err)
	}
	sep := strings.IndexByte(parsed.Address, '@')
	if sep < 0 {
		return "", "", fmt.Errorf("failed to parse %q as email: no @", email)
	}
	local, domain := parsed.Address[:sep], parsed.Address[sep:]
	tagStart := strings.LastIndexByte(local, '+')
	if tagStart < 0 {
		return email, "", nil
	}
	parsed.Address = local[:tagStart] + domain
	return parsed.String(), local[tagStart+1:], nil
}
| |
// CanonicalEmail returns a normalized form of email: the bare address with
// everything from the first '+' of the local part up to '@' removed, and the
// whole address lower-cased. The display name, if any, is dropped.
// If email cannot be parsed as an address it is returned unchanged.
func CanonicalEmail(email string) string {
	parsed, err := mail.ParseAddress(email)
	if err != nil {
		return email
	}
	sep := strings.IndexByte(parsed.Address, '@')
	if sep < 0 {
		return email
	}
	local, domain := parsed.Address[:sep], parsed.Address[sep:]
	if tag := strings.IndexByte(local, '+'); tag >= 0 {
		local = local[:tag]
	}
	return strings.ToLower(local + domain)
}
| |
| // extractCommand extracts command to syzbot from email body. |
| // Commands are of the following form: |
| // ^#syz cmd args... |
| func extractCommand(body []byte) (cmd, args string) { |
| cmdPos := bytes.Index(append([]byte{'\n'}, body...), []byte("\n"+commandPrefix)) |
| if cmdPos == -1 { |
| return |
| } |
| cmdPos += len(commandPrefix) |
| for cmdPos < len(body) && body[cmdPos] == ' ' { |
| cmdPos++ |
| } |
| cmdEnd := bytes.IndexByte(body[cmdPos:], '\n') |
| if cmdEnd == -1 { |
| cmdEnd = len(body) - cmdPos |
| } |
| if cmdEnd1 := bytes.IndexByte(body[cmdPos:], '\r'); cmdEnd1 != -1 && cmdEnd1 < cmdEnd { |
| cmdEnd = cmdEnd1 |
| } |
| if cmdEnd1 := bytes.IndexByte(body[cmdPos:], ' '); cmdEnd1 != -1 && cmdEnd1 < cmdEnd { |
| cmdEnd = cmdEnd1 |
| } |
| cmd = string(body[cmdPos : cmdPos+cmdEnd]) |
| // Some email clients split text emails at 80 columns are the transformation is irrevesible. |
| // We try hard to restore what was there before. |
| // For "test:" command we know that there must be 2 tokens without spaces. |
| // For "fix:"/"dup:" we need a whole non-empty line of text. |
| switch cmd { |
| case "test:": |
| args = extractArgsTokens(body[cmdPos+cmdEnd:], 2) |
| case "test_5_arg_cmd": |
| args = extractArgsTokens(body[cmdPos+cmdEnd:], 5) |
| case "fix:", "dup:": |
| args = extractArgsLine(body[cmdPos+cmdEnd:]) |
| } |
| return |
| } |
| |
// extractArgsTokens gathers whitespace-separated tokens from body, line by
// line, until at least num tokens have been collected or body is exhausted.
// It returns the tokens joined by single spaces.
//
// Blank lines are skipped, so arguments that an email client wrapped onto
// following lines are still picked up. Tokens are taken a whole line at a time,
// so the result can hold more than num tokens when the last consumed line
// contains extra ones.
func extractArgsTokens(body []byte, num int) string {
	var args []string
	for pos := 0; len(args) < num && pos < len(body); {
		lineEnd := bytes.IndexByte(body[pos:], '\n')
		if lineEnd == -1 {
			lineEnd = len(body) - pos
		}
		// strings.Fields splits on any run of whitespace, so repeated spaces,
		// tabs and a trailing '\r' never produce empty tokens.
		args = append(args, strings.Fields(string(body[pos:pos+lineEnd]))...)
		pos += lineEnd + 1
	}
	return strings.Join(args, " ")
}
| |
// extractArgsLine returns the first non-blank line of body with surrounding
// whitespace removed, or an empty string if body contains only whitespace.
func extractArgsLine(body []byte) string {
	// Drop leading blanks and empty lines so that rest begins on real text.
	rest := bytes.TrimLeft(body, " \t\n\r")
	if nl := bytes.IndexByte(rest, '\n'); nl >= 0 {
		rest = rest[:nl]
	}
	return strings.TrimSpace(string(rest))
}
| |
// parseBody decodes one MIME entity described by headers and read from r.
// It returns the text body, the list of attachments, and an error.
//
//   - An entity with Content-Disposition "attachment" is returned as a single attachment.
//   - A text/plain entity is returned as the body.
//   - A multipart/* entity is walked recursively: the first non-nil body found
//     among its parts wins and attachments of all parts are accumulated.
//   - Any other media type is ignored (all results are nil).
//
// Quoted-printable and base64 transfer encodings are decoded transparently.
func parseBody(r io.Reader, headers mail.Header) ([]byte, [][]byte, error) {
	// git-send-email sends emails without Content-Type, let's assume it's text.
	mediaType := "text/plain"
	var params map[string]string
	if contentType := headers.Get("Content-Type"); contentType != "" {
		var err error
		if mediaType, params, err = mime.ParseMediaType(contentType); err != nil {
			return nil, nil, fmt.Errorf("failed to parse email header 'Content-Type': %v", err)
		}
	}

	encoding := strings.ToLower(headers.Get("Content-Transfer-Encoding"))
	if encoding == "quoted-printable" {
		r = quotedprintable.NewReader(r)
	} else if encoding == "base64" {
		r = base64.NewDecoder(base64.StdEncoding, r)
	}

	// A missing or malformed Content-Disposition just means "not an attachment".
	disp, _, _ := mime.ParseMediaType(headers.Get("Content-Disposition"))
	switch {
	case disp == "attachment":
		data, err := ioutil.ReadAll(r)
		if err != nil {
			return nil, nil, fmt.Errorf("failed to read email body: %v", err)
		}
		return nil, [][]byte{data}, nil
	case mediaType == "text/plain":
		text, err := ioutil.ReadAll(r)
		if err != nil {
			return nil, nil, fmt.Errorf("failed to read email body: %v", err)
		}
		return text, nil, nil
	case !strings.HasPrefix(mediaType, "multipart/"):
		return nil, nil, nil
	}

	var (
		text  []byte
		files [][]byte
	)
	parts := multipart.NewReader(r, params["boundary"])
	for {
		part, err := parts.NextPart()
		switch {
		case err == io.EOF:
			return text, files, nil
		case err != nil:
			return nil, nil, fmt.Errorf("failed to parse MIME parts: %v", err)
		}
		partText, partFiles, err := parseBody(part, mail.Header(part.Header))
		if err != nil {
			return nil, nil, err
		}
		// Keep only the first body encountered; later text parts are dropped.
		if text == nil {
			text = partText
		}
		files = append(files, partFiles...)
	}
}
| |
// MergeEmailLists combines the given lists into one sorted list of unique bare
// addresses. Entries that fail to parse as an email address, or whose address
// is longer than maxEmailLen, are dropped. At most maxEmails addresses are
// returned; the result is nil when no valid address was supplied.
func MergeEmailLists(lists ...[]string) []string {
	const (
		maxEmailLen = 1000
		maxEmails   = 50
	)
	seen := make(map[string]bool)
	var unique []string
	for _, list := range lists {
		for _, raw := range list {
			parsed, err := mail.ParseAddress(raw)
			if err != nil || len(parsed.Address) > maxEmailLen {
				continue
			}
			if seen[parsed.Address] {
				continue
			}
			seen[parsed.Address] = true
			unique = append(unique, parsed.Address)
		}
	}
	sort.Strings(unique)
	if len(unique) > maxEmails {
		unique = unique[:maxEmails]
	}
	return unique
}