blob: 12b6a5c4b262ee71efd02da1d44379acc85dac84 [file] [log] [blame]
// Copyright 2014 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Package binutils provides access to the GNU binutils.
package binutils
import (
"debug/elf"
"debug/macho"
"fmt"
"os"
"os/exec"
"path/filepath"
"regexp"
"strings"
"sync"
"github.com/google/pprof/internal/elfexec"
"github.com/google/pprof/internal/plugin"
)
// A Binutils implements plugin.ObjTool by invoking the GNU binutils.
type Binutils struct {
mu sync.Mutex
rep *binrep
}
// binrep is an immutable representation for Binutils. It is atomically
// replaced on every mutation to provide thread-safe access.
type binrep struct {
// Commands to invoke.
llvmSymbolizer string
llvmSymbolizerFound bool
addr2line string
addr2lineFound bool
nm string
nmFound bool
objdump string
objdumpFound bool
// if fast, perform symbolization using nm (symbol names only),
// instead of file-line detail from the slower addr2line.
fast bool
}
// get returns the current representation for bu, initializing it if necessary.
func (bu *Binutils) get() *binrep {
bu.mu.Lock()
r := bu.rep
if r == nil {
r = &binrep{}
initTools(r, "")
bu.rep = r
}
bu.mu.Unlock()
return r
}
// update modifies the rep for bu via the supplied function.
func (bu *Binutils) update(fn func(r *binrep)) {
r := &binrep{}
bu.mu.Lock()
defer bu.mu.Unlock()
if bu.rep == nil {
initTools(r, "")
} else {
*r = *bu.rep
}
fn(r)
bu.rep = r
}
// String returns string representation of the binutils state for debug logging.
func (bu *Binutils) String() string {
r := bu.get()
var llvmSymbolizer, addr2line, nm, objdump string
if r.llvmSymbolizerFound {
llvmSymbolizer = r.llvmSymbolizer
}
if r.addr2lineFound {
addr2line = r.addr2line
}
if r.nmFound {
nm = r.nm
}
if r.objdumpFound {
objdump = r.objdump
}
return fmt.Sprintf("llvm-symbolizer=%q addr2line=%q nm=%q objdump=%q fast=%t",
llvmSymbolizer, addr2line, nm, objdump, r.fast)
}
// SetFastSymbolization sets a toggle that makes binutils use fast
// symbolization (using nm), which is much faster than addr2line but
// provides only symbol name information (no file/line).
func (bu *Binutils) SetFastSymbolization(fast bool) {
bu.update(func(r *binrep) { r.fast = fast })
}
// SetTools processes the contents of the tools option. It
// expects a set of entries separated by commas; each entry is a pair
// of the form t:path, where cmd will be used to look only for the
// tool named t. If t is not specified, the path is searched for all
// tools.
func (bu *Binutils) SetTools(config string) {
bu.update(func(r *binrep) { initTools(r, config) })
}
func initTools(b *binrep, config string) {
// paths collect paths per tool; Key "" contains the default.
paths := make(map[string][]string)
for _, t := range strings.Split(config, ",") {
name, path := "", t
if ct := strings.SplitN(t, ":", 2); len(ct) == 2 {
name, path = ct[0], ct[1]
}
paths[name] = append(paths[name], path)
}
defaultPath := paths[""]
b.llvmSymbolizer, b.llvmSymbolizerFound = findExe("llvm-symbolizer", append(paths["llvm-symbolizer"], defaultPath...))
b.addr2line, b.addr2lineFound = findExe("addr2line", append(paths["addr2line"], defaultPath...))
if !b.addr2lineFound {
// On MacOS, brew installs addr2line under gaddr2line name, so search for
// that if the tool is not found by its default name.
b.addr2line, b.addr2lineFound = findExe("gaddr2line", append(paths["addr2line"], defaultPath...))
}
b.nm, b.nmFound = findExe("nm", append(paths["nm"], defaultPath...))
b.objdump, b.objdumpFound = findExe("objdump", append(paths["objdump"], defaultPath...))
}
// findExe looks for an executable command on a set of paths.
// If it cannot find it, returns cmd.
func findExe(cmd string, paths []string) (string, bool) {
for _, p := range paths {
cp := filepath.Join(p, cmd)
if c, err := exec.LookPath(cp); err == nil {
return c, true
}
}
return cmd, false
}
// Disasm returns the assembly instructions for the specified address range
// of a binary.
func (bu *Binutils) Disasm(file string, start, end uint64) ([]plugin.Inst, error) {
b := bu.get()
cmd := exec.Command(b.objdump, "-d", "-C", "--no-show-raw-insn", "-l",
fmt.Sprintf("--start-address=%#x", start),
fmt.Sprintf("--stop-address=%#x", end),
file)
out, err := cmd.Output()
if err != nil {
return nil, fmt.Errorf("%v: %v", cmd.Args, err)
}
return disassemble(out)
}
// Open satisfies the plugin.ObjTool interface.
func (bu *Binutils) Open(name string, start, limit, offset uint64) (plugin.ObjFile, error) {
b := bu.get()
// Make sure file is a supported executable.
// The pprof driver uses Open to sniff the difference
// between an executable and a profile.
// For now, only ELF is supported.
// Could read the first few bytes of the file and
// use a table of prefixes if we need to support other
// systems at some point.
if _, err := os.Stat(name); err != nil {
// For testing, do not require file name to exist.
if strings.Contains(b.addr2line, "testdata/") {
return &fileAddr2Line{file: file{b: b, name: name}}, nil
}
return nil, err
}
if f, err := b.openELF(name, start, limit, offset); err == nil {
return f, nil
}
if f, err := b.openMachO(name, start, limit, offset); err == nil {
return f, nil
}
return nil, fmt.Errorf("unrecognized binary: %s", name)
}
func (b *binrep) openMachO(name string, start, limit, offset uint64) (plugin.ObjFile, error) {
of, err := macho.Open(name)
if err != nil {
return nil, fmt.Errorf("error parsing %s: %v", name, err)
}
defer of.Close()
// Subtract the load address of the __TEXT section. Usually 0 for shared
// libraries or 0x100000000 for executables. You can check this value by
// running `objdump -private-headers <file>`.
textSegment := of.Segment("__TEXT")
if textSegment == nil {
return nil, fmt.Errorf("could not identify base for %s: no __TEXT segment", name)
}
if textSegment.Addr > start {
return nil, fmt.Errorf("could not identify base for %s: __TEXT segment address (0x%x) > mapping start address (0x%x)",
name, textSegment.Addr, start)
}
base := start - textSegment.Addr
if b.fast || (!b.addr2lineFound && !b.llvmSymbolizerFound) {
return &fileNM{file: file{b: b, name: name, base: base}}, nil
}
return &fileAddr2Line{file: file{b: b, name: name, base: base}}, nil
}
func (b *binrep) openELF(name string, start, limit, offset uint64) (plugin.ObjFile, error) {
ef, err := elf.Open(name)
if err != nil {
return nil, fmt.Errorf("error parsing %s: %v", name, err)
}
defer ef.Close()
var stextOffset *uint64
var pageAligned = func(addr uint64) bool { return addr%4096 == 0 }
if strings.Contains(name, "vmlinux") || !pageAligned(start) || !pageAligned(limit) || !pageAligned(offset) {
// Reading all Symbols is expensive, and we only rarely need it so
// we don't want to do it every time. But if _stext happens to be
// page-aligned but isn't the same as Vaddr, we would symbolize
// wrong. So if the name the addresses aren't page aligned, or if
// the name is "vmlinux" we read _stext. We can be wrong if: (1)
// someone passes a kernel path that doesn't contain "vmlinux" AND
// (2) _stext is page-aligned AND (3) _stext is not at Vaddr
symbols, err := ef.Symbols()
if err != nil {
return nil, err
}
for _, s := range symbols {
if s.Name == "_stext" {
// The kernel may use _stext as the mapping start address.
stextOffset = &s.Value
break
}
}
}
base, err := elfexec.GetBase(&ef.FileHeader, elfexec.FindTextProgHeader(ef), stextOffset, start, limit, offset)
if err != nil {
return nil, fmt.Errorf("could not identify base for %s: %v", name, err)
}
buildID := ""
if f, err := os.Open(name); err == nil {
if id, err := elfexec.GetBuildID(f); err == nil {
buildID = fmt.Sprintf("%x", id)
}
}
if b.fast || (!b.addr2lineFound && !b.llvmSymbolizerFound) {
return &fileNM{file: file{b, name, base, buildID}}, nil
}
return &fileAddr2Line{file: file{b, name, base, buildID}}, nil
}
// file implements the binutils.ObjFile interface.
type file struct {
b *binrep
name string
base uint64
buildID string
}
func (f *file) Name() string {
return f.name
}
func (f *file) Base() uint64 {
return f.base
}
func (f *file) BuildID() string {
return f.buildID
}
func (f *file) SourceLine(addr uint64) ([]plugin.Frame, error) {
return []plugin.Frame{}, nil
}
func (f *file) Close() error {
return nil
}
func (f *file) Symbols(r *regexp.Regexp, addr uint64) ([]*plugin.Sym, error) {
// Get from nm a list of symbols sorted by address.
cmd := exec.Command(f.b.nm, "-n", f.name)
out, err := cmd.Output()
if err != nil {
return nil, fmt.Errorf("%v: %v", cmd.Args, err)
}
return findSymbols(out, f.name, r, addr)
}
// fileNM implements the binutils.ObjFile interface, using 'nm' to map
// addresses to symbols (without file/line number information). It is
// faster than fileAddr2Line.
type fileNM struct {
file
addr2linernm *addr2LinerNM
}
func (f *fileNM) SourceLine(addr uint64) ([]plugin.Frame, error) {
if f.addr2linernm == nil {
addr2liner, err := newAddr2LinerNM(f.b.nm, f.name, f.base)
if err != nil {
return nil, err
}
f.addr2linernm = addr2liner
}
return f.addr2linernm.addrInfo(addr)
}
// fileAddr2Line implements the binutils.ObjFile interface, using
// llvm-symbolizer, if that's available, or addr2line to map addresses to
// symbols (with file/line number information). It can be slow for large
// binaries with debug information.
type fileAddr2Line struct {
once sync.Once
file
addr2liner *addr2Liner
llvmSymbolizer *llvmSymbolizer
}
func (f *fileAddr2Line) SourceLine(addr uint64) ([]plugin.Frame, error) {
f.once.Do(f.init)
if f.llvmSymbolizer != nil {
return f.llvmSymbolizer.addrInfo(addr)
}
if f.addr2liner != nil {
return f.addr2liner.addrInfo(addr)
}
return nil, fmt.Errorf("could not find local addr2liner")
}
func (f *fileAddr2Line) init() {
if llvmSymbolizer, err := newLLVMSymbolizer(f.b.llvmSymbolizer, f.name, f.base); err == nil {
f.llvmSymbolizer = llvmSymbolizer
return
}
if addr2liner, err := newAddr2Liner(f.b.addr2line, f.name, f.base); err == nil {
f.addr2liner = addr2liner
// When addr2line encounters some gcc compiled binaries, it
// drops interesting parts of names in anonymous namespaces.
// Fallback to NM for better function names.
if nm, err := newAddr2LinerNM(f.b.nm, f.name, f.base); err == nil {
f.addr2liner.nm = nm
}
}
}
func (f *fileAddr2Line) Close() error {
if f.llvmSymbolizer != nil {
f.llvmSymbolizer.rw.close()
f.llvmSymbolizer = nil
}
if f.addr2liner != nil {
f.addr2liner.rw.close()
f.addr2liner = nil
}
return nil
}