Add #[inline] to read/write methods to allow cross-crate inlining
diff --git a/src/lib.rs b/src/lib.rs
index 174a22b..59ba692 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -47,6 +47,7 @@
 
 mod new;
 
+#[inline]
 fn extend_sign(val: u64, nbytes: usize) -> i64 {
     let shift  = (8 - nbytes) * 8;
     (val << shift) as i64 >> shift
@@ -122,6 +123,7 @@
     /// Reads a signed 16 bit integer from `buf`.
     ///
     /// Panics when `buf.len() < 2`.
+    #[inline]
     fn read_i16(buf: &[u8]) -> i16 {
         Self::read_u16(buf) as i16
     }
@@ -129,6 +131,7 @@
     /// Reads a signed 32 bit integer from `buf`.
     ///
     /// Panics when `buf.len() < 4`.
+    #[inline]
     fn read_i32(buf: &[u8]) -> i32 {
         Self::read_u32(buf) as i32
     }
@@ -136,6 +139,7 @@
     /// Reads a signed 64 bit integer from `buf`.
     ///
     /// Panics when `buf.len() < 8`.
+    #[inline]
     fn read_i64(buf: &[u8]) -> i64 {
         Self::read_u64(buf) as i64
     }
@@ -144,6 +148,7 @@
     ///
     /// Panics when `nbytes < 1` or `nbytes > 8` or
     /// `buf.len() < nbytes`
+    #[inline]
     fn read_int(buf: &[u8], nbytes: usize) -> i64 {
         extend_sign(Self::read_uint(buf, nbytes), nbytes)
     }
@@ -151,6 +156,7 @@
     /// Reads a IEEE754 single-precision (4 bytes) floating point number.
     ///
     /// Panics when `buf.len() < 4`.
+    #[inline]
     fn read_f32(buf: &[u8]) -> f32 {
         unsafe { transmute(Self::read_u32(buf)) }
     }
@@ -158,6 +164,7 @@
     /// Reads a IEEE754 double-precision (8 bytes) floating point number.
     ///
     /// Panics when `buf.len() < 8`.
+    #[inline]
     fn read_f64(buf: &[u8]) -> f64 {
         unsafe { transmute(Self::read_u64(buf)) }
     }
@@ -165,6 +172,7 @@
     /// Writes a signed 16 bit integer `n` to `buf`.
     ///
     /// Panics when `buf.len() < 2`.
+    #[inline]
     fn write_i16(buf: &mut [u8], n: i16) {
         Self::write_u16(buf, n as u16)
     }
@@ -172,6 +180,7 @@
     /// Writes a signed 32 bit integer `n` to `buf`.
     ///
     /// Panics when `buf.len() < 4`.
+    #[inline]
     fn write_i32(buf: &mut [u8], n: i32) {
         Self::write_u32(buf, n as u32)
     }
@@ -179,6 +188,7 @@
     /// Writes a signed 64 bit integer `n` to `buf`.
     ///
     /// Panics when `buf.len() < 8`.
+    #[inline]
     fn write_i64(buf: &mut [u8], n: i64) {
         Self::write_u64(buf, n as u64)
     }
@@ -186,6 +196,7 @@
     /// Writes a IEEE754 single-precision (4 bytes) floating point number.
     ///
     /// Panics when `buf.len() < 4`.
+    #[inline]
     fn write_f32(buf: &mut [u8], n: f32) {
         Self::write_u32(buf, unsafe { transmute(n) })
     }
@@ -193,6 +204,7 @@
     /// Writes a IEEE754 double-precision (8 bytes) floating point number.
     ///
     /// Panics when `buf.len() < 8`.
+    #[inline]
     fn write_f64(buf: &mut [u8], n: f64) {
         Self::write_u64(buf, unsafe { transmute(n) })
     }
@@ -270,60 +282,74 @@
 }
 
 impl ByteOrder for BigEndian {
+    #[inline]
     fn read_u16(buf: &[u8]) -> u16 {
         read_num_bytes!(u16, 2, buf, to_be)
     }
 
+    #[inline]
     fn read_u32(buf: &[u8]) -> u32 {
         read_num_bytes!(u32, 4, buf, to_be)
     }
 
+    #[inline]
     fn read_u64(buf: &[u8]) -> u64 {
         read_num_bytes!(u64, 8, buf, to_be)
     }
 
+    #[inline]
     fn read_uint(buf: &[u8], nbytes: usize) -> u64 {
         read_num_bytes!(u64, 8, be nbytes, buf, to_be)
     }
 
+    #[inline]
     fn write_u16(buf: &mut [u8], n: u16) {
         write_num_bytes!(u16, 2, n, buf, to_be);
     }
 
+    #[inline]
     fn write_u32(buf: &mut [u8], n: u32) {
         write_num_bytes!(u32, 4, n, buf, to_be);
     }
 
+    #[inline]
     fn write_u64(buf: &mut [u8], n: u64) {
         write_num_bytes!(u64, 8, n, buf, to_be);
     }
 }
 
 impl ByteOrder for LittleEndian {
+    #[inline]
     fn read_u16(buf: &[u8]) -> u16 {
         read_num_bytes!(u16, 2, buf, to_le)
     }
 
+    #[inline]
     fn read_u32(buf: &[u8]) -> u32 {
         read_num_bytes!(u32, 4, buf, to_le)
     }
 
+    #[inline]
     fn read_u64(buf: &[u8]) -> u64 {
         read_num_bytes!(u64, 8, buf, to_le)
     }
 
+    #[inline]
     fn read_uint(buf: &[u8], nbytes: usize) -> u64 {
         read_num_bytes!(u64, 8, le nbytes, buf, to_le)
     }
 
+    #[inline]
     fn write_u16(buf: &mut [u8], n: u16) {
         write_num_bytes!(u16, 2, n, buf, to_le);
     }
 
+    #[inline]
     fn write_u32(buf: &mut [u8], n: u32) {
         write_num_bytes!(u32, 4, n, buf, to_le);
     }
 
+    #[inline]
     fn write_u64(buf: &mut [u8], n: u64) {
         write_num_bytes!(u64, 8, n, buf, to_le);
     }
diff --git a/src/new.rs b/src/new.rs
index 03b94cb..bbef0cd 100644
--- a/src/new.rs
+++ b/src/new.rs
@@ -88,6 +88,7 @@
     ///
     /// Note that since this reads a single byte, no byte order conversions
     /// are used. It is included for completeness.
+    #[inline]
     fn read_u8(&mut self) -> Result<u8> {
         let mut buf = [0; 1];
         try!(read_full(self, &mut buf));
@@ -98,6 +99,7 @@
     ///
     /// Note that since this reads a single byte, no byte order conversions
     /// are used. It is included for completeness.
+    #[inline]
     fn read_i8(&mut self) -> Result<i8> {
         let mut buf = [0; 1];
         try!(read_full(self, &mut buf));
@@ -105,6 +107,7 @@
     }
 
     /// Reads an unsigned 16 bit integer from the underlying reader.
+    #[inline]
     fn read_u16<T: ByteOrder>(&mut self) -> Result<u16> {
         let mut buf = [0; 2];
         try!(read_full(self, &mut buf));
@@ -112,6 +115,7 @@
     }
 
     /// Reads a signed 16 bit integer from the underlying reader.
+    #[inline]
     fn read_i16<T: ByteOrder>(&mut self) -> Result<i16> {
         let mut buf = [0; 2];
         try!(read_full(self, &mut buf));
@@ -119,6 +123,7 @@
     }
 
     /// Reads an unsigned 32 bit integer from the underlying reader.
+    #[inline]
     fn read_u32<T: ByteOrder>(&mut self) -> Result<u32> {
         let mut buf = [0; 4];
         try!(read_full(self, &mut buf));
@@ -126,6 +131,7 @@
     }
 
     /// Reads a signed 32 bit integer from the underlying reader.
+    #[inline]
     fn read_i32<T: ByteOrder>(&mut self) -> Result<i32> {
         let mut buf = [0; 4];
         try!(read_full(self, &mut buf));
@@ -133,6 +139,7 @@
     }
 
     /// Reads an unsigned 64 bit integer from the underlying reader.
+    #[inline]
     fn read_u64<T: ByteOrder>(&mut self) -> Result<u64> {
         let mut buf = [0; 8];
         try!(read_full(self, &mut buf));
@@ -140,6 +147,7 @@
     }
 
     /// Reads a signed 64 bit integer from the underlying reader.
+    #[inline]
     fn read_i64<T: ByteOrder>(&mut self) -> Result<i64> {
         let mut buf = [0; 8];
         try!(read_full(self, &mut buf));
@@ -147,6 +155,7 @@
     }
 
     /// Reads an unsigned n-bytes integer from the underlying reader.
+    #[inline]
     fn read_uint<T: ByteOrder>(&mut self, nbytes: usize) -> Result<u64> {
         let mut buf = [0; 8];
         try!(read_full(self, &mut buf[..nbytes]));
@@ -154,6 +163,7 @@
     }
 
     /// Reads a signed n-bytes integer from the underlying reader.
+    #[inline]
     fn read_int<T: ByteOrder>(&mut self, nbytes: usize) -> Result<i64> {
         let mut buf = [0; 8];
         try!(read_full(self, &mut buf[..nbytes]));
@@ -162,6 +172,7 @@
 
     /// Reads a IEEE754 single-precision (4 bytes) floating point number from
     /// the underlying reader.
+    #[inline]
     fn read_f32<T: ByteOrder>(&mut self) -> Result<f32> {
         let mut buf = [0; 4];
         try!(read_full(self, &mut buf));
@@ -170,6 +181,7 @@
 
     /// Reads a IEEE754 double-precision (8 bytes) floating point number from
     /// the underlying reader.
+    #[inline]
     fn read_f64<T: ByteOrder>(&mut self) -> Result<f64> {
         let mut buf = [0; 8];
         try!(read_full(self, &mut buf));
@@ -221,6 +233,7 @@
     ///
     /// Note that since this writes a single byte, no byte order conversions
     /// are used. It is included for completeness.
+    #[inline]
     fn write_u8(&mut self, n: u8) -> Result<()> {
         write_all(self, &[n])
     }
@@ -229,11 +242,13 @@
     ///
     /// Note that since this writes a single byte, no byte order conversions
     /// are used. It is included for completeness.
+    #[inline]
     fn write_i8(&mut self, n: i8) -> Result<()> {
         write_all(self, &[n as u8])
     }
 
     /// Writes an unsigned 16 bit integer to the underlying writer.
+    #[inline]
     fn write_u16<T: ByteOrder>(&mut self, n: u16) -> Result<()> {
         let mut buf = [0; 2];
         T::write_u16(&mut buf, n);
@@ -241,6 +256,7 @@
     }
 
     /// Writes a signed 16 bit integer to the underlying writer.
+    #[inline]
     fn write_i16<T: ByteOrder>(&mut self, n: i16) -> Result<()> {
         let mut buf = [0; 2];
         T::write_i16(&mut buf, n);
@@ -248,6 +264,7 @@
     }
 
     /// Writes an unsigned 32 bit integer to the underlying writer.
+    #[inline]
     fn write_u32<T: ByteOrder>(&mut self, n: u32) -> Result<()> {
         let mut buf = [0; 4];
         T::write_u32(&mut buf, n);
@@ -255,6 +272,7 @@
     }
 
     /// Writes a signed 32 bit integer to the underlying writer.
+    #[inline]
     fn write_i32<T: ByteOrder>(&mut self, n: i32) -> Result<()> {
         let mut buf = [0; 4];
         T::write_i32(&mut buf, n);
@@ -262,6 +280,7 @@
     }
 
     /// Writes an unsigned 64 bit integer to the underlying writer.
+    #[inline]
     fn write_u64<T: ByteOrder>(&mut self, n: u64) -> Result<()> {
         let mut buf = [0; 8];
         T::write_u64(&mut buf, n);
@@ -269,6 +288,7 @@
     }
 
     /// Writes a signed 64 bit integer to the underlying writer.
+    #[inline]
     fn write_i64<T: ByteOrder>(&mut self, n: i64) -> Result<()> {
         let mut buf = [0; 8];
         T::write_i64(&mut buf, n);
@@ -277,6 +297,7 @@
 
     /// Writes a IEEE754 single-precision (4 bytes) floating point number to
     /// the underlying writer.
+    #[inline]
     fn write_f32<T: ByteOrder>(&mut self, n: f32) -> Result<()> {
         let mut buf = [0; 4];
         T::write_f32(&mut buf, n);
@@ -285,6 +306,7 @@
 
     /// Writes a IEEE754 double-precision (8 bytes) floating point number to
     /// the underlying writer.
+    #[inline]
     fn write_f64<T: ByteOrder>(&mut self, n: f64) -> Result<()> {
         let mut buf = [0; 8];
         T::write_f64(&mut buf, n);