When doing Linux mmapped capture: Allocate a buffer into which to copy a packet, and have the callback for pcap_next() and pcap_next_ex() copy to that buffer and return a pointer to that buffer; we can't return the packet data pointer passed to the callback, as, once the callback returns, that buffer can be overwritten, even before you read the next packet. Don't tweak filter programs passed into the kernel to return 65535 on success - we don't have to, as we're not reading packets with recvfrom(), and we don't want to, as, if we return the actual snapshot length, the kernel will copy less data to the ring buffer. Truncate the packet snapshot length to the specified length, as we might not have a filter to do that.

commit: 34e950492a8b40673297d0888fafc4f94689cd29 [log] [tgz]
author: Guy Harris <gharris@steve.local> Thu Jul 16 15:08:12 2009 -0700
committer: Guy Harris <gharris@steve.local> Thu Jul 16 15:08:12 2009 -0700
tree: 8fecd3a640ef275f75d5906ae3a74feaa4fafe8d
parent: 7b6487a8a5018eb9e0e37bfcfe75c319d917ffed [diff]
diff --git a/pcap-int.h b/pcap-int.h
index 2d4b9bf..b127a00 100644
--- a/pcap-int.h
+++ b/pcap-int.h

@@ -138,6 +138,7 @@
 	size_t	mmapbuflen;	/* size of region */
 	u_int	tp_version;	/* version of tpacket_hdr for mmaped ring */
 	u_int	tp_hdrlen;	/* hdrlen of tpacket_hdr for mmaped ring */
+	u_char	*oneshot_buffer; /* buffer for copy of packet */
 #endif /* linux */
 
 #ifdef HAVE_DAG_API
@@ -290,6 +291,11 @@
 	setnonblock_op_t setnonblock_op;
 	stats_op_t stats_op;
 
+	/*
+	 * Routine to use as callback for pcap_next()/pcap_next_ex().
+	 */
+	pcap_handler oneshot_callback;
+
 #ifdef WIN32
 	/*
 	 * These are, at least currently, specific to the Win32 NPF
@@ -382,6 +388,16 @@
     unsigned char pkt_type;
 };
 
+/*
+ * User data structure for the one-shot callback used for pcap_next()
+ * and pcap_next_ex().
+ */
+struct oneshot_userdata {
+	struct pcap_pkthdr *hdr;	/* where the callback copies the packet header */
+	const u_char **pkt;	/* where the callback stores the packet data pointer */
+	pcap_t *pd;	/* capture handle the callback is running for */
+};
+
 int	yylex(void);
 
 #ifndef min

diff --git a/pcap-linux.c b/pcap-linux.c
index 525d2f6..5aced44 100644
--- a/pcap-linux.c
+++ b/pcap-linux.c

@@ -312,6 +312,8 @@
 static int pcap_setfilter_linux_mmap(pcap_t *, struct bpf_program *);
 static int pcap_setnonblock_mmap(pcap_t *p, int nonblock, char *errbuf);
 static int pcap_getnonblock_mmap(pcap_t *p, char *errbuf);
+static void pcap_oneshot_mmap(u_char *user, const struct pcap_pkthdr *h,
+    const u_char *bytes);
 #endif
 
 /*
@@ -333,7 +335,8 @@
 static int 	iface_bind_old(int fd, const char *device, char *ebuf);
 
 #ifdef SO_ATTACH_FILTER
-static int	fix_program(pcap_t *handle, struct sock_fprog *fcode);
+static int	fix_program(pcap_t *handle, struct sock_fprog *fcode,
+    int is_mapped);
 static int	fix_offset(struct bpf_insn *p);
 static int	set_kernel_filter(pcap_t *handle, struct sock_fprog *fcode);
 static int	reset_kernel_filter(pcap_t *handle);
@@ -1692,7 +1695,8 @@
  *  Attach the given BPF code to the packet capture device.
  */
 static int
-pcap_setfilter_linux(pcap_t *handle, struct bpf_program *filter)
+pcap_setfilter_linux_common(pcap_t *handle, struct bpf_program *filter,
+    int is_mmapped)
 {
 #ifdef SO_ATTACH_FILTER
 	struct sock_fprog	fcode;
@@ -1745,13 +1749,13 @@
 		 *
 		 * Oh, and we also need to fix it up so that all "ret"
 		 * instructions with non-zero operands have 65535 as the
-		 * operand, and so that, if we're in cooked mode, all
-		 * memory-reference instructions use special magic offsets
-		 * in references to the link-layer header and assume that
-		 * the link-layer payload begins at 0; "fix_program()"
-		 * will do that.
+		 * operand if we're not capturing in memory-mapped mode,
+		 * and so that, if we're in cooked mode, all memory-reference
+		 * instructions use special magic offsets in references to
+		 * the link-layer header and assume that the link-layer
+		 * payload begins at 0; "fix_program()" will do that.
 		 */
-		switch (fix_program(handle, &fcode)) {
+		switch (fix_program(handle, &fcode, is_mmapped)) {
 
 		case -1:
 		default:
@@ -1825,6 +1829,13 @@
 	return 0;
 }
 
+static int
+pcap_setfilter_linux(pcap_t *handle, struct bpf_program *filter)
+{
+	return pcap_setfilter_linux_common(handle, filter, 0);	/* 0: not memory-mapped */
+}
+
+
 /*
  * Set direction flag: Which packets do we accept on a forwarding
  * single device? IN, OUT or both?
@@ -2495,16 +2506,32 @@
 #ifdef HAVE_PACKET_RING
 	int ret;
 
+	/*
+	 * Attempt to allocate a buffer to hold the contents of one
+	 * packet, for use by the oneshot callback.
+	 */
+	handle->md.oneshot_buffer = malloc(handle->snapshot);
+	if (handle->md.oneshot_buffer == NULL) {
+		snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
+			 "can't allocate oneshot buffer: %s",
+			 pcap_strerror(errno));
+		return PCAP_ERROR;
+	}
+
 	if (handle->opt.buffer_size == 0) {
 		/* by default request 2M for the ring buffer */
 		handle->opt.buffer_size = 2*1024*1024;
 	}
 	ret = prepare_tpacket_socket(handle);
-	if (ret != 1)
+	if (ret != 1) {
+		free(handle->md.oneshot_buffer);
 		return ret;
+	}
 	ret = create_ring(handle);
-	if (ret != 1)
+	if (ret != 1) {
+		free(handle->md.oneshot_buffer);
 		return ret;
+	}
 
 	/* override some defaults and inherit the other fields from
 	 * activate_new
@@ -2515,6 +2542,7 @@
 	handle->setfilter_op = pcap_setfilter_linux_mmap;
 	handle->setnonblock_op = pcap_setnonblock_mmap;
 	handle->getnonblock_op = pcap_getnonblock_mmap;
+	handle->oneshot_callback = pcap_oneshot_mmap;
 	handle->selectable_fd = handle->fd;
 	return 1;
 #else /* HAVE_PACKET_RING */
@@ -2695,10 +2723,43 @@
 	}
 }
 
+/*
+ * Special one-shot callback, used for pcap_next() and pcap_next_ex(),
+ * for Linux mmapped capture.
+ *
+ * The problem is that pcap_next() and pcap_next_ex() expect the packet
+ * data handed to the callback to be valid after the callback returns,
+ * but pcap_read_linux_mmap() has to release that packet as soon as
+ * the callback returns (otherwise, the kernel thinks there's still
+ * at least one unprocessed packet available in the ring, so a select()
+ * will immediately return indicating that there's data to process), so,
+ * in the callback, we have to make a copy of the packet.
+ *
+ * Yes, this means that, if the capture is using the ring buffer, using
+ * pcap_next() or pcap_next_ex() requires more copies than using
+ * pcap_loop() or pcap_dispatch().  If that bothers you, don't use
+ * pcap_next() or pcap_next_ex().
+ */
+static void
+pcap_oneshot_mmap(u_char *user, const struct pcap_pkthdr *h,
+    const u_char *bytes)
+{
+	struct oneshot_userdata *sp = (struct oneshot_userdata *)user;
+
+	/* Copy header and data into storage owned by the handle. */
+	*sp->hdr = *h;
+	memcpy(sp->pd->md.oneshot_buffer, bytes, h->caplen);
+	*sp->pkt = sp->pd->md.oneshot_buffer;
+}
+    
 static void
 pcap_cleanup_linux_mmap( pcap_t *handle )
 {
 	destroy_ring(handle);
+	if (handle->md.oneshot_buffer != NULL) {
+		free(handle->md.oneshot_buffer);	/* packet copy buffer used by the oneshot callback */
+		handle->md.oneshot_buffer = NULL;	/* don't leave a dangling pointer in the handle */
+	}
 	pcap_cleanup_linux(handle);
 }
 
@@ -2957,6 +3018,18 @@
 		}
 #endif
 
+		/*
+		 * The only way to tell the kernel to cut off the
+		 * packet at a snapshot length is with a filter program;
+		 * if there's no filter program, the kernel won't cut
+		 * the packet off.
+		 *
+		 * Trim the captured length to be no longer than the
+		 * specified snapshot length.
+		 */
+		if (pcaphdr.caplen > handle->snapshot)
+			pcaphdr.caplen = handle->snapshot;
+
 		/* pass the packet to the user */
 		pkts++;
 		callback(user, &pcaphdr, bp);
@@ -2990,7 +3063,15 @@
 pcap_setfilter_linux_mmap(pcap_t *handle, struct bpf_program *filter)
 {
 	int n, offset;
-	int ret = pcap_setfilter_linux(handle, filter);
+	int ret;
+
+	/*
+	 * Don't rewrite "ret" instructions; we don't need to, as
+	 * we're not reading packets with recvmsg(), and we don't
+	 * want to, as, by not rewriting them, the kernel can avoid
+	 * copying extra data.
+	 */
+	ret = pcap_setfilter_linux_common(handle, filter, 1);
 	if (ret < 0)
 		return ret;
 
@@ -4024,7 +4105,7 @@
 
 #ifdef SO_ATTACH_FILTER
 static int
-fix_program(pcap_t *handle, struct sock_fprog *fcode)
+fix_program(pcap_t *handle, struct sock_fprog *fcode, int is_mmapped)
 {
 	size_t prog_size;
 	register int i;
@@ -4057,26 +4138,33 @@
 
 		case BPF_RET:
 			/*
-			 * It's a return instruction; is the snapshot
-			 * length a constant, rather than the contents
-			 * of the accumulator?
+			 * It's a return instruction; are we capturing
+			 * in memory-mapped mode?
 			 */
-			if (BPF_MODE(p->code) == BPF_K) {
+			if (!is_mmapped) {
 				/*
-				 * Yes - if the value to be returned,
-				 * i.e. the snapshot length, is anything
-				 * other than 0, make it 65535, so that
-				 * the packet is truncated by "recvfrom()",
-				 * not by the filter.
-				 *
-				 * XXX - there's nothing we can easily do
-				 * if it's getting the value from the
-				 * accumulator; we'd have to insert
-				 * code to force non-zero values to be
-				 * 65535.
+				 * No; is the snapshot length a constant,
+				 * rather than the contents of the
+				 * accumulator?
 				 */
-				if (p->k != 0)
-					p->k = 65535;
+				if (BPF_MODE(p->code) == BPF_K) {
+					/*
+					 * Yes - if the value to be returned,
+					 * i.e. the snapshot length, is
+					 * anything other than 0, make it
+					 * 65535, so that the packet is
+					 * truncated by "recvfrom()",
+					 * not by the filter.
+					 *
+					 * XXX - there's nothing we can
+					 * easily do if it's getting the
+					 * value from the accumulator; we'd
+					 * have to insert code to force
+					 * non-zero values to be 65535.
+					 */
+					if (p->k != 0)
+						p->k = 65535;
+				}
 			}
 			break;
 

diff --git a/pcap.c b/pcap.c
index 0adbe34..a82a538 100644
--- a/pcap.c
+++ b/pcap.c

@@ -96,6 +96,84 @@
 	return (0);
 }
 
+/*
+ * Default one-shot callback; overridden for capture types where the
+ * packet data cannot be guaranteed to be available after the callback
+ * returns, so that a copy must be made.
+ */
+static void
+pcap_oneshot(u_char *user, const struct pcap_pkthdr *h, const u_char *pkt)
+{
+	struct oneshot_userdata *sp = (struct oneshot_userdata *)user;
+
+	*sp->hdr = *h;	/* copy the header by value */
+	*sp->pkt = pkt;	/* hand back the data pointer itself; no copy is made */
+}
+
+const u_char *
+pcap_next(pcap_t *p, struct pcap_pkthdr *h)
+{
+	struct oneshot_userdata s;
+	const u_char *pkt;
+
+	s.hdr = h;	/* callback fills in the caller's header */
+	s.pkt = &pkt;	/* callback stores the data pointer in our local */
+	s.pd = p;
+	if (pcap_dispatch(p, 1, p->oneshot_callback, (u_char *)&s) <= 0)
+		return (0);	/* no packet delivered: return a null pointer */
+	return (pkt);
+}
+
+int
+pcap_next_ex(pcap_t *p, struct pcap_pkthdr **pkt_header,
+    const u_char **pkt_data)
+{
+	struct oneshot_userdata s;
+
+	s.hdr = &p->pcap_header;
+	s.pkt = pkt_data;
+	s.pd = p;
+
+	/* Saves a pointer to the packet headers */
+	*pkt_header = &p->pcap_header;
+
+	if (p->sf.rfile != NULL) {
+		int status;
+
+		/* We are on an offline capture */
+		status = pcap_offline_read(p, 1, pcap_oneshot,
+		    (u_char *)&s);
+
+		/*
+		 * Return codes for pcap_offline_read() are:
+		 *   -  0: EOF
+		 *   - -1: error
+		 *   - >0: OK
+		 * The first one ('0') conflicts with the return code of
+		 * 0 from pcap_read() meaning "no packets arrived before
+		 * the timeout expired", so we map it to -2 so you can
+		 * distinguish between an EOF from a savefile and a
+		 * "no packets arrived before the timeout expired, try
+		 * again" from a live capture.
+		 */
+		if (status == 0)
+			return (-2);
+		else
+			return (status);
+	}
+
+	/*
+	 * Live capture: use the handle's own one-shot callback, so that
+	 * capture types that must copy the packet data get to do so.
+	 * Return codes for pcap_read() are:
+	 *   -  0: timeout (unlike pcap_offline_read(), where 0 is EOF)
+	 *   - -1: error
+	 *   - -2: loop was broken out of with pcap_breakloop()
+	 *   - >0: OK
+	*/
+	return (p->read_op(p, 1, p->oneshot_callback, (u_char *)&s));
+}
+
 pcap_t *
 pcap_create_common(const char *source, char *ebuf)
 {
@@ -147,6 +225,12 @@
 #endif
 	p->cleanup_op = pcap_cleanup_live_common;
 
+	/*
+	 * In most cases, the standard one-shot callback can
+	 * be used for pcap_next()/pcap_next_ex().
+	 */
+	p->oneshot_callback = pcap_oneshot;
+
 	/* put in some defaults*/
 	pcap_set_timeout(p, 0);
 	pcap_set_snaplen(p, 65535);	/* max packet size */
@@ -312,95 +396,6 @@
 	}
 }
 
-struct singleton {
-	struct pcap_pkthdr *hdr;
-	const u_char *pkt;
-};
-
-
-static void
-pcap_oneshot(u_char *userData, const struct pcap_pkthdr *h, const u_char *pkt)
-{
-	struct singleton *sp = (struct singleton *)userData;
-	*sp->hdr = *h;
-	sp->pkt = pkt;
-}
-
-const u_char *
-pcap_next(pcap_t *p, struct pcap_pkthdr *h)
-{
-	struct singleton s;
-
-	s.hdr = h;
-	if (pcap_dispatch(p, 1, pcap_oneshot, (u_char*)&s) <= 0)
-		return (0);
-	return (s.pkt);
-}
-
-struct pkt_for_fakecallback {
-	struct pcap_pkthdr *hdr;
-	const u_char **pkt;
-};
-
-static void
-pcap_fakecallback(u_char *userData, const struct pcap_pkthdr *h,
-    const u_char *pkt)
-{
-	struct pkt_for_fakecallback *sp = (struct pkt_for_fakecallback *)userData;
-
-	*sp->hdr = *h;
-	*sp->pkt = pkt;
-}
-
-int 
-pcap_next_ex(pcap_t *p, struct pcap_pkthdr **pkt_header,
-    const u_char **pkt_data)
-{
-	struct pkt_for_fakecallback s;
-
-	s.hdr = &p->pcap_header;
-	s.pkt = pkt_data;
-
-	/* Saves a pointer to the packet headers */
-	*pkt_header= &p->pcap_header;
-
-	if (p->sf.rfile != NULL) {
-		int status;
-
-		/* We are on an offline capture */
-		status = pcap_offline_read(p, 1, pcap_fakecallback,
-		    (u_char *)&s);
-
-		/*
-		 * Return codes for pcap_offline_read() are:
-		 *   -  0: EOF
-		 *   - -1: error
-		 *   - >1: OK
-		 * The first one ('0') conflicts with the return code of
-		 * 0 from pcap_read() meaning "no packets arrived before
-		 * the timeout expired", so we map it to -2 so you can
-		 * distinguish between an EOF from a savefile and a
-		 * "no packets arrived before the timeout expired, try
-		 * again" from a live capture.
-		 */
-		if (status == 0)
-			return (-2);
-		else
-			return (status);
-	}
-
-	/*
-	 * Return codes for pcap_read() are:
-	 *   -  0: timeout
-	 *   - -1: error
-	 *   - -2: loop was broken out of with pcap_breakloop()
-	 *   - >1: OK
-	 * The first one ('0') conflicts with the return code of 0 from
-	 * pcap_offline_read() meaning "end of file".
-	*/
-	return (p->read_op(p, 1, pcap_fakecallback, (u_char *)&s));
-}
-
 /*
  * Force the loop in "pcap_read()" or "pcap_read_offline()" to terminate.
  */
commit	34e950492a8b40673297d0888fafc4f94689cd29	[log] [tgz]
author	Guy Harris <gharris@steve.local>	Thu Jul 16 15:08:12 2009 -0700
committer	Guy Harris <gharris@steve.local>	Thu Jul 16 15:08:12 2009 -0700
tree	8fecd3a640ef275f75d5906ae3a74feaa4fafe8d
parent	7b6487a8a5018eb9e0e37bfcfe75c319d917ffed [diff]