How to read a pcap file generated by tcpdump that contains large UDP packets and reassemble the IP fragmented packets?

I would like to read a pcap file generated by tcpdump that contains large UDP packets that have undergone IPv4 fragmentation. The original packets are around 22000 bytes in size.
In C++, I would use libtins with its IPv4Reassembler. Is there a way to do something similar in Rust?
Here is what I have written so far in Rust: a highly incomplete first-pass attempt (using the pnet crate):
use pnet::packet::{
    ethernet::{EtherTypes, EthernetPacket},
    ip::IpNextHeaderProtocols,
    ipv4::Ipv4Packet,
    udp::UdpPacket,
    Packet,
};

struct Ipv4Reassembler {
    cap: pcap::Capture<pcap::Offline>,
}

impl Iterator for Ipv4Reassembler {
    type Item = Vec<u8>;

    fn next(&mut self) -> Option<Self::Item> {
        let mut payload = Vec::<u8>::new();
        while let Some(packet) = self.cap.next().ok() {
            // todo: handle packets other than Ethernet packets
            let ethernet = EthernetPacket::new(packet.data).unwrap();
            match ethernet.get_ethertype() {
                EtherTypes::Ipv4 => {
                    let ipv4_packet = Ipv4Packet::new(ethernet.payload()).unwrap();
                    // dbg!(&ipv4_packet);
                    // todo: discard incomplete packets
                    // todo: construct header for reassembled packet
                    // todo: check id, etc
                    let off: usize = 8 * ipv4_packet.get_fragment_offset() as usize;
                    let end = off + ipv4_packet.payload().len();
                    if payload.len() < end {
                        payload.resize(end, 0);
                    }
                    payload[off..end].clone_from_slice(ipv4_packet.payload());
                    if ipv4_packet.get_flags() & 1 == 0 {
                        return Some(payload);
                    }
                }
                _ => {}
            }
        }
        None
    }
}

fn main() {
    let pcap_path = "os-992114000702.pcap";
    let reass = Ipv4Reassembler {
        cap: pcap::Capture::from_file(&pcap_path).unwrap(),
    };
    for payload in reass {
        let udp_packet = UdpPacket::new(&payload).unwrap();
        dbg!(&udp_packet);
        dbg!(&udp_packet.payload().len());
    }
}
In C++, here is the code I would use (using libtins):
#include <tins/ip_reassembler.h>
#include <tins/packet.h>
#include <tins/rawpdu.h>
#include <tins/sniffer.h>
#include <tins/tins.h>
#include <tins/udp.h>
#include <iostream>
#include <string>

void read_packets(const std::string &pcap_filename) {
    Tins::IPv4Reassembler reassembler;
    Tins::FileSniffer sniffer(pcap_filename);
    while (Tins::Packet packet = sniffer.next_packet()) {
        auto &pdu = *packet.pdu();
        const Tins::Timestamp &timestamp = packet.timestamp();
        if (reassembler.process(pdu) != Tins::IPv4Reassembler::FRAGMENTED) {
            const Tins::UDP *udp = pdu.find_pdu<Tins::UDP>();
            if (!udp) {
                continue;
            }
            const Tins::RawPDU *raw = pdu.find_pdu<Tins::RawPDU>();
            if (!raw) {
                continue;
            }
            const Tins::RawPDU::payload_type &payload = raw->payload();
            std::cout << "Packet: " << payload.size() << std::endl;
            // do something with the reassembled packet here
        }
    }
}

int main() {
    const std::string pcap_path = "os-992114000702.pcap";
    read_packets(pcap_path);
}
g++ -O3 -o pcap pcap.cpp -ltins
It seems that one solution is to implement RFC 815, but I am not sure how to do that in Rust. I have found:
this old pull request to smoltcp, but it appears to have been abandoned.
Fuchsia's reassembly.rs, but I have no idea how to use it outside of Fuchsia.
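To make the missing bookkeeping concrete, here is a rough sketch of the direction I have in mind: tracking fragments in a HashMap keyed by the RFC 791 reassembly tuple (source, destination, identification, protocol) instead of a single shared buffer. This is only a sketch built on pnet's Ipv4Packet accessors; it assumes fragments do not overlap or repeat, and it does no timeout handling:

use std::collections::HashMap;
use std::net::Ipv4Addr;

use pnet::packet::{ipv4::Ipv4Packet, Packet};

/// RFC 791 reassembly key: (source, destination, identification, protocol).
type FragKey = (Ipv4Addr, Ipv4Addr, u16, u8);

#[derive(Default)]
struct FragBuffer {
    /// Bytes assembled so far, indexed by fragment offset.
    payload: Vec<u8>,
    /// Known once the fragment with MF == 0 arrives.
    total_len: Option<usize>,
    /// Bytes received so far (assumes no overlapping or duplicated fragments).
    received: usize,
}

/// Feed one IPv4 packet; returns the reassembled payload once it is complete.
fn process_fragment(
    pending: &mut HashMap<FragKey, FragBuffer>,
    ipv4_packet: &Ipv4Packet,
) -> Option<Vec<u8>> {
    let key: FragKey = (
        ipv4_packet.get_source(),
        ipv4_packet.get_destination(),
        ipv4_packet.get_identification(),
        ipv4_packet.get_next_level_protocol().0,
    );
    let off = 8 * ipv4_packet.get_fragment_offset() as usize;
    let end = off + ipv4_packet.payload().len();
    let more_fragments = ipv4_packet.get_flags() & 1 != 0;

    let buf = pending.entry(key).or_default();
    if buf.payload.len() < end {
        buf.payload.resize(end, 0);
    }
    buf.payload[off..end].copy_from_slice(ipv4_packet.payload());
    buf.received += end - off;
    if !more_fragments {
        // The last fragment fixes the total datagram length.
        buf.total_len = Some(end);
    }
    if buf.total_len == Some(buf.received) {
        // Every byte up to the known end has arrived; hand the datagram back.
        return pending.remove(&key).map(|b| b.payload);
    }
    None
}

The idea would be to call process_fragment for each Ipv4Packet in the capture loop above and parse the returned buffers as UDP, so that interleaved fragment streams from different flows no longer clobber each other the way my single-payload iterator does.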

Related

Implementing cloning and (de/en)capsulation of packets using eBPF

I am trying to create a TC program that will clone a packet, encapsulate it with a modified L3 header, and send the clone to a different host (a "monitor host"). Can I do that using a combination of bpf_skb_adjust_room with bpf_clone_redirect?
Kernel examples do not shed much light on this use case (for example, here).
My current attempt seems to be mutating the original packet:
// Represents the redirect destination.
struct destination {
    __u32 destination_ip;
    __u8 destination_mac[ETH_ALEN];
};

// Contains the destination to redirect traffic to.
struct bpf_map_def SEC("maps") destinations = {
    .type = BPF_MAP_TYPE_HASH,
    .key_size = sizeof(__u32),
    .value_size = sizeof(struct destination),
    .max_entries = 1,
    .map_flags = BPF_F_NO_PREALLOC,
};

SEC("tc")
int tc_ingress(struct __sk_buff *skb) {
    __u32 key = 0;
    struct destination *dest = bpf_map_lookup_elem(&destinations, &key);
    if (dest != NULL) {
        void *data_end = (void *)(long)skb->data_end;
        void *data = (void *)(long)skb->data;
        // Necessary validation: if the L3 layer does not exist, ignore and continue.
        if (data + sizeof(struct ethhdr) > data_end) {
            return TC_ACT_OK;
        }
        struct ethhdr *eth = data;
        struct iphdr encapsulate_iphdr = {};
        struct iphdr *original_iphdr = data + sizeof(struct ethhdr);
        if ((void *)original_iphdr + sizeof(struct iphdr) > data_end) {
            return TC_ACT_OK;
        }
        // Change the L2 destination to the provided MAC destination
        // and the source to the MAC address of the receiving host.
        memcpy(&eth->h_source, &eth->h_dest, ETH_ALEN);
        memcpy(&eth->h_dest, dest->destination_mac, ETH_ALEN);
        // Change the L3 destination to the provided destination IP
        // and the source to the IP address of the receiving host.
        memcpy(&encapsulate_iphdr.daddr, &dest->destination_ip, IPV4_ADDR_LEN);
        memcpy(&encapsulate_iphdr.saddr, &original_iphdr->daddr, IPV4_ADDR_LEN);
        // Adjust room for another iphdr after the L2 layer.
        if (bpf_skb_adjust_room(skb, sizeof(struct iphdr), BPF_ADJ_ROOM_NET, 0)) {
            return TC_ACT_OK;
        }
        // Store the new header after the L2 header, at the original header's offset.
        unsigned long offset = (unsigned long)original_iphdr;
        if (bpf_skb_store_bytes(skb, (int)offset, &encapsulate_iphdr, sizeof(struct iphdr), 0)) {
            return TC_ACT_OK;
        }
        // Route back to the egress path.
        // A zero flag means that the socket buffer is
        // cloned to the iface's egress path.
        bpf_clone_redirect(skb, skb->ifindex, 0);
    }
    return TC_ACT_OK;
}
I believe that's not possible within the same BPF program run today, because bpf_clone_redirect will redirect the clone as soon as it's called, and there is no clone helper that doesn't also redirect.
You could however implement this with a recirculation to the same interface. The pseudo code would look something like:
if (skb->mark == ORIGINAL_PACKET) {
    // Second pass: this is the recirculated clone.
    // Let it continue up the stack unmodified.
    skb->mark = 0;
    return TC_ACT_OK;
}

// First pass: mark the packet, then clone it back to our own ingress.
// The clone carries the mark, takes the branch above, and proceeds
// as the unmodified original.
skb->mark = ORIGINAL_PACKET;
bpf_clone_redirect(skb, skb->ifindex, BPF_F_INGRESS);
skb->mark = 0;

... implement changes ...

// Send the modified packet out via the egress path.
return bpf_redirect(skb, skb->ifindex, 0);

nghttp2: Using server-sent events to be used by EventSource

I'm using nghttp2 to implement a REST server which should use HTTP/2 and server-sent events (to be consumed by an EventSource in the browser). However, based on the examples it is unclear to me how to implement SSE. Using res.push() as in asio-sv.cc doesn't seem to be the right approach.
What would be the right way to do it? I'd prefer to use nghttp2's C++ API, but the C API would do as well.
Yup, I did something like that back in 2018. The documentation was rather sparse :).
First of all, ignore response::push because that's HTTP/2 server push -- something for proactively sending unsolicited objects to the client before it requests them. I know it sounds like what you need, but it is not -- the typical use case would be proactively sending a CSS file and some images along with the originally requested HTML page.
The key thing is that your end() callback must eventually return NGHTTP2_ERR_DEFERRED whenever you run out of data to send. When your application somehow obtains more data to be sent, call http::response::resume().
Here's a simple example. Build it as g++ -std=c++17 -Wall -O3 -ggdb clock.cpp -lssl -lcrypto -pthread -lnghttp2_asio -lspdlog -lfmt. Be careful: modern browsers don't do HTTP/2 over a plaintext socket, so you'll need to reverse-proxy it via something like nghttpx -f '*,8080;no-tls' -b '::1,10080;;proto=h2'.
#include <boost/asio/io_service.hpp>
#include <boost/lexical_cast.hpp>
#include <boost/signals2.hpp>
#include <atomic>
#include <chrono>
#include <list>
#include <mutex>
#include <nghttp2/asio_http2_server.h>
#define SPDLOG_FMT_EXTERNAL
#include <spdlog/spdlog.h>
#include <thread>

using namespace nghttp2::asio_http2;
using namespace std::literals;

using Signal = boost::signals2::signal<void(const std::string& message)>;

class Client {
    const server::response& res;
    enum State {
        HasEvents,
        WaitingForEvents,
    };
    std::atomic<State> state;
    std::list<std::string> queue;
    mutable std::mutex mtx;
    boost::signals2::scoped_connection subscription;

    size_t send_chunk(uint8_t* destination, std::size_t len, uint32_t* data_flags [[maybe_unused]])
    {
        std::size_t written{0};
        std::lock_guard lock{mtx};
        if (state != HasEvents) throw std::logic_error{std::to_string(__LINE__)};
        while (!queue.empty()) {
            auto num = std::min(queue.front().size(), len - written);
            std::copy_n(queue.front().begin(), num, destination + written);
            written += num;
            if (num < queue.front().size()) {
                queue.front() = queue.front().substr(num);
                spdlog::debug("{} send_chunk: partial write", (void*)this);
                return written;
            }
            queue.pop_front();
            spdlog::debug("{} send_chunk: sent one event", (void*)this);
        }
        state = WaitingForEvents;
        return written;
    }

public:
    Client(const server::request& req, const server::response& res, Signal& signal)
        : res{res}
        , state{WaitingForEvents}
        , subscription{signal.connect([this](const auto& msg) {
            enqueue(msg);
        })}
    {
        spdlog::warn("{}: {} {} {}", (void*)this, boost::lexical_cast<std::string>(req.remote_endpoint()), req.method(), req.uri().raw_path);
        res.write_head(200, {{"content-type", {"text/event-stream", false}}});
    }

    void onClose(const uint32_t ec)
    {
        spdlog::error("{} onClose", (void*)this);
        subscription.disconnect();
    }

    ssize_t process(uint8_t* destination, std::size_t len, uint32_t* data_flags)
    {
        spdlog::trace("{} process", (void*)this);
        switch (state) {
        case HasEvents:
            return send_chunk(destination, len, data_flags);
        case WaitingForEvents:
            return NGHTTP2_ERR_DEFERRED;
        }
        __builtin_unreachable();
    }

    void enqueue(const std::string& what)
    {
        {
            std::lock_guard lock{mtx};
            queue.push_back("data: " + what + "\n\n");
        }
        state = HasEvents;
        res.resume();
    }
};

int main(int argc [[maybe_unused]], char** argv [[maybe_unused]])
{
    spdlog::set_level(spdlog::level::trace);
    Signal sig;

    std::thread timer{[&sig]() {
        for (int i = 0; /* forever */; ++i) {
            std::this_thread::sleep_for(std::chrono::milliseconds{666});
            spdlog::info("tick: {}", i);
            sig("ping #" + std::to_string(i));
        }
    }};

    server::http2 server;
    server.num_threads(4);

    server.handle("/events", [&sig](const server::request& req, const server::response& res) {
        auto client = std::make_shared<Client>(req, res, sig);
        res.on_close([client](const auto ec) {
            client->onClose(ec);
        });
        res.end([client](uint8_t* destination, std::size_t len, uint32_t* data_flags) {
            return client->process(destination, len, data_flags);
        });
    });

    server.handle("/", [](const auto& req, const auto& resp) {
        spdlog::warn("{} {} {}", boost::lexical_cast<std::string>(req.remote_endpoint()), req.method(), req.uri().raw_path);
        resp.write_head(200, {{"content-type", {"text/html", false}}});
        resp.end(R"(<html><head><title>nghttp2 event stream</title></head>
<body><h1>events</h1><ul id="x"></ul>
<script type="text/javascript">
const ev = new EventSource("/events");
ev.onmessage = function(event) {
const li = document.createElement("li");
li.textContent = event.data;
document.getElementById("x").appendChild(li);
};
</script>
</body>
</html>)");
    });

    boost::system::error_code ec;
    if (server.listen_and_serve(ec, "::", "10080")) {
        return 1;
    }
    return 0;
}
I have a feeling that my queue handling is probably too complex. When testing via curl, I never seem to run out of buffer space. In other words, even if the client is not reading any data from the socket, the library keeps invoking send_chunk, asking for up to 16 kB of data at a time. Strange. I have no idea how it behaves when pushing data more heavily.
My "real code" used to have a third state, Closed, but I think that blocking events via on_close is enough here. However, you probably never want to enter send_chunk after the client has already disconnected but before the destructor gets called.

Using BPF/XDP with Mininet

I've created a network topology in Mininet to run an algorithm I've implemented using the Linux kernel's eXpress Data Path (XDP).
The objective is to sample packets on the incoming link s1-eth1 on Switch 1 using XDP and store metadata in a shared BPF map. The setup works when run on multiple VMs (instead of emulating the network with Mininet).
However, when using XDP on Mininet (to listen on the emulated network interface), packets aren't recorded.
To further diagnose the cause, I ran Wireshark to listen on the s1-eth1 interface, which does record packets hitting the interface, but for some reason these same packets aren't being registered through the XDP pipeline.
#define KBUILD_MODNAME "foo"
#include <linux/bpf.h>
#include <linux/in.h>
#include <linux/if_ether.h>
#include <linux/if_packet.h>
#include <linux/if_vlan.h>
#include <linux/ip.h>
#include <linux/ipv6.h>

//BPF_TABLE("percpu_array", uint32_t, long, dropcnt, 256);
BPF_HASH(proto_map, uint32_t, uint32_t, 256);
// Packet counter to keep track of the number of packets flowing through XDP
BPF_ARRAY(pkt_count, uint64_t, 1);
// Map to keep track of the current EPOCH SIZE
BPF_ARRAY(epoch_size_map, uint64_t, 1);

static inline int parse_ipv4(void *data, u64 nh_off, void *data_end,
                             __be32 *src, __be32 *dest)
{
    struct iphdr *iph = data + nh_off;
    if (iph + 1 > data_end)
        return 0;
    *src = iph->saddr;
    *dest = iph->daddr;
    return iph->protocol;
}

static inline int bitXor(int *x, int *y)
{
    int a = *x & *y;
    int b = ~*x & ~*y;
    int z = ~a & ~b;
    return z;
}

int xdp_dsa(struct CTXTYPE *ctx) {
    void *data_end = (void *)(long)ctx->data_end;
    void *data = (void *)(long)ctx->data;
    struct ethhdr *eth = data;
    // drop packets
    int rc = RETURNCODE; // let pass XDP_PASS or redirect to tx via XDP_TX
    uint32_t *value;
    uint32_t *counter_value;
    uint32_t *epoch_size;
    uint16_t h_proto;
    uint64_t nh_off = 0;
    uint32_t ipproto;
    uint64_t magic_value = 12345678;
    uint32_t packet = 0;
    __be32 src_ip = 0, dest_ip = 0;

    nh_off = sizeof(*eth);
    if (data + nh_off > data_end) {
        pkt_count.increment(packet);
        return rc;
    }

    h_proto = eth->h_proto;
    if (h_proto == htons(ETH_P_IP))
        ipproto = parse_ipv4(data, nh_off, data_end, &src_ip, &dest_ip);
    /*
    else if (h_proto == htons(ETH_P_IPV6))
        index = parse_ipv6(data, nh_off, data_end);
    */
    else
        ipproto = 0; // i.e. unknown protocol

    /* XOR the srcIP, destIP, and ipproto to encode, then hash */
    int xor_src_dest = bitXor(&src_ip, &dest_ip);
    int xor_srcdst_ipproto = bitXor(&xor_src_dest, &ipproto);

    uint32_t zero = 0;
    // Predecided initial epoch size
    uint32_t init_epoch_size = 10;
    // Variable to store the current epoch size (to check end of epoch)
    uint32_t cur_epoch_size;
    // Look up the epoch size in the shared map (to check whether it is initialized, else read it)
    epoch_size = epoch_size_map.lookup(&zero);
    // Start condition (epoch size map is initialized with zero): set to the initial epoch size.
    // Else read the current epoch size into a variable.
    if (epoch_size)
    {
        if (*epoch_size == 0)
        {
            *epoch_size = init_epoch_size;
        }
        else
        {
            cur_epoch_size = *epoch_size;
        }
    }

    counter_value = pkt_count.lookup(&packet);
    if (counter_value)
    {
        if (*counter_value < cur_epoch_size)
        {
            value = proto_map.lookup_or_init(&xor_srcdst_ipproto, &zero);
            if (value)
            {
                pkt_count.increment(packet);
                *value += 1;
            }
        }
        else if (*counter_value == cur_epoch_size)
        {
            pkt_count.update(&packet, &magic_value);
        }
        else if (*counter_value == magic_value)
        {
            return rc;
        }
    }
    return rc;
}
Any ideas?

How to write a dummy network device driver

I'm trying to write a dummy network driver and have written the code, but I'm facing an issue while trying to load the driver: sometimes it crashes the kernel and sometimes it doesn't respond.
Dummy device code
#include <linux/module.h>
#include <linux/netdevice.h>

int virtualNIC_open(struct net_device *dev) {
    printk("virtualNIC_open called\n");
    netif_start_queue(dev);
    return 0;
}

int virtualNIC_release(struct net_device *dev) {
    printk("virtualNIC_release called\n");
    netif_stop_queue(dev);
    return 0;
}

int virtualNIC_xmit(struct sk_buff *skb, struct net_device *dev) {
    printk("dummy xmit function called...\n");
    dev_kfree_skb(skb);
    return 0;
}

int virtualNIC_init(struct net_device *dev);

const struct net_device_ops my_netdev_ops = {
    .ndo_init = virtualNIC_init,
    .ndo_open = virtualNIC_open,
    .ndo_stop = virtualNIC_release,
    .ndo_start_xmit = virtualNIC_xmit,
};

int virtualNIC_init(struct net_device *dev) {
    dev->netdev_ops = &my_netdev_ops;
    printk("virtualNIC device initialized\n");
}

struct net_device virtualNIC = {
    .netdev_ops = &my_netdev_ops,
    /* .netdev_ops.ndo_init: virtualNIC_init */
};

int virtualNIC_init_module(void) {
    int result;
    strcpy(virtualNIC.name, "virtualNIC");
    if ((result = register_netdev(&virtualNIC))) {
        printk("virtualNIC: Error %d initializing card ...", result);
        return result;
    }
    return 0;
}

void virtualNIC_cleanup(void)
{
    printk("<0> Cleaning Up the Module\n");
    unregister_netdev(&virtualNIC);
    return;
}

module_init(virtualNIC_init_module);
module_exit(virtualNIC_cleanup);
MODULE_LICENSE("GPL");
Please help me figure out where I'm going wrong.
Thanks in advance.
There is already a dummy network driver in the mainline kernel, but if you want to write one for practice, you can certainly proceed with your own.
I have modified a few things in your driver. Give it a try and see whether the dummy interface shows up in ifconfig. It is just sample code (enough to get the interface listed in ifconfig); I am not handling any locking, or packet transmission or reception.
#include <linux/module.h>
#include <linux/netdevice.h>
#include <linux/kernel.h>
#include <linux/etherdevice.h>

struct net_device *virtualNIC;

int virtualNIC_open(struct net_device *dev) {
    printk("virtualNIC_open called\n");
    return 0;
}

int virtualNIC_release(struct net_device *dev) {
    printk("virtualNIC_release called\n");
    netif_stop_queue(dev);
    return 0;
}

int virtualNIC_xmit(struct sk_buff *skb, struct net_device *dev) {
    printk("dummy xmit function called...\n");
    dev_kfree_skb(skb);
    return 0;
}

int virtualNIC_init(struct net_device *dev); /* forward declaration for the ops table */

const struct net_device_ops my_netdev_ops = {
    .ndo_init = virtualNIC_init,
    .ndo_open = virtualNIC_open,
    .ndo_stop = virtualNIC_release,
    .ndo_start_xmit = virtualNIC_xmit,
};

int virtualNIC_init(struct net_device *dev) {
    printk("virtualNIC device initialized\n");
    return 0;
}

static void virtual_setup(struct net_device *dev) {
    dev->netdev_ops = &my_netdev_ops;
}

int virtualNIC_init_module(void) {
    int result;
    virtualNIC = alloc_netdev(0, "virtnC%d", virtual_setup);
    if ((result = register_netdev(virtualNIC))) {
        printk("virtualNIC: Error %d initializing card ...", result);
        return result;
    }
    return 0;
}

void virtualNIC_cleanup(void)
{
    printk("<0> Cleaning Up the Module\n");
    unregister_netdev(virtualNIC);
}

module_init(virtualNIC_init_module);
module_exit(virtualNIC_cleanup);
MODULE_LICENSE("GPL");
This is very helpful. I just want to add this part of the code:
virtualNIC = alloc_netdev(0, "virtnC%d", NET_NAME_UNKNOWN, virtual_setup);
alloc_netdev takes 4 parameters in newer kernels (the extra one is the name_assign_type argument).

dev_add_pack hook and memory leak

I have to write lkm, which would resend all incoming packets. Yep, I know about xt_TEE, but have to write it on my own. I've looked through some examples: http://www.phrack.org/archives/55/p55_0x0c_Building%20Into%20The%20Linux%20Network%20Layer_by_lifeline%20&%20kossak.txt (it's rather old) and http://www.xakep.ru/post/20794/default.asp?print=true (packet sniffer).
Then I've wrote my code:
//INCLUDES//////////////////////////////////////////////////////////////
#include <linux/ip.h>
#include <linux/netdevice.h>
#include <linux/skbuff.h>
#include <linux/module.h>
#include <linux/kernel.h>
//ABOUT/////////////////////////////////////////////////////////////////
MODULE_AUTHOR("");
MODULE_DESCRIPTION("");
MODULE_LICENSE("GPL");
MODULE_VERSION("0.0.1");
//DEFINES///////////////////////////////////////////////////////////////
//SYSCALLS//////////////////////////////////////////////////////////////
//PROTOTYPES////////////////////////////////////////////////////////////
int new_hook_func(struct sk_buff *skb, struct device *dv, struct packet_type *pt);
void test();
//GLOBALS///////////////////////////////////////////////////////////////
static struct packet_type my_packet_type;
static char *dev = "eth0";
struct net_dev *d;
//INIT//////////////////////////////////////////////////////////////////
static int __init init(void)
{
    printk(KERN_ALERT "module init\n");
    d = dev_get_by_name(&init_net, dev);
    my_packet_type.type = htons(ETH_P_ALL);
    my_packet_type.func = new_hook_func;
    my_packet_type.dev = d;
    dev_add_pack(&my_packet_type);
    return 0;
}
//EXIT//////////////////////////////////////////////////////////////////
static void __exit exit(void)
{
    dev_remove_pack(&my_packet_type);
    printk(KERN_ALERT "module exit");
}
////////////////////////////////////////////////////////////////////////
module_init(init);
module_exit(exit);
////////////////////////////////////////////////////////////////////////
//CORE//////////////////////////////////////////////////////////////////
int new_hook_func(struct sk_buff *skb, struct device *dv, struct packet_type *pt)
{
    struct iphdr *ip;
    ip = (struct iphdr *)skb_network_header(skb);
    if (skb->pkt_type != PACKET_OUTGOING)
    {
        if (ip->version == 4 && ip->protocol == IPPROTO_ICMP)
        {
            struct sk_buff *my_skb = 0;
            // copy incoming skb
            my_skb = skb_copy_expand(skb, 16, 16, GFP_ATOMIC);
            // get eth header
            struct ethhdr *eth = eth_hdr(my_skb);
            // push ethernet layer to skb
            skb_push(my_skb, ETH_HLEN);
            // set packet type to outgoing
            skb->pkt_type = PACKET_OUTGOING;
            // send skb struct
            dev_queue_xmit(my_skb);
            // drop all incoming packets
            // kfree_skb(my_skb);
            // kfree_skb(skb);
        }
    }
    return NET_RX_DROP;
}
This code is supposed to resend every ICMP packet received.
I've run into three problems:
1) Memory leak. Somehow it leaks. I tried commenting out the whole hook function so that it only returned, but memory was still leaking.
2) Return codes don't work. No matter what I return (NET_RX_DROP/NET_RX_ACCEPT/NF_DROP/NF_ACCEPT/1/0), the host still receives packets and answers them.
3) Problems with the MAC layer. As you can see in my code, I copy the skb with expansion and have to push the 14 bytes of the MAC header there; otherwise the packet would be sent without any MAC bytes.
I apologize for my poor English and kindly ask for help.
