Implementing cloning and (de/en)capsulation of packets using eBPF - networking

I am trying to create a TC program that will clone a packet, encapsulate it with a modified L3 header and send the clone to a different host ("Monitor host") - Can I do that using a combination of bpf_skb_adjust_room with bpf_clone_redirect?
The kernel examples do not go into much detail about this use case (for example, here).
My current attempt seems to be mutating the original packet:
// Represents the redirect destination.
// This is the value type stored in the `destinations` map below.
struct destination {
// IPv4 address copied verbatim into the encapsulating header's daddr
// (presumably already in network byte order — confirm against the loader).
__u32 destination_ip;
// MAC address written into the Ethernet destination of the mirrored frame.
__u8 destination_mac[ETH_ALEN];
};
// Contains the destination to redirect traffic to.
// Holds a single entry; tc_ingress looks it up with key 0.
struct bpf_map_def SEC("maps") destinations = {
.type = BPF_MAP_TYPE_HASH,
.key_size = sizeof(__u32),
.value_size = sizeof(struct destination),
.max_entries = 1,
.map_flags = BPF_F_NO_PREALLOC,
};
SEC("tc")
int tc_ingress(struct __sk_buff *skb) {
__u32 key = 0;
struct destination *dest = bpf_map_lookup_elem(&destinations, &key);
if (dest != NULL) {
void *data_end = (void *)(long)skb->data_end;
void *data = (void *)(long)skb->data;
// Necessary validation: if L3 layer does not exist, ignore and continue.
if (data + sizeof(struct ethhdr) > data_end) {
return TC_ACT_OK;
}
struct ethhdr *eth = data;
struct iphdr encapsulate_iphdr = {};
struct iphdr *original_iphdr = data + sizeof(struct ethhdr);
if ((void*) original_iphdr + sizeof(struct iphdr) > data_end) {
return TC_ACT_OK;
}
// Change the L2 destination to the provided MAC destination
// and the source to the MAC addr of the recieving host.
memcpy(&eth->h_source, &eth->h_dest, ETH_ALEN);
memcpy(&eth->h_dest, dest->destination_mac, ETH_ALEN);
// Change the L3 destination to the provided destination IP
// and the source to the ip addr of the recieving host.
memcpy(&encapsulate_iphdr.daddr, &dest->destination_ip, IPV4_ADDR_LEN);
memcpy(&encapsulate_iphdr.saddr, &original_iphdr->daddr, IPV4_ADDR_LEN);
// Adjust room for another iphdr after the L2 layer.
if (bpf_skb_adjust_room(skb, sizeof(struct iphdr), BPF_ADJ_ROOM_NET, 0)) {
return TC_ACT_OK;
}
// Store the headers at after L2 headers at the original headers offset.
unsigned long offset = (unsigned long) original_iphdr;
if (bpf_skb_store_bytes(skb, (int)offset, &encapsulate_iphdr, sizeof(struct iphdr), 0)) {
return TC_ACT_OK;
}
// route back the to egress path.
// Zero flag means that the socket buffer is
// cloned to the iface egress path.
bpf_clone_redirect(skb, skb->ifindex, 0);
}
return TC_ACT_OK;
}

I believe that's not possible within the same BPF program run today, because bpf_clone_redirect redirects the clone as soon as it is called and there is no clone helper that doesn't also redirect.
You could however implement this with a recirculation to the same interface. The pseudo code would look something like:
// Second pass: a packet carrying the mark is the untouched clone that was
// re-injected below. Clear the mark and let it continue up the stack.
if (skb->mark == ORIGINAL_PACKET) {
    skb->mark = 0;
    return TC_ACT_OK;
}
// First pass: tag the packet and re-inject an unmodified clone at the ingress
// of the same interface, then clear the tag on the copy that gets rewritten.
skb->mark = ORIGINAL_PACKET;
bpf_clone_redirect(skb, skb->ifindex, BPF_F_INGRESS);
skb->mark = 0;
... implement changes ...
// bpf_redirect() takes (ifindex, flags); it does not take the skb.
return bpf_redirect(skb->ifindex, 0);

Related

How to read a pcap file generated by tcpdump that contains large UDP packets and reassemble the IP fragmented packets?

I would like to read a pcap file generated by tcpdump that contains large UDP packets that have undergone IPV4 fragmentation. The original packets are of a size of around 22000 bytes.
In C++, I would use libtins with its IPV4Reassembler. Is there a way that I can do something similar in Rust?
Currently in Rust here is what I have written so far: a highly incomplete first-pass attempt (using crate pnet):
use pnet::packet::{
ethernet::{EtherTypes, EthernetPacket},
ip::IpNextHeaderProtocols,
ipv4::Ipv4Packet,
udp::UdpPacket,
Packet,
};
/// Iterator adaptor that reads frames from an offline pcap capture and yields
/// IPv4 payloads assembled from their fragments (see the `Iterator` impl).
struct Ipv4Reassembler {
/// Offline capture the frames are read from.
cap: pcap::Capture<pcap::Offline>,
}
impl Iterator for Ipv4Reassembler {
    /// Payload bytes of one reassembled IPv4 datagram (IP header stripped).
    type Item = Vec<u8>;

    /// Reads frames until one IPv4 datagram has been completely reassembled.
    ///
    /// Frames that are too short to parse, or that are not IPv4, are skipped
    /// instead of panicking. Fragments are grouped by (source, destination,
    /// protocol, identification). When a fragment of a different datagram
    /// shows up, the partially assembled one is discarded. A datagram is
    /// yielded once the final fragment (MF flag clear) has been seen and the
    /// number of bytes received equals the datagram length, so fragments may
    /// arrive out of order.
    ///
    /// Limitations: only one datagram is tracked at a time, any partial
    /// datagram is dropped when this call returns, and duplicated or
    /// overlapping fragments are not detected.
    fn next(&mut self) -> Option<Self::Item> {
        let mut payload = Vec::<u8>::new();
        // Identity of the datagram currently being assembled.
        let mut current = None;
        // Bytes copied so far, and the full length once the last fragment is known.
        let mut received: usize = 0;
        let mut expected: Option<usize> = None;

        while let Ok(packet) = self.cap.next() {
            // todo: handle packets other than Ethernet packets
            let ethernet = match EthernetPacket::new(packet.data) {
                Some(frame) => frame,
                None => continue,
            };
            if ethernet.get_ethertype() != EtherTypes::Ipv4 {
                continue;
            }
            let ipv4_packet = match Ipv4Packet::new(ethernet.payload()) {
                Some(ip) => ip,
                None => continue,
            };

            let key = (
                ipv4_packet.get_source(),
                ipv4_packet.get_destination(),
                ipv4_packet.get_next_level_protocol(),
                ipv4_packet.get_identification(),
            );
            if current != Some(key) {
                // A new datagram started: drop whatever was incomplete.
                payload.clear();
                received = 0;
                expected = None;
                current = Some(key);
            }

            // todo: construct header for reassembled packet
            let fragment = ipv4_packet.payload();
            // The fragment offset field counts units of 8 bytes.
            let off: usize = 8 * ipv4_packet.get_fragment_offset() as usize;
            let end = off + fragment.len();
            if payload.len() < end {
                payload.resize(end, 0);
            }
            payload[off..end].copy_from_slice(fragment);
            received += fragment.len();

            // Bit 0 of the flags is "more fragments"; when clear, this
            // fragment ends the datagram and fixes its total length.
            if ipv4_packet.get_flags() & 1 == 0 {
                expected = Some(end);
            }
            if expected == Some(received) {
                return Some(payload);
            }
        }
        None
    }
}
/// Reassembles every IPv4 datagram in the capture and dumps it as UDP.
fn main() {
    let pcap_path = "os-992114000702.pcap";
    let reass = Ipv4Reassembler {
        cap: pcap::Capture::from_file(&pcap_path).unwrap(),
    };
    for payload in reass {
        // `UdpPacket::new` returns `None` when the buffer is shorter than a
        // UDP header; skip such payloads instead of panicking on them.
        match UdpPacket::new(&payload) {
            Some(udp_packet) => {
                dbg!(&udp_packet);
                dbg!(&udp_packet.payload().len());
            }
            None => eprintln!(
                "skipping {}-byte payload: too short for a UDP header",
                payload.len()
            ),
        }
    }
}
In C++ here is the code I would use (using libtins):
#include <tins/ip_reassembler.h>
#include <tins/packet.h>
#include <tins/rawpdu.h>
#include <tins/sniffer.h>
#include <tins/tins.h>
#include <tins/udp.h>
#include <iostream>
#include <string>
/// Reads every packet from a pcap file, reassembles fragmented IPv4
/// datagrams, and prints the payload size of each complete UDP datagram.
///
/// @param pcap_filename Path of the capture file to read.
void read_packets(const std::string &pcap_filename) {
    Tins::IPv4Reassembler reassembler;
    Tins::FileSniffer sniffer(pcap_filename);
    while (Tins::Packet packet = sniffer.next_packet()) {
        Tins::PDU &pdu = *packet.pdu();
        // Still waiting for more fragments of this datagram.
        if (reassembler.process(pdu) == Tins::IPv4Reassembler::FRAGMENTED) {
            continue;
        }
        // Only UDP datagrams that carry a raw payload are of interest.
        if (pdu.find_pdu<Tins::UDP>() == nullptr) {
            continue;
        }
        const Tins::RawPDU *raw = pdu.find_pdu<Tins::RawPDU>();
        if (raw == nullptr) {
            continue;
        }
        const Tins::RawPDU::payload_type &payload = raw->payload();
        // '\n' instead of std::endl: no need to flush after every packet.
        std::cout << "Packet: " << payload.size() << '\n';
        // do something with the reassembled packet here
    }
}
/// Entry point: processes the hard-coded capture file.
int main() {
    read_packets(std::string("os-992114000702.pcap"));
    return 0;
}
g++ -O3 -o pcap pcap.cpp -ltins
It seems that one solution is to implement RFC 815, but I am not sure how to do that in Rust. I have found:
this old pull request to smoltcp, but it appears to have been abandoned.
Fuchsia's reassembly.rs, but I have no idea how to use it outside of Fuchsia.

AsyncTCP on ESP32 and Odd Heap/Socket Issues w/SOFTAP

I'm struggling with an issue where an ESP32 is running as an AP, with AsyncTCP handling connections from multiple ESP32 clients. The AP receives some JSON data and replies with some JSON data. Without the handleData() function, the code runs 100% fine with no issues. Heap is static when no clients connect, and issues only occur when clients start connecting.
Can anyone see anything with my code that could be causing heap corruption or other memory weirdness?
static void handleData(void* arg, AsyncClient* client, void *data, size_t len) {
int i = 0, j = 0;
char clientData[CLIENT_DATA_MAX];
char packetData[len];
char *packetBuf;
packetBuf = (char *)data;
clientData[0] = '\0';
for (i=0;i <= len;i++) {
packetData[j] = packetBuf[i]; //packetBuf[i];
if ((packetData[j] == '\n') || (i == len)) {
packetData[j] = '\0';
if ((j > 0) && (packetData[0] != '\n') && (packetData[0] != '\r')) {
// See sensorData() below...
parseData.function(packetData, clientData);
if (clientData != NULL) {
// TCP reply to client
if (client->space() > 32 && client->canSend()) {
client->write(clientData);
}
}
}
j = 0;
} else
j++;
}
}
void sensorData(void *data, void *retData) {
StaticJsonDocument<CLIENT_DATA_MAX> fields;
StaticJsonDocument<CLIENT_DATA_MAX> output;
char sensor[15] = "\0";
char MAC[18] = "\0";
char value[20] = "\0";
bool sendOK = false;
memcpy((char *)retData, "\0", 1);
DeserializationError error = deserializeJson(fields, (char *)data, CLIENT_DATA_MAX);
if (error) {
DEBUG_PRINTLN(F("deserializeJson() failed"));
return;
}
if (fields["type"])
strcpy(sensor, fields["type"]);
switch (sensor[0]) {
case 'C':
if (fields["value"])
strcpy(value, fields["value"]);
sendOK = true;
break;
case 'T': //DEBUG_PRINT(F("Temp "));
setExtTempSensor(fields["value"]);
sendOK = true;
break;
case 'N':
output["IT"] = intTempC; //Internal temp
output["B1"] = battLevels[0];
serializeJson(output, (char *)retData, CLIENT_DATA_MAX-1);
break;
}
if (sendOK) {
output["Resp"] = "Ok";
serializeJson(output, (char *)retData, CLIENT_DATA_MAX-1);
}
strcat((char *)retData, "\n");
}
// Connection callback: installs the per-client event handlers and timeouts.
// arg is the user argument registered with onClient(); it is not used here.
static void handleNewClient(void* arg, AsyncClient* client) {
    // Event handlers, all registered without a user argument.
    client->onData(handleData, NULL);
    client->onError(handleError, NULL);
    client->onDisconnect(handleDisconnect, NULL);
    client->onTimeout(handleTimeOut, NULL);

    // Timeouts, with the same values as before.
    client->setRxTimeout(1000);
    client->setAckTimeout(500);
}
// Creates the TCP server and registers the new-connection callback.
// NOTE(review): no begin() call is visible here; presumably the server is
// started elsewhere — confirm.
void startServer() {
    server = new AsyncServer(WIFI_SERVER_PORT);
    // The statement was missing its terminating semicolon.
    server->onClient(&handleNewClient, &server);
}
Using AsyncTCP on the ESP32 was having multiple issues. Heap issues, socket issues, assert issues, ACK timeouts, connection timeouts, etc. Swapping to AsyncUDP using the exact same code as shown above with romkey's changes, resolved all of my issues. (Just using romkey's fixes did not fix the errors I was having with AsyncTCP.) I don't believe the issue is with AsyncTCP but with ESP32 libraries.
Either you should declare packetData to be of length len + 1 or your for loop should iterate until i < len. Because the index starts at 0, packetData[len] is actually byte len + 1, so you'll overwrite something random when you store something in packetData[len] if the array is only len chars long. That something random may be the pointer stored in packetBuf, which could easily cause heap corruption.
You should always use strncpy() and never strcpy(). Likewise use strncat() rather than strcat(). Don't depend on having done the math correctly or on sizes not changing as your code evolves. strncpy() and strncat() will guard against overflows. You'll need to pass a length into sensorData() to do that, but sensorData() shouldn't be making assumptions about the available length of retData.
Your test
if (clientData != NULL) {
will never fail because clientData is the address of array and cannot change. I'm not sure what you're trying to test for here but this if will always succeed.
You can just write:
char sensor[15] = "";
you don't need to explicitly assign a string with a null byte in it.
And
memcpy((char *)retData, "\0", 1);
is equivalent to
((char *)retData)[0] = '\0';
What's the point of declaring retData to be void * in the arguments to sensorData()? Your code starts out with it being a char* before calling sensorData() and uses it as a char* inside sensorData(). void * is meant to be an escape hatch for passing around pointers without worrying about their type. You don't need that here, and you end up needing extra casts back to char* because of it. Just declare the argument to be char* and don't worry about casting it again.
You didn't share the code that calls handleData() so there may well be issues outside of these functions.

Capturing berr-counter tx/rx from ip link show

I would like to be able to capture the berr-counter values in a shell script. I can view the values with:
ip -det link show can0 which gives:
2: can0: <NOARP,ECHO> mtu 16 qdisc pfifo_fast state DOWN mode DEFAULT group default qlen 1000
link/can promiscuity 0
can state STOPPED (berr-counter tx 144 rx 128) restart-ms 100
bitrate 125000 sample-point 0.866
tq 133 prop-seg 6 phase-seg1 6 phase-seg2 2 sjw 1
flexcan: tseg1 4..16 tseg2 2..8 sjw 1..4 brp 1..256 brp-inc 1
clock 30000000
I could just parse this output and capture the tx/rx berr-counter, but I would rather capture these values directly. So, I have been trying to find where to access these values. I dug into the code at https://github.com/shemminger/iproute2 and found where these values are printed, in ip/iplink_can.c in the function:
static void can_print_opt(struct link_util *lu, FILE *f, struct rtattr *tb[])
There is the code:
if (tb[IFLA_CAN_BERR_COUNTER]) {
struct can_berr_counter *bc =
RTA_DATA(tb[IFLA_CAN_BERR_COUNTER]);
fprintf(f, "(berr-counter tx %d rx %d) ", bc->txerr, bc->rxerr);
}
And at the bottom of the same file there is a struct:
/* Handler table for links of kind "can": wires the CAN-specific option
 * parser, the option and extended-statistics printers, and the help text
 * into a struct link_util.
 * NOTE(review): presumably this table is located by its .id at run time and
 * the handlers are invoked through the function pointers, which would explain
 * why no direct call to can_print_opt appears — confirm in iproute2's
 * link-type lookup code.
 */
struct link_util can_link_util = {
.id = "can",
.maxattr = IFLA_CAN_MAX,
.parse_opt = can_parse_opt,
.print_opt = can_print_opt,
.print_xstats = can_print_xstats,
.print_help = can_print_help,
};
But I can't find anywhere where can_print_opt, or can_link_util.print_opt are called, and I haven't found any success sifting through all of the struct rtattr in the repo.
I'm not sure where to go from here to get these values other than just grabbing them from the output of ip -det link show can0
Maybe a little bit late, but I was trying the same thing : access CAN interface state and error counters from within a userspace application, without calling ip and parsing output.
As you did, I explored iproute2's code, and then read some documentation about netlink for interacting with network devices. Mainly what you have to do is to send an RTM_GETLINK message to a netlink socket, then parse the response, that is a nested list of netlink attributes.
I found this very interesting starting point : http://iijean.blogspot.com/2010/03/howto-get-list-of-network-interfaces-in.html
In this blog the link to full code is broken, but it's available here : https://gist.github.com/cl4u2/5204374.
Note that instead of doing all this "manually", it is also possible to use libnetlink.
Based on this, I was able to write a test code - quick and dirty - that does what you want. You only need to determine my ifIndex_ variable, which is the integer index of your CAN network interface (can be determined by a SIOCGIFINDEX ioctl on your socketcan socket).
printf("Starting rtnetlink stats reading ...\n");
struct sockaddr_nl local;
struct {
struct nlmsghdr nlh;
struct ifinfomsg ifinfo;
} request;
struct sockaddr_nl kernel;
struct msghdr rtnl_msg;
struct iovec io;
pid_t pid = getpid();
qint64 rtnetlink_socket = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
memset(&local, 0, sizeof(local));
local.nl_family = AF_NETLINK;
local.nl_pid = pid;
local.nl_groups = 0;
if (bind(rtnetlink_socket, (struct sockaddr *) &local, sizeof(local)) < 0) {
printf("Binding failed !\n");
return true;
}
printf("Binding successful.\n");
memset(&rtnl_msg, 0, sizeof(rtnl_msg));
memset(&kernel, 0, sizeof(kernel));
memset(&request, 0, sizeof(request));
kernel.nl_family = AF_NETLINK;
request.nlh.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg));
request.nlh.nlmsg_type = RTM_GETLINK;
request.nlh.nlmsg_flags = NLM_F_REQUEST; // NLM_F_ROOT|NLM_F_MATCH| were originally specified and return all interfaces.
request.nlh.nlmsg_pid = pid;
request.nlh.nlmsg_seq = 1; // Must be monotonically increasing, but we send only one.
// Interface is specified only with index.
request.ifinfo.ifi_family = AF_PACKET;
request.ifinfo.ifi_index = ifIndex_;
request.ifinfo.ifi_change = 0;
io.iov_base = &request;
io.iov_len = request.nlh.nlmsg_len;
rtnl_msg.msg_iov = &io;
rtnl_msg.msg_iovlen = 1;
rtnl_msg.msg_name = &kernel;
rtnl_msg.msg_namelen = sizeof(kernel);
if (sendmsg(rtnetlink_socket, &rtnl_msg, 0) < 0) {
printf("Sendmsg finished with an error.\n");
return true;
}
printf("Sendmsg finished successfully.\n");
// Reply reception
int end = 0;
int replyMaxSize = 8192;
char reply[replyMaxSize];
while (!end) {
int len;
struct nlmsghdr *msg_ptr;
struct msghdr rtnl_reply;
struct iovec io_reply;
memset(&io_reply, 0, sizeof(io_reply));
memset(&rtnl_reply, 0, sizeof(rtnl_reply));
io.iov_base = reply;
io.iov_len = replyMaxSize;
rtnl_reply.msg_iov = &io;
rtnl_reply.msg_iovlen = 1;
rtnl_reply.msg_name = &kernel;
rtnl_reply.msg_namelen = sizeof(kernel);
printf("Waiting for data ...\n");
len = recvmsg(rtnetlink_socket, &rtnl_reply, 0);
printf("Received data with length %d.\n", len);
if (len) {
for (msg_ptr = (struct nlmsghdr *) reply; NLMSG_OK(msg_ptr, len); msg_ptr = NLMSG_NEXT(msg_ptr, len)) {
switch(msg_ptr->nlmsg_type) {
case NLMSG_DONE:
end++;
printf("Received NLMSG_DONE end message.\n");
break;
case RTM_NEWLINK:
printf("Received RTM_NEWLINK message with multipart flag : %d.\n", msg_ptr->nlmsg_flags & NLM_F_MULTI);
if (!(msg_ptr->nlmsg_flags & NLM_F_MULTI)) { end++; }
struct ifinfomsg *iface;
struct rtattr *attribute;
struct rtattr *subAttr;
int msgLen, attrPayloadLen;
iface = (struct ifinfomsg*)NLMSG_DATA(msg_ptr);
msgLen = msg_ptr->nlmsg_len - NLMSG_LENGTH(sizeof(*iface));
for (attribute = IFLA_RTA(iface); RTA_OK(attribute, msgLen); attribute = RTA_NEXT(attribute, msgLen)) {
switch(attribute->rta_type) {
case IFLA_IFNAME:
printf("Interface %d name : %s\n", iface->ifi_index, (char *) RTA_DATA(attribute));
break;
case IFLA_LINKINFO:
attrPayloadLen = RTA_PAYLOAD(attribute);
printf("Found link information. Parsing %d payload bytes ...\n", attrPayloadLen);
for (subAttr = (struct rtattr *)RTA_DATA(attribute); RTA_OK(subAttr, attrPayloadLen); subAttr = RTA_NEXT(subAttr, attrPayloadLen)) {
struct rtattr *subSubAttr;
int subAttrPayloadLen = RTA_PAYLOAD(subAttr);
printf("Found sub-attribute. Type : %d, length : %d.\n", subAttr->rta_type, subAttr->rta_len);
switch (subAttr->rta_type) {
case IFLA_INFO_KIND:
printf("\t Link kind : %s.\n", (char *) RTA_DATA(subAttr));
break;
case IFLA_INFO_DATA:
printf("Found link information data. Parsing %d payload bytes ...\n", RTA_PAYLOAD(subAttr));
for (subSubAttr = (struct rtattr *)RTA_DATA(subAttr); RTA_OK(subSubAttr, subAttrPayloadLen); subSubAttr = RTA_NEXT(subSubAttr, subAttrPayloadLen)) {
printf("Found sub-sub-attribute. Type : %d, length : %d.\n", subSubAttr->rta_type, subSubAttr->rta_len);
switch (subSubAttr->rta_type) {
case IFLA_CAN_STATE:
{
int state = *(int *)RTA_DATA(subSubAttr);
printf("State : %d\n", state);
break;
}
case IFLA_CAN_BERR_COUNTER:
{
struct can_berr_counter *bc = (struct can_berr_counter *)RTA_DATA(subSubAttr);
printf("Error counters : (berr-counter tx %d rx %d)\n", bc->txerr, bc->rxerr);
break;
}
default:
break;
}
}
break;
case IFLA_INFO_XSTATS:
default:
break;
}
}
break;
default:
printf("New attribute. Type : %d, length : %d.\n", attribute->rta_type, attribute->rta_len);
break;
}
}
printf("Finished parsing attributes.\n");
break;
case NLMSG_ERROR:
printf("Could not read link details for interface %d.\n", ifIndex_);
end++;
break;
default:
printf("Received unexpected message ID : %d.\n", msg_ptr->nlmsg_type);
break;
}
printf("Finished parsing message.\n");
}
printf("Finished parsing data.\n");
}
}
close(rtnetlink_socket);
return true;

Using BPF/XDP with Mininet

I've created the following network topology in Mininet to run an algorithm I've implemented using the Linux kernel eXpress Data Path.
The objective is to sample packets on the incoming link s1-eth1 on Switch 1 using XDP and store metadata in a shared BPF map. The execution is successful when run on multiple VMs (instead of using Mininet to create an emulation).
However, when using XDP on Mininet (to listen on the emulated network interface), packets aren't recorded.
To further diagnose the cause, I ran Wireshark to listen on the s1-eth1 interface, which does record packets hitting the interface, but for some reason these same packets aren't being registered through the XDP pipeline.
#define KBUILD_MODNAME "foo"
#include <linux/bpf.h>
#include <linux/in.h>
#include <linux/if_ether.h>
#include <linux/if_packet.h>
#include <linux/if_vlan.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
//BPF_TABLE("percpu_array", uint32_t, long, dropcnt, 256);
// Hit counters (up to 256 entries) keyed by the XOR of source IP,
// destination IP and IP protocol, as computed in xdp_dsa.
BPF_HASH(proto_map, uint32_t, uint32_t, 256);
//Packet Counter to keep track of number of packets flowing through XDP
// Single slot (index 0); xdp_dsa also overwrites it with a sentinel value
// once the epoch size has been reached.
BPF_ARRAY(pkt_count, uint64_t, 1);
//Map to keep track of the current EPOCH SIZE
// Single slot (index 0); a stored 0 is treated by xdp_dsa as "not initialised yet".
BPF_ARRAY(epoch_size_map, uint64_t, 1);
/* Reads the IPv4 header located nh_off bytes into the packet.
 *
 * On success stores the source and destination addresses through src and
 * dest and returns the IP protocol number. Returns 0, leaving src and dest
 * untouched, when the header does not fit before data_end.
 */
static inline int parse_ipv4(void *data, u64 nh_off, void *data_end,
                             __be32 *src, __be32 *dest)
{
    struct iphdr *ip_header = data + nh_off;

    /* The whole header must lie inside the packet before it is read. */
    if (ip_header + 1 <= data_end) {
        *src = ip_header->saddr;
        *dest = ip_header->daddr;
        return ip_header->protocol;
    }
    return 0;
}
/* Returns the bitwise exclusive OR of the two pointed-to integers.
 * Same result as ~(x & y) & ~(~x & ~y), written with the ^ operator.
 */
static inline int bitXor(int* x, int* y)
{
    return *x ^ *y;
}
/* XDP entry point: samples packets into proto_map for one epoch.
 *
 * For every frame with a complete Ethernet header, a key is built from
 * source IP ^ destination IP ^ IP protocol (all zero for non-IPv4 frames).
 * While the packet counter is below the epoch size, the key's entry in
 * proto_map is incremented together with the packet counter. When the
 * counter reaches the epoch size it is replaced by a sentinel value, after
 * which nothing more is recorded.
 *
 * Always returns RETURNCODE (e.g. XDP_PASS, or XDP_TX to bounce the frame).
 */
int xdp_dsa(struct CTXTYPE *ctx) {
    void* data_end = (void*)(long)ctx->data_end;
    void* data = (void*)(long)ctx->data;
    struct ethhdr *eth = data;
    int rc = RETURNCODE; // let pass XDP_PASS or redirect to tx via XDP_TX

    uint32_t slot = 0;                 // index of the only element in both array maps
    uint32_t zero = 0;                 // initial value for new proto_map entries
    uint64_t magic_value = 12345678;   // sentinel stored once the epoch is over
    //Predecided initial epoch size
    uint64_t init_epoch_size = 10;
    // Defaults to the initial size so it is never read uninitialised.
    uint64_t cur_epoch_size = init_epoch_size;
    uint32_t ipproto = 0;              // 0 means unknown protocol
    __be32 src_ip = 0, dest_ip = 0;
    uint64_t nh_off = sizeof(*eth);

    // Frames too short for an Ethernet header are not sampled. The braces
    // matter: without them the return ran for every packet and everything
    // below was dead code.
    if (data + nh_off > data_end) {
        return rc;
    }

    if (eth->h_proto == htons(ETH_P_IP)) {
        ipproto = parse_ipv4(data, nh_off, data_end, &src_ip, &dest_ip);
    }
    /* IPv6 is not handled yet:
    else if (h_proto == htons(ETH_P_IPV6))
        index = parse_ipv6(data, nh_off, data_end);
    */

    /*XOR the srcIP, destIP, and ipproto to encode, then hash*/
    uint32_t flow_key = src_ip ^ dest_ip ^ ipproto;

    // Both array maps hold 64-bit values, so they are accessed through
    // 64-bit pointers.
    // Start condition (epoch size map is initialized with zero), then set to
    // the initial epoch size; in every case read the current size.
    uint64_t *epoch_size = epoch_size_map.lookup(&slot);
    if (epoch_size) {
        if (*epoch_size == 0) {
            *epoch_size = init_epoch_size;
        }
        cur_epoch_size = *epoch_size;
    }

    uint64_t *counter_value = pkt_count.lookup(&slot);
    if (counter_value) {
        if (*counter_value < cur_epoch_size) {
            uint32_t *value = proto_map.lookup_or_init(&flow_key, &zero);
            if (value) {
                pkt_count.increment(slot);
                *value += 1;
            }
        } else if (*counter_value == cur_epoch_size) {
            // End of epoch: park the counter on the sentinel.
            pkt_count.update(&slot, &magic_value);
        }
        // Any other value, including the sentinel: nothing to record.
    }
    return rc;
}
Any ideas?

dev_add_pack hook and memory leak

I have to write lkm, which would resend all incoming packets. Yep, I know about xt_TEE, but have to write it on my own. I've looked through some examples: http://www.phrack.org/archives/55/p55_0x0c_Building%20Into%20The%20Linux%20Network%20Layer_by_lifeline%20&%20kossak.txt (it's rather old) and http://www.xakep.ru/post/20794/default.asp?print=true (packet sniffer).
Then I wrote my code:
//INCLUDES//////////////////////////////////////////////////////////////
#include <linux/ip.h>
#include <linux/netdevice.h>
#include <linux/skbuff.h>
#include <linux/module.h>
#include <linux/kernel.h>
//ABOUT/////////////////////////////////////////////////////////////////
MODULE_AUTHOR("");
MODULE_DESCRIPTION("");
MODULE_LICENSE("GPL");
MODULE_VERSION("0.0.1");
//DEFINES///////////////////////////////////////////////////////////////
//SYSCALLS//////////////////////////////////////////////////////////////
//PROTOTYPES/////////////////////////////////////////////////////////////
int new_hook_func(struct sk_buff *skb, struct device *dv, struct packet_type *pt);
void test();
//GLOBALS///////////////////////////////////////////////////////////////
static struct packet_type my_packet_type;
static char *dev = "eth0";
struct net_dev *d;
//INIT//////////////////////////////////////////////////////////////////
static int __init init(void)
{
printk(KERN_ALERT "module init\n");
d = dev_get_by_name(&init_net, dev);
my_packet_type.type = htons(ETH_P_ALL);
my_packet_type.func = new_hook_func;
my_packet_type.dev = d;
dev_add_pack(&my_packet_type);
return 0;
}
//EXIT//////////////////////////////////////////////////////////////////
static void __exit exit(void)
{
dev_remove_pack(&my_packet_type);
printk(KERN_ALERT "module exit");
}
////////////////////////////////////////////////////////////////////////
module_init(init);
module_exit(exit);
////////////////////////////////////////////////////////////////////////
//CORE//////////////////////////////////////////////////////////////////
int new_hook_func(struct sk_buff *skb, struct device *dv, struct packet_type *pt)
{
struct iphdr *ip;
ip = (struct iphdr*)skb_network_header(skb);
if(skb->pkt_type != PACKET_OUTGOING)
{
if(ip->version == 4 && ip->protocol == IPPROTO_ICMP)
{
struct sk_buff *my_skb = 0;
//copy incoming skb
my_skb = skb_copy_expand(skb, 16, 16, GFP_ATOMIC);
//get eth header
struct ethhdr *eth = eth_hdr(my_skb);
//push ethernet layer to skb
skb_push(my_skb, ETH_HLEN);
//set packet type to outgoing
skb->pkt_type = PACKET_OUTGOING;
//send skb struct
dev_queue_xmit(my_skb);
//drop all incoming packets
// kfree_skb(my_skb);
// kfree_skb(skb);
}
}
return NET_RX_DROP;
}
This code is supposed to resend every ICMP packet received.
So, I've faced three problems:
1) Memory leak. Somehow it leaks. I tried commenting out the whole hook function so that only the return remained, but memory was still leaking.
2) Return codes don't work. No matter what I return (NET_RX_DROP/NET_RX_ACCEPT/NF_DROP/NF_ACCEPT/1/0), the host still receives the packets and answers them.
3) Problems with the MAC layer. As you may see in my code, I copy the skb struct with expansion and have to push 14 bytes of MAC layer there. Otherwise the packet is sent without any MAC bytes.
I apologize for my poor English and kindly ask for help.

Resources