diff --git a/configure.ac b/configure.ac index ca964d9039a0..3151bff17a92 100644 --- a/configure.ac +++ b/configure.ac @@ -741,6 +741,33 @@ exit 1 fi + LIBHWLOC="" + AC_ARG_ENABLE(hwloc, + AS_HELP_STRING([--enable-hwloc], [Enable hwloc support [default=no]]), + [enable_hwloc=$enableval],[enable_hwloc=no]) + AS_IF([test "x$enable_hwloc" = "xyes"], [ + PKG_CHECK_MODULES([HWLOC], [hwloc >= 2.0.0], + [AC_DEFINE([HAVE_HWLOC], [1], [Define if hwloc library is present and meets version requirements])], + LIBHWLOC="no") + + if test "$LIBHWLOC" = "no"; then + echo + echo " ERROR! hwloc library version >= 2.0.0 not found, go get it" + echo " from https://www.open-mpi.org/projects/hwloc/ " + echo " or your distribution:" + echo + echo " Ubuntu: apt-get install hwloc libhwloc-dev" + echo " Fedora: dnf install hwloc hwloc-devel" + echo " CentOS/RHEL: yum install hwloc hwloc-devel" + echo + exit 1 + else + CFLAGS="${CFLAGS} ${HWLOC_CFLAGS}" + LDFLAGS="${LDFLAGS} ${HWLOC_LIBS}" + enable_hwloc="yes" + fi + ]) + # libpthread AC_ARG_WITH(libpthread_includes, [ --with-libpthread-includes=DIR libpthread include directory], @@ -2561,6 +2588,7 @@ SURICATA_BUILD_CONF="Suricata Configuration: JA4 support: ${enable_ja4} Non-bundled htp: ${enable_non_bundled_htp} Hyperscan support: ${enable_hyperscan} + Hwloc support: ${enable_hwloc} Libnet support: ${enable_libnet} liblz4 support: ${enable_liblz4} Landlock support: ${enable_landlock} diff --git a/doc/userguide/configuration/suricata-yaml.rst b/doc/userguide/configuration/suricata-yaml.rst index 33b3c5528fed..17a68d14067e 100644 --- a/doc/userguide/configuration/suricata-yaml.rst +++ b/doc/userguide/configuration/suricata-yaml.rst @@ -917,6 +917,7 @@ per available CPU/CPU core. threading: set-cpu-affinity: yes + autopin: no cpu-affinity: management-cpu-set: cpu: [ 0 ] # include only these cpus in affinity settings @@ -933,6 +934,13 @@ per available CPU/CPU core. 
medium: [ "1-2" ] high: [ 3 ] default: "medium" + interface-specific-cpu-set: + - interface: "enp4s0f0" # 0000:3b:00.0 # net_bonding0 # ens1f0 + cpu: [ 1,3,5,7,9 ] + mode: "exclusive" + prio: + high: [ "all" ] + default: "medium" verdict-cpu-set: cpu: [ 0 ] prio: @@ -969,6 +977,80 @@ Runmode Workers:: worker-cpu-set - used for receive,streamtcp,decode,detect,output(logging),respond/reject, verdict +Interface-specific CPU affinity settings +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Using the new configuration format introduced in Suricata 8.0 it is possible +to set CPU affinity settings per interface. This can be useful +when you have multiple interfaces and you want to dedicate specific CPU cores +to specific interfaces. This can be useful for example when Suricata runs on +multiple NUMA nodes and reads from interfaces on each NUMA node. + +Interface-specific affinity settings can be configured for the worker-cpu-set +and the receive-cpu-set (only used in autofp mode). +This feature is available for capture modes which work with interfaces +(af-packet, dpdk, etc.). The value of the interface key can be the kernel +interface name (e.g. eth0 for af-packet), the PCI address of the interface +(e.g. 0000:3b:00.0 for DPDK capture mode), or the name of the virtual device +interface (e.g. net_bonding0 for DPDK capture mode). +The interface names need to be unique and be located under the capture mode +configuration. + +The interface-specific settings will override the global settings for the +worker-cpu-set and receive-cpu-set. The CPUs do not need to be contained in +the parent node settings. If the interface-specific settings are not defined, +the global settings will be used. 
+ +:: + + threading: + set-cpu-affinity: yes + cpu-affinity: + worker-cpu-set: + interface-specific-cpu-set: + - interface: "eth0" # 0000:3b:00.0 # net_bonding0 + cpu: [ 1,3,5,7,9 ] + mode: "exclusive" + prio: + high: [ "all" ] + default: "medium" + +Automatic NUMA-aware CPU core pinning +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +When Suricata is running on a system with multiple NUMA nodes, it is possible +to automatically use CPUs from the same NUMA node as the network capture +interface. +CPU cores on the same NUMA nodes as the network capture interface have +reduced memory access latency and increase the performance of Suricata. +This is enabled by setting the `autopin` option to `yes` in the threading +section. This option is available for worker-cpu-set and receive-cpu-set. + +:: + + threading: + set-cpu-affinity: yes + autopin: yes + cpu-affinity: + worker-cpu-set: + cpu: [ "all" ] + mode: "exclusive" + prio: + high: [ "all" ] + +Consider 2 interfaces defined in the capture mode configuration, one on each +NUMA node. The `autopin` option is enabled to automatically use CPUs from the +same NUMA node as the interface. The worker-cpu-set is set to use all CPUs. +When an interface on the first NUMA node is used, the worker threads will be +pinned to CPUs on the first NUMA node. When an interface on the second NUMA node +is used, the worker threads will be pinned to CPUs on the second NUMA node. +If the number of CPU cores on a given NUMA node is exhausted then the worker +threads will be pinned to CPUs on the other NUMA node. + +The option `threading.autopin` can be combined with the interface-specific CPU +affinity settings. +To use the `autopin` option, the system must have the `hwloc` +dependency installed and pass `--enable-hwloc` to the configure script. 
IP Defrag --------- diff --git a/doc/userguide/upgrade.rst b/doc/userguide/upgrade.rst index fa08c8d14280..a738224d990a 100644 --- a/doc/userguide/upgrade.rst +++ b/doc/userguide/upgrade.rst @@ -99,6 +99,20 @@ Major changes + worker-cpu-set: + cpu: [0, 1] + - The `threading.cpu-affinity` configuration has been extended to support + interface-specific CPU affinity settings. This allows you to specify + CPU affinity settings for each interface separately. + The new configuration format is described in :ref:`suricata-yaml-threading`. + The old configuration format does not support this extension and will be + removed in Suricata 9.0. + - The `threading.cpu-affinity` configuration now supports autopinning + worker or receive threads to the same NUMA node as the network capture + interface is located on. + This can be enabled by setting `threading.autopin` to `yes`. + See :ref:`suricata-yaml-threading` for more information. + This requires the hwloc dependency to be installed and `--enable-hwloc` + to be passed to the configure script. + Removals ~~~~~~~~ - The ssh keywords ``ssh.protoversion`` and ``ssh.softwareversion`` have been removed. 
diff --git a/src/runmode-dpdk.c b/src/runmode-dpdk.c index 6bbe3c1f2ed6..32d8a1ff255b 100644 --- a/src/runmode-dpdk.c +++ b/src/runmode-dpdk.c @@ -368,12 +368,17 @@ static int ConfigSetThreads(DPDKIfaceConfig *iconf, const char *entry_str) SCReturnInt(-EINVAL); } - ThreadsAffinityType *wtaf = GetAffinityTypeFromName("worker-cpu-set"); + bool wtaf_periface = true; + ThreadsAffinityType *wtaf = GetAffinityTypeForNameAndIface("worker-cpu-set", iconf->iface); if (wtaf == NULL) { - SCLogError("Specify worker-cpu-set list in the threading section"); - SCReturnInt(-EINVAL); + wtaf_periface = false; + wtaf = GetAffinityTypeForNameAndIface("worker-cpu-set", NULL); // mandatory + if (wtaf == NULL) { + SCLogError("Specify worker-cpu-set list in the threading section"); + SCReturnInt(-EINVAL); + } } - ThreadsAffinityType *mtaf = GetAffinityTypeFromName("management-cpu-set"); + ThreadsAffinityType *mtaf = GetAffinityTypeForNameAndIface("management-cpu-set", NULL); if (mtaf == NULL) { SCLogError("Specify management-cpu-set list in the threading section"); SCReturnInt(-EINVAL); @@ -406,7 +411,12 @@ static int ConfigSetThreads(DPDKIfaceConfig *iconf, const char *entry_str) } if (strcmp(entry_str, "auto") == 0) { - iconf->threads = (uint16_t)sched_cpus / LiveGetDeviceCount(); + if (wtaf_periface) { + iconf->threads = (uint16_t)sched_cpus; + SCLogConfig("%s: auto-assigned %u threads", iconf->iface, iconf->threads); + SCReturnInt(0); + } + iconf->threads = (uint16_t)sched_cpus / LiveGetDeviceCountWithoutAssignedThreading(); if (iconf->threads == 0) { SCLogError("Not enough worker CPU cores with affinity were configured"); SCReturnInt(-ERANGE); @@ -416,7 +426,8 @@ static int ConfigSetThreads(DPDKIfaceConfig *iconf, const char *entry_str) iconf->threads++; remaining_auto_cpus--; } else if (remaining_auto_cpus == -1) { - remaining_auto_cpus = (int32_t)sched_cpus % LiveGetDeviceCount(); + remaining_auto_cpus = + (int32_t)sched_cpus % LiveGetDeviceCountWithoutAssignedThreading(); if 
(remaining_auto_cpus > 0) { iconf->threads++; remaining_auto_cpus--; @@ -844,23 +855,46 @@ static int ConfigLoad(DPDKIfaceConfig *iconf, const char *iface) SCReturnInt(0); } -static int32_t ConfigValidateThreads(uint16_t iface_threads) +static bool ConfigThreadsGenericIsValid(uint16_t iface_threads, ThreadsAffinityType *wtaf) { static uint32_t total_cpus = 0; total_cpus += iface_threads; - ThreadsAffinityType *wtaf = GetAffinityTypeFromName("worker-cpu-set"); if (wtaf == NULL) { SCLogError("Specify worker-cpu-set list in the threading section"); - return -1; + return false; } if (total_cpus > UtilAffinityGetAffinedCPUNum(wtaf)) { - SCLogError("Interfaces requested more cores than configured in the threading section " - "(requested %d configured %d", + SCLogError("Interfaces requested more cores than configured in the worker-cpu-set " + "threading section (requested %d configured %d", total_cpus, UtilAffinityGetAffinedCPUNum(wtaf)); - return -1; + return false; } - return 0; + return true; +} + +static bool ConfigThreadsInterfaceIsValid(uint16_t iface_threads, ThreadsAffinityType *itaf) +{ + if (iface_threads > UtilAffinityGetAffinedCPUNum(itaf)) { + SCLogError("Interface requested more cores than configured in the interface-specific " + "threading section (requested %d configured %d", + iface_threads, UtilAffinityGetAffinedCPUNum(itaf)); + return false; + } + + return true; +} + +static bool ConfigIsThreadingValid(uint16_t iface_threads, const char *iface) +{ + ThreadsAffinityType *itaf = GetAffinityTypeForNameAndIface("worker-cpu-set", iface); + ThreadsAffinityType *wtaf = GetAffinityTypeForNameAndIface("worker-cpu-set", NULL); + if (itaf && !ConfigThreadsInterfaceIsValid(iface_threads, itaf)) { + return false; + } else if (itaf == NULL && !ConfigThreadsGenericIsValid(iface_threads, wtaf)) { + return false; + } + return true; } static DPDKIfaceConfig *ConfigParse(const char *iface) @@ -873,7 +907,7 @@ static DPDKIfaceConfig *ConfigParse(const char *iface) 
ConfigInit(&iconf); retval = ConfigLoad(iconf, iface); - if (retval < 0 || ConfigValidateThreads(iconf->threads) != 0) { + if (retval < 0 || !ConfigIsThreadingValid(iconf->threads, iface)) { iconf->DerefFunc(iconf); SCReturnPtr(NULL, "void *"); } diff --git a/src/suricata.c b/src/suricata.c index ee9dfc0b5b69..b0b1721ccab0 100644 --- a/src/suricata.c +++ b/src/suricata.c @@ -111,6 +111,7 @@ #include "tmqh-packetpool.h" #include "tm-queuehandlers.h" +#include "util-affinity.h" #include "util-byte.h" #include "util-conf.h" #include "util-coredump-config.h" @@ -2298,6 +2299,9 @@ void PostRunDeinit(const int runmode, struct timeval *start_time) StreamTcpFreeConfig(STREAM_VERBOSE); DefragDestroy(); HttpRangeContainersDestroy(); +#ifdef HAVE_HWLOC + TopologyDestroy(); +#endif /* HAVE_HWLOC */ TmqResetQueues(); #ifdef PROFILING diff --git a/src/threadvars.h b/src/threadvars.h index 6f339e9839d5..471714a254c4 100644 --- a/src/threadvars.h +++ b/src/threadvars.h @@ -136,6 +136,9 @@ typedef struct ThreadVars_ { struct FlowQueue_ *flow_queue; bool break_loop; + /** Interface-specific thread affinity */ + char *iface_name; + Storage storage[]; } ThreadVars; diff --git a/src/tm-threads.c b/src/tm-threads.c index 07f9a9390df0..20ea29120e0d 100644 --- a/src/tm-threads.c +++ b/src/tm-threads.c @@ -865,8 +865,24 @@ TmEcode TmThreadSetupOptions(ThreadVars *tv) TmThreadSetPrio(tv); if (tv->thread_setup_flags & THREAD_SET_AFFTYPE) { ThreadsAffinityType *taf = &thread_affinity[tv->cpu_affinity]; + bool use_iface_affinity = RunmodeIsAutofp() && tv->cpu_affinity == RECEIVE_CPU_SET && + FindAffinityByInterface(taf, tv->iface_name) != NULL; + use_iface_affinity |= RunmodeIsWorkers() && tv->cpu_affinity == WORKER_CPU_SET && + FindAffinityByInterface(taf, tv->iface_name) != NULL; + + if (use_iface_affinity) { + taf = FindAffinityByInterface(taf, tv->iface_name); + } + + if (CPU_COUNT(&taf->cpu_set) == 0) { + if (!taf->nocpu_warned) { + SCLogWarning("No CPU affinity set for %s", 
AffinityGetYamlPath(taf)); + taf->nocpu_warned = true; + } + } + if (taf->mode_flag == EXCLUSIVE_AFFINITY) { - uint16_t cpu = AffinityGetNextCPU(taf); + uint16_t cpu = AffinityGetNextCPU(tv, taf); SetCPUAffinity(cpu); /* If CPU is in a set overwrite the default thread prio */ if (CPU_ISSET(cpu, &taf->lowprio_cpu)) { @@ -1600,6 +1616,10 @@ static void TmThreadFree(ThreadVars *tv) SCFree(tv->printable_name); } + if (tv->iface_name) { + SCFree(tv->iface_name); + } + if (tv->stream_pq_local) { BUG_ON(tv->stream_pq_local->len); SCMutexDestroy(&tv->stream_pq_local->mutex_q); diff --git a/src/util-affinity.c b/src/util-affinity.c index ee365372702a..e78fd7e8af61 100644 --- a/src/util-affinity.c +++ b/src/util-affinity.c @@ -31,50 +31,169 @@ #include "util-cpu.h" #include "util-byte.h" #include "util-debug.h" +#include "util-dpdk.h" ThreadsAffinityType thread_affinity[MAX_CPU_SET] = { { .name = "receive-cpu-set", .mode_flag = EXCLUSIVE_AFFINITY, .prio = PRIO_MEDIUM, - .lcpu = 0, + .lcpu = { 0 }, }, { .name = "worker-cpu-set", .mode_flag = EXCLUSIVE_AFFINITY, .prio = PRIO_MEDIUM, - .lcpu = 0, + .lcpu = { 0 }, }, { .name = "verdict-cpu-set", .mode_flag = BALANCED_AFFINITY, .prio = PRIO_MEDIUM, - .lcpu = 0, + .lcpu = { 0 }, }, { .name = "management-cpu-set", .mode_flag = BALANCED_AFFINITY, .prio = PRIO_MEDIUM, - .lcpu = 0, + .lcpu = { 0 }, }, }; int thread_affinity_init_done = 0; +#if !defined __CYGWIN__ && !defined OS_WIN32 && !defined __OpenBSD__ && !defined sun +#ifdef HAVE_HWLOC +static hwloc_topology_t topology = NULL; +#endif /* HAVE_HWLOC */ +#endif /* OS_WIN32 and __OpenBSD__ */ + +static ThreadsAffinityType *AllocAndInitAffinityType( + const char *name, const char *interface_name, ThreadsAffinityType *parent) +{ + ThreadsAffinityType *new_affinity = SCCalloc(1, sizeof(ThreadsAffinityType)); + if (new_affinity == NULL) { + FatalError("Unable to allocate memory for new CPU affinity type"); + } + + new_affinity->name = SCStrdup(interface_name); + if (new_affinity->name 
== NULL) { + FatalError("Unable to allocate memory for new CPU affinity type name"); + } + new_affinity->parent = parent; + new_affinity->mode_flag = EXCLUSIVE_AFFINITY; + new_affinity->prio = PRIO_MEDIUM; + for (int i = 0; i < MAX_NUMA_NODES; i++) { + new_affinity->lcpu[i] = 0; + } + + if (parent != NULL) { + if (parent->nb_children == parent->nb_children_capacity) { + if (parent->nb_children_capacity == 0) { + parent->nb_children_capacity = 2; + } else { + parent->nb_children_capacity *= 2; + } + void *p = SCRealloc( + parent->children, parent->nb_children_capacity * sizeof(ThreadsAffinityType *)); + if (p == NULL) { + FatalError("Unable to reallocate memory for children CPU affinity types"); + } + parent->children = p; + } + parent->children[parent->nb_children++] = new_affinity; + } + + return new_affinity; +} + +ThreadsAffinityType *FindAffinityByInterface( + ThreadsAffinityType *parent, const char *interface_name) +{ + for (uint32_t i = 0; i < parent->nb_children; i++) { + if (interface_name && strcmp(parent->children[i]->name, interface_name) == 0) { + return parent->children[i]; + } + } + return NULL; +} + +/** + * \brief Find affinity by name (*-cpu-set name) and an interface name. + * \param name the name of the affinity (e.g. worker-cpu-set, receive-cpu-set). + * The name is required and cannot be NULL. + * \param interface_name the name of the interface. + * If NULL, the affinity is looked up by name only. 
+ * \retval a pointer to the affinity or NULL if not found + */ +ThreadsAffinityType *GetAffinityTypeForNameAndIface(const char *name, const char *interface_name) +{ + int i; + ThreadsAffinityType *parent_affinity = NULL; + + for (i = 0; i < MAX_CPU_SET; i++) { + if (strcmp(thread_affinity[i].name, name) == 0) { + parent_affinity = &thread_affinity[i]; + break; + } + } + + if (parent_affinity == NULL) { + SCLogError("CPU affinity with name \"%s\" not found", name); + return NULL; + } + + if (interface_name != NULL) { + ThreadsAffinityType *child_affinity = + FindAffinityByInterface(parent_affinity, interface_name); + // found or not found, it is returned + return child_affinity; + } + + return parent_affinity; +} + /** - * \brief find affinity by its name + * \brief Finds affinity by its name and interface name. + * Interfaces are children of cpu-set names. If the queried interface is not + * found, then it is allocated, initialized and assigned to the queried cpu-set. + * \param name the name of the affinity (e.g. worker-cpu-set, receive-cpu-set). + * The name is required and cannot be NULL. + * \param interface_name the name of the interface. + * If NULL, the affinity is looked up by name only. 
* \retval a pointer to the affinity or NULL if not found */ -ThreadsAffinityType * GetAffinityTypeFromName(const char *name) +ThreadsAffinityType *GetOrAllocAffinityTypeForIfaceOfName( + const char *name, const char *interface_name) { int i; + ThreadsAffinityType *parent_affinity = NULL; + for (i = 0; i < MAX_CPU_SET; i++) { - if (!strcmp(thread_affinity[i].name, name)) { - return &thread_affinity[i]; + if (strcmp(thread_affinity[i].name, name) == 0) { + parent_affinity = &thread_affinity[i]; + break; } } - return NULL; + + if (parent_affinity == NULL) { + SCLogError("CPU affinity with name \"%s\" not found", name); + return NULL; + } + + if (interface_name != NULL) { + ThreadsAffinityType *child_affinity = + FindAffinityByInterface(parent_affinity, interface_name); + if (child_affinity != NULL) { + return child_affinity; + } + + // If not found, allocate and initialize a new child affinity + return AllocAndInitAffinityType(name, interface_name, parent_affinity); + } + + return parent_affinity; } #if !defined __CYGWIN__ && !defined OS_WIN32 && !defined __OpenBSD__ && !defined sun @@ -275,38 +394,114 @@ static void SetupAffinityThreads(ThreadsAffinityType *taf, ConfNode *affinity) } } -static bool AllCPUsUsed(ThreadsAffinityType *taf) +/** + * \brief Get the YAML path for the given affinity type. + * The path is built using the parent name (if available) and the affinity name. + * Do not free the returned string. 
+ * \param taf the affinity type - if NULL, the path is built for the root node + * \return a string containing the YAML path, or NULL if the path is too long + */ +char *AffinityGetYamlPath(ThreadsAffinityType *taf) { - if (taf->lcpu < UtilCpuGetNumProcessorsOnline()) { - return false; + static char rootpath[] = "threading.cpu-affinity"; + static char path[1024] = { 0 }; + char subpath[256] = { 0 }; + + if (taf == NULL) { + return rootpath; + } + + if (taf->parent != NULL) { + long r = snprintf( + subpath, sizeof(subpath), "%s.interface-specific-cpu-set.", taf->parent->name); + if (r < 0 || r >= (long)sizeof(subpath)) { + FatalError("Unable to build YAML path for CPU affinity %s.%s", taf->parent->name, + taf->name); + } + } else { + subpath[0] = '\0'; } - return true; + + long r = snprintf(path, sizeof(path), "%s.%s%s", rootpath, subpath, taf->name); + if (r < 0 || r >= (long)sizeof(path)) { + FatalError("Unable to build YAML path for CPU affinity %s", taf->name); + } + + return path; } static void ResetCPUs(ThreadsAffinityType *taf) { - taf->lcpu = 0; + for (int i = 0; i < MAX_NUMA_NODES; i++) { + taf->lcpu[i] = 0; + } } -static uint16_t GetNextAvailableCPU(ThreadsAffinityType *taf) +/** + * \brief Check if the set name corresponds to a worker CPU set. + */ +static bool IsWorkerCpuSet(const char *setname) { - uint16_t cpu = taf->lcpu; - int attempts = 0; + return (strcmp(setname, "worker-cpu-set") == 0); +} - while (!CPU_ISSET(cpu, &taf->cpu_set) && attempts < 2) { - cpu = (cpu + 1) % UtilCpuGetNumProcessorsOnline(); - if (cpu == 0) - attempts++; +/** + * \brief Check if the set name corresponds to a receive CPU set. + */ +static bool IsReceiveCpuSet(const char *setname) +{ + return (strcmp(setname, "receive-cpu-set") == 0); +} + +/** + * \brief Set up affinity configuration for a single interface. 
+ */ +static void SetupSingleIfaceAffinity(ThreadsAffinityType *taf, ConfNode *iface_node) +{ + // offload to Setup function + ConfNode *child_node; + const char *interface_name = NULL; + TAILQ_FOREACH (child_node, &iface_node->head, next) { + if (strcmp(child_node->name, "interface") == 0) { + interface_name = child_node->val; + break; + } + } + if (interface_name == NULL) { + return; + } + + ThreadsAffinityType *iface_taf = + GetOrAllocAffinityTypeForIfaceOfName(taf->name, interface_name); + if (iface_taf == NULL) { + FatalError("Unknown CPU affinity type for interface: %s", interface_name); } - taf->lcpu = cpu + 1; + SetupCpuSets(iface_taf, iface_node, interface_name); + SetupAffinityPriority(iface_taf, iface_node, interface_name); + SetupAffinityMode(iface_taf, iface_node); + SetupAffinityThreads(iface_taf, iface_node); +} - if (attempts == 2) { - SCLogError( - "cpu_set does not contain available CPUs, CPU affinity configuration is invalid"); +/** + * \brief Set up per-interface affinity configurations. 
+ */ +static void SetupPerIfaceAffinity(ThreadsAffinityType *taf, ConfNode *affinity) +{ + char if_af[] = "interface-specific-cpu-set"; + ConfNode *per_iface_node = ConfNodeLookupChild(affinity, if_af); + if (per_iface_node == NULL) { + return; } - return cpu; + ConfNode *iface_node; + TAILQ_FOREACH (iface_node, &per_iface_node->head, next) { + if (strcmp(iface_node->val, "interface") == 0) { + SetupSingleIfaceAffinity(taf, iface_node); + } else { + SCLogWarning("Unknown node in %s: %s", if_af, iface_node->name); + } + } } /** @@ -323,9 +518,8 @@ static bool AffinityConfigIsDeprecated(void) return threading_affinity_deprecated; } - ConfNode *root = ConfGetNode("threading.cpu-affinity"); + ConfNode *root = ConfGetNode(AffinityGetYamlPath(NULL)); if (root == NULL) { - threading_affinity_deprecated = false; initialized = true; return threading_affinity_deprecated; } @@ -357,18 +551,17 @@ void AffinitySetupLoadFromConfig(void) AffinitySetupInit(); thread_affinity_init_done = 1; if (AffinityConfigIsDeprecated()) { - SCLogWarning("CPU affinity configuration uses a deprecated structure and will become " - "obsolete in a future major release (Suricata 9.0). Please update your " - "threading.cpu-affinity to the new format. " - "See notes in %s/upgrade.html#upgrading-7-0-to-8-0", - GetDocURL()); + SCLogWarning("CPU affinity configuration uses a deprecated structure and will not be " + "supported in a future major release (Suricata 9.0). Please update your " + "%s to the new format. 
See notes in %s/upgrade.html#upgrading-7-0-to-8-0", + AffinityGetYamlPath(NULL), GetDocURL()); } } - SCLogDebug("Loading threading.cpu-affinity from config"); - ConfNode *root = ConfGetNode("threading.cpu-affinity"); + SCLogDebug("Loading %s from config", AffinityGetYamlPath(NULL)); + ConfNode *root = ConfGetNode(AffinityGetYamlPath(NULL)); if (root == NULL) { - SCLogInfo("Cannot find threading.cpu-affinity node in config"); + SCLogInfo("Cannot find %s node in config", AffinityGetYamlPath(NULL)); return; } @@ -380,7 +573,7 @@ void AffinitySetupLoadFromConfig(void) continue; } - ThreadsAffinityType *taf = GetAffinityTypeFromName(setname); + ThreadsAffinityType *taf = GetOrAllocAffinityTypeForIfaceOfName(setname, NULL); if (taf == NULL) { FatalError("Unknown CPU affinity type: %s", setname); } @@ -393,25 +586,372 @@ void AffinitySetupLoadFromConfig(void) SetupAffinityPriority(taf, aff_query_node, setname); SetupAffinityMode(taf, aff_query_node); SetupAffinityThreads(taf, aff_query_node); + + if (!AffinityConfigIsDeprecated() && + (IsWorkerCpuSet(setname) || IsReceiveCpuSet(setname))) { + SetupPerIfaceAffinity(taf, affinity); + } } #endif /* OS_WIN32 and __OpenBSD__ */ } +#if !defined __CYGWIN__ && !defined OS_WIN32 && !defined __OpenBSD__ && !defined sun +#ifdef HAVE_HWLOC +static int HwLocDeviceNumaGet(hwloc_topology_t topo, hwloc_obj_t obj) +{ +#if HWLOC_VERSION_MAJOR >= 2 && HWLOC_VERSION_MINOR >= 5 + hwloc_obj_t nodes[MAX_NUMA_NODES]; + unsigned num_nodes = MAX_NUMA_NODES; + struct hwloc_location location; + + location.type = HWLOC_LOCATION_TYPE_OBJECT; + location.location.object = obj; + + int result = hwloc_get_local_numanode_objs(topo, &location, &num_nodes, nodes, 0); + if (result == 0 && num_nodes > 0 && num_nodes <= MAX_NUMA_NODES) { + return nodes[0]->logical_index; + } + return -1; +#endif /* HWLOC_VERSION_MAJOR >= 2 && HWLOC_VERSION_MINOR >= 5 */ + + hwloc_obj_t non_io_ancestor = hwloc_get_non_io_ancestor_obj(topo, obj); + if (non_io_ancestor == NULL) { 
+ return -1; + } + + // Iterate over NUMA nodes and check their nodeset + hwloc_obj_t numa_node = NULL; + while ((numa_node = hwloc_get_next_obj_by_type(topo, HWLOC_OBJ_NUMANODE, numa_node)) != NULL) { + if (hwloc_bitmap_isset(non_io_ancestor->nodeset, numa_node->os_index)) { + return numa_node->logical_index; + } + } + + return -1; +} + +static hwloc_obj_t HwLocDeviceGetByKernelName(hwloc_topology_t topo, const char *interface_name) +{ + hwloc_obj_t obj = NULL; + + while ((obj = hwloc_get_next_osdev(topo, obj)) != NULL) { + if (obj->attr->osdev.type == HWLOC_OBJ_OSDEV_NETWORK && + strcmp(obj->name, interface_name) == 0) { + hwloc_obj_t parent = obj->parent; + while (parent) { + if (parent->type == HWLOC_OBJ_PCI_DEVICE) { + return parent; + } + parent = parent->parent; + } + } + } + return NULL; +} + +// Static function to deparse PCIe interface string name to individual components /** - * \brief Return next cpu to use for a given thread family - * \retval the cpu to used given by its id + * \brief Parse PCIe address string to individual components + * \param[in] pcie_address PCIe address string + * \param[out] domain Domain component + * \param[out] bus Bus component + * \param[out] device Device component + * \param[out] function Function component */ -uint16_t AffinityGetNextCPU(ThreadsAffinityType *taf) +static int PcieAddressToComponents(const char *pcie_address, unsigned int *domain, + unsigned int *bus, unsigned int *device, unsigned int *function) { - uint16_t ncpu = 0; -#if !defined __CYGWIN__ && !defined OS_WIN32 && !defined __OpenBSD__ && !defined sun + // Handle both full and short PCIe address formats + if (sscanf(pcie_address, "%x:%x:%x.%x", domain, bus, device, function) != 4) { + if (sscanf(pcie_address, "%x:%x.%x", bus, device, function) != 3) { + return -1; + } + *domain = 0; // Default domain to 0 if not provided + } + return 0; +} + +// Function to convert PCIe address to hwloc object +static hwloc_obj_t HwLocDeviceGetByPcie(hwloc_topology_t 
topo, const char *pcie_address) +{ + hwloc_obj_t obj = NULL; + unsigned int domain, bus, device, function; + int r = PcieAddressToComponents(pcie_address, &domain, &bus, &device, &function); + if (r == 0) { + while ((obj = hwloc_get_next_pcidev(topo, obj)) != NULL) { + if (obj->attr->pcidev.domain == domain && obj->attr->pcidev.bus == bus && + obj->attr->pcidev.dev == device && obj->attr->pcidev.func == function) { + return obj; + } + } + } + return NULL; +} + +static void HwlocObjectDump(hwloc_obj_t obj, const char *iface_name) +{ + if (!obj) { + SCLogDebug("No object found for the given PCIe address.\n"); + return; + } + + static char pcie_address[32]; + snprintf(pcie_address, sizeof(pcie_address), "%04x:%02x:%02x.%x", obj->attr->pcidev.domain, + obj->attr->pcidev.bus, obj->attr->pcidev.dev, obj->attr->pcidev.func); + SCLogDebug("Interface (%s / %s) has NUMA ID %d", iface_name, pcie_address, + HwLocDeviceNumaGet(topology, obj)); + + SCLogDebug("Object type: %s\n", hwloc_obj_type_string(obj->type)); + SCLogDebug("Logical index: %u\n", obj->logical_index); + SCLogDebug("Depth: %u\n", obj->depth); + SCLogDebug("Attributes:\n"); + if (obj->type == HWLOC_OBJ_PCI_DEVICE) { + SCLogDebug(" Domain: %04x\n", obj->attr->pcidev.domain); + SCLogDebug(" Bus: %02x\n", obj->attr->pcidev.bus); + SCLogDebug(" Device: %02x\n", obj->attr->pcidev.dev); + SCLogDebug(" Function: %01x\n", obj->attr->pcidev.func); + SCLogDebug(" Class ID: %04x\n", obj->attr->pcidev.class_id); + SCLogDebug(" Vendor ID: %04x\n", obj->attr->pcidev.vendor_id); + SCLogDebug(" Device ID: %04x\n", obj->attr->pcidev.device_id); + SCLogDebug(" Subvendor ID: %04x\n", obj->attr->pcidev.subvendor_id); + SCLogDebug(" Subdevice ID: %04x\n", obj->attr->pcidev.subdevice_id); + SCLogDebug(" Revision: %02x\n", obj->attr->pcidev.revision); + SCLogDebug(" Link speed: %f GB/s\n", obj->attr->pcidev.linkspeed); + } else { + SCLogDebug(" No PCI device attributes available.\n"); + } +} + +static bool 
TopologyShouldAutopin(ThreadVars *tv, ThreadsAffinityType *taf) +{ + bool cond; SCMutexLock(&taf->taf_mutex); - ncpu = GetNextAvailableCPU(taf); + cond = tv->type == TVT_PPT && tv->iface_name && + (strcmp(tv->iface_name, taf->name) == 0 || + (strcmp("worker-cpu-set", taf->name) == 0 && RunmodeIsWorkers()) || + (strcmp("receive-cpu-set", taf->name) == 0 && RunmodeIsAutofp())); + SCMutexUnlock(&taf->taf_mutex); + return cond; +} + +static void TopologyInitialize(void) +{ + if (topology == NULL) { + if (hwloc_topology_init(&topology) == -1) { + FatalError("Failed to initialize topology"); + } + + if (hwloc_topology_set_flags(topology, HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM) == -1 || + hwloc_topology_set_io_types_filter(topology, HWLOC_TYPE_FILTER_KEEP_ALL) == -1 || + hwloc_topology_load(topology) == -1) { + FatalError("Failed to set/load topology"); + } + } +} + +void TopologyDestroy() +{ + if (topology != NULL) { + hwloc_topology_destroy(topology); + topology = NULL; + } +} + +static int InterfaceGetNumaNode(ThreadVars *tv) +{ + hwloc_obj_t if_obj = HwLocDeviceGetByKernelName(topology, tv->iface_name); + if (if_obj == NULL) { + if_obj = HwLocDeviceGetByPcie(topology, tv->iface_name); + } + + if (if_obj != NULL && SCLogGetLogLevel() == SC_LOG_DEBUG) { + HwlocObjectDump(if_obj, tv->iface_name); + } + + int32_t numa_id = HwLocDeviceNumaGet(topology, if_obj); + if (numa_id < 0 && SCRunmodeGet() == RUNMODE_DPDK) { + // DPDK fallback for e.g. 
net_bonding (vdev) PMDs + int32_t r = DPDKDeviceNameSetSocketID(tv->iface_name, &numa_id); + if (r < 0) { + numa_id = -1; + } + } + + if (numa_id < 0) { + SCLogDebug("Unable to find NUMA node for interface %s", tv->iface_name); + } + + return numa_id; +} +#endif /* HAVE_HWLOC */ + +static bool CPUIsFromNuma(uint16_t ncpu, uint16_t numa) +{ +#ifdef HAVE_HWLOC + int core_id = ncpu; + int depth = hwloc_get_type_depth(topology, HWLOC_OBJ_NUMANODE); + hwloc_obj_t numa_node = NULL; + + while ((numa_node = hwloc_get_next_obj_by_depth(topology, depth, numa_node)) != NULL) { + hwloc_cpuset_t cpuset = hwloc_bitmap_alloc(); + hwloc_bitmap_copy(cpuset, numa_node->cpuset); + + if (hwloc_bitmap_isset(cpuset, core_id)) { + SCLogDebug("Core %d - NUMA %d", core_id, numa_node->logical_index); + hwloc_bitmap_free(cpuset); + break; + } + hwloc_bitmap_free(cpuset); + } + + if (numa_node != NULL && numa == numa_node->logical_index) { + return true; + } + +#endif /* HAVE_HWLOC */ + + return false; +} + +static int16_t FindCPUInNumaNode(int numa_node, ThreadsAffinityType *taf) +{ + if (numa_node < 0) { + return -1; + } + + if (taf->lcpu[numa_node] >= UtilCpuGetNumProcessorsOnline()) { + return -1; + } + + uint16_t cpu = taf->lcpu[numa_node]; + while (cpu < UtilCpuGetNumProcessorsOnline() && + (!CPU_ISSET(cpu, &taf->cpu_set) || !CPUIsFromNuma(cpu, (uint16_t)numa_node))) { + cpu++; + } + + taf->lcpu[numa_node] = + (CPU_ISSET(cpu, &taf->cpu_set) && CPUIsFromNuma(cpu, (uint16_t)numa_node)) + ? cpu + 1 + : UtilCpuGetNumProcessorsOnline(); + return (CPU_ISSET(cpu, &taf->cpu_set) && CPUIsFromNuma(cpu, (uint16_t)numa_node)) ? 
(int16_t)cpu + : -1; +} + +static int16_t CPUSelectFromNuma(int iface_numa, ThreadsAffinityType *taf) +{ + if (iface_numa != -1) { + return FindCPUInNumaNode(iface_numa, taf); + } + return -1; +} + +static int16_t CPUSelectAlternative(int iface_numa, ThreadsAffinityType *taf) +{ + for (int nid = 0; nid < MAX_NUMA_NODES; nid++) { + if (iface_numa == nid) { + continue; + } + + int16_t cpu = FindCPUInNumaNode(nid, taf); + if (cpu != -1) { + SCLogPerf("CPU %d from NUMA %d assigned to a network interface located on NUMA %d", cpu, + nid, iface_numa); + return cpu; + } + } + return -1; +} + +/** + * \brief Select the next available CPU for the given affinity type. + * taf->cpu_set is a bit array where each bit represents a CPU core. + * The function iterates over the bit array and returns the first available CPU. + * If last used CPU core index is higher than the indexes of available cores, + * we reach the end of the array, and we reset the CPU selection. + * On the second reset attempt, the function bails out with a default value. + * The second attempt should only happen with an empty CPU set. 
+ */ +static uint16_t CPUSelectDefault(ThreadsAffinityType *taf) +{ + uint16_t cpu = taf->lcpu[0]; + int attempts = 0; + while (!CPU_ISSET(cpu, &taf->cpu_set) && attempts < 2) { + cpu = (cpu + 1) % UtilCpuGetNumProcessorsOnline(); + if (cpu == 0) { + attempts++; + } + } - if (AllCPUsUsed(taf)) { - ResetCPUs(taf); + taf->lcpu[0] = cpu + 1; + return cpu; +} + +static uint16_t CPUSelectFromNumaOrDefault(int iface_numa, ThreadsAffinityType *taf) +{ + uint16_t attempts = 0; + int16_t cpu = -1; + while (attempts < 2) { + cpu = CPUSelectFromNuma(iface_numa, taf); + if (cpu == -1) { + cpu = CPUSelectAlternative(iface_numa, taf); + if (cpu == -1) { + // All CPUs from all NUMAs are used at this point + ResetCPUs(taf); + attempts++; + } + } + + if (cpu >= 0) { + return (uint16_t)cpu; + } + } + return CPUSelectDefault(taf); +} + +static uint16_t GetNextAvailableCPU(int iface_numa, ThreadsAffinityType *taf) +{ + if (iface_numa < 0) { + return CPUSelectDefault(taf); } + return CPUSelectFromNumaOrDefault(iface_numa, taf); +} + +static bool AutopinEnabled(void) +{ + int autopin = 0; + if (ConfGetBool("threading.autopin", &autopin) != 1) { + return false; + } + return (bool)autopin; +} + +#endif /* OS_WIN32 and __OpenBSD__ */ + +uint16_t AffinityGetNextCPU(ThreadVars *tv, ThreadsAffinityType *taf) +{ + uint16_t ncpu = 0; +#if !defined __CYGWIN__ && !defined OS_WIN32 && !defined __OpenBSD__ && !defined sun + int iface_numa = -1; + if (AutopinEnabled()) { +#ifdef HAVE_HWLOC + if (TopologyShouldAutopin(tv, taf)) { + TopologyInitialize(); + iface_numa = InterfaceGetNumaNode(tv); + } +#else + static bool printed = false; + if (!printed) { + printed = true; + SCLogWarning( + "threading.autopin option is enabled but hwloc support is not compiled in. 
" + "Make sure to pass --enable-nfqueue to configure when building Suricata."); + } +#endif /* HAVE_HWLOC */ + } + + SCMutexLock(&taf->taf_mutex); + ncpu = GetNextAvailableCPU(iface_numa, taf); SCLogDebug("Setting affinity on CPU %d", ncpu); SCMutexUnlock(&taf->taf_mutex); #endif /* OS_WIN32 and __OpenBSD__ */ diff --git a/src/util-affinity.h b/src/util-affinity.h index 2fa4509ffa2c..ceca26dfeff9 100644 --- a/src/util-affinity.h +++ b/src/util-affinity.h @@ -26,6 +26,11 @@ #include "suricata-common.h" #include "conf.h" #include "threads.h" +#include "threadvars.h" + +#ifdef HAVE_HWLOC +#include +#endif /* HAVE_HWLOC */ #if defined OS_FREEBSD #include @@ -62,10 +67,12 @@ enum { MAX_AFFINITY }; +#define MAX_NUMA_NODES 16 + typedef struct ThreadsAffinityType_ { const char *name; uint8_t mode_flag; - uint16_t lcpu; /* use by exclusive mode */ + uint16_t lcpu[MAX_NUMA_NODES]; /* use by exclusive mode */ int prio; uint32_t nb_threads; SCMutex taf_mutex; @@ -76,6 +83,12 @@ typedef struct ThreadsAffinityType_ { cpu_set_t medprio_cpu; cpu_set_t hiprio_cpu; #endif + struct ThreadsAffinityType_ **children; + uint32_t nb_children; + uint32_t nb_children_capacity; + struct ThreadsAffinityType_ *parent; + // a flag to avoid multiple warnings when no CPU is set + bool nocpu_warned; } ThreadsAffinityType; /** store thread affinity mode for all type of threads */ @@ -83,10 +96,16 @@ typedef struct ThreadsAffinityType_ { extern ThreadsAffinityType thread_affinity[MAX_CPU_SET]; #endif +char *AffinityGetYamlPath(ThreadsAffinityType *taf); void AffinitySetupLoadFromConfig(void); -ThreadsAffinityType * GetAffinityTypeFromName(const char *name); +ThreadsAffinityType *GetOrAllocAffinityTypeForIfaceOfName( + const char *name, const char *interface_name); +ThreadsAffinityType *GetAffinityTypeForNameAndIface(const char *name, const char *interface_name); +ThreadsAffinityType *FindAffinityByInterface( + ThreadsAffinityType *parent, const char *interface_name); -uint16_t 
AffinityGetNextCPU(ThreadsAffinityType *taf); +void TopologyDestroy(void); +uint16_t AffinityGetNextCPU(ThreadVars *tv, ThreadsAffinityType *taf); uint16_t UtilAffinityGetAffinedCPUNum(ThreadsAffinityType *taf); #ifdef HAVE_DPDK uint16_t UtilAffinityCpusOverlap(ThreadsAffinityType *taf1, ThreadsAffinityType *taf2); diff --git a/src/util-device.c b/src/util-device.c index fd4cf5685f0b..ec1e91b41374 100644 --- a/src/util-device.c +++ b/src/util-device.c @@ -24,6 +24,7 @@ #include "device-storage.h" #include "util-debug.h" +#include "util-affinity.h" #define MAX_DEVNAME 10 @@ -173,6 +174,20 @@ int LiveGetDeviceCount(void) return i; } +int LiveGetDeviceCountWithoutAssignedThreading(void) +{ + int i = 0; + LiveDevice *pd; + + TAILQ_FOREACH (pd, &live_devices, next) { + if (GetAffinityTypeForNameAndIface("worker-cpu-set", pd->dev) == NULL) { + i++; + } + } + + return i; +} + /** * \brief Get a pointer to the device name at idx * diff --git a/src/util-device.h b/src/util-device.h index 0774825385a3..075c21567c81 100644 --- a/src/util-device.h +++ b/src/util-device.h @@ -85,6 +85,7 @@ void LiveDevAddBypassStats(LiveDevice *dev, uint64_t cnt, int family); void LiveDevSubBypassStats(LiveDevice *dev, uint64_t cnt, int family); void LiveDevAddBypassFail(LiveDevice *dev, uint64_t cnt, int family); void LiveDevAddBypassSuccess(LiveDevice *dev, uint64_t cnt, int family); +int LiveGetDeviceCountWithoutAssignedThreading(void); int LiveGetDeviceCount(void); const char *LiveGetDeviceName(int number); LiveDevice *LiveGetDevice(const char *dev); diff --git a/src/util-runmodes.c b/src/util-runmodes.c index f78e857abfc6..be4da6bd49ee 100644 --- a/src/util-runmodes.c +++ b/src/util-runmodes.c @@ -175,6 +175,14 @@ int RunModeSetLiveCaptureAutoFp(ConfigIfaceParserFunc ConfigParser, FatalError("TmThreadsCreate failed"); } tv_receive->printable_name = printable_threadname; + if (dev) { + tv_receive->iface_name = SCStrdup(dev); + if (tv_receive->iface_name == NULL) { + FatalError("Failed to 
allocate memory for iface name"); + } + } else { + tv_receive->iface_name = NULL; + } TmModule *tm_module = TmModuleGetByName(recv_mod_name); if (tm_module == NULL) { FatalError("TmModuleGetByName failed for %s", recv_mod_name); @@ -283,6 +291,14 @@ static int RunModeSetLiveCaptureWorkersForDevice(ConfigIfaceThreadsCountFunc Mod FatalError("TmThreadsCreate failed"); } tv->printable_name = printable_threadname; + if (live_dev) { + tv->iface_name = SCStrdup(live_dev); + if (tv->iface_name == NULL) { + FatalError("Failed to allocate memory for iface name"); + } + } else { + tv->iface_name = NULL; + } tm_module = TmModuleGetByName(recv_mod_name); if (tm_module == NULL) { diff --git a/suricata.yaml.in b/suricata.yaml.in index 954acde2d3ae..c4319f02b45b 100644 --- a/suricata.yaml.in +++ b/suricata.yaml.in @@ -1777,6 +1777,7 @@ spm-algo: auto # Suricata is multi-threaded. Here the threading can be influenced. threading: set-cpu-affinity: no + autopin: no # Tune cpu affinity of threads. Each family of threads can be bound # to specific CPUs. # @@ -1793,6 +1794,13 @@ threading: cpu: [ 0 ] # include only these CPUs in affinity settings receive-cpu-set: cpu: [ 0 ] # include only these CPUs in affinity settings + # interface-specific-cpu-set: + # - interface: "enp4s0f0" + # cpu: [ 1,3,5,7,9 ] + # mode: "exclusive" + # prio: + # high: [ "all" ] + # default: "medium" worker-cpu-set: cpu: [ "all" ] mode: "exclusive" @@ -1804,6 +1812,13 @@ threading: medium: [ "1-2" ] high: [ 3 ] default: "medium" + interface-specific-cpu-set: + - interface: "enp4s0f0" # 0000:3b:00.0 # net_bonding0 # ens1f0 + cpu: [ 1,3,5,7,9 ] + mode: "exclusive" + prio: + high: [ "all" ] + default: "medium" #verdict-cpu-set: # cpu: [ 0 ] # prio: