
Commit 860f242

Santiago Leon authored and Jeff Garzik committed
[PATCH] ibmveth change buffer pools dynamically
This patch provides a sysfs interface to change some properties of the ibmveth buffer pools (size of the buffers, number of buffers per pool, and whether a pool is active). Ethernet drivers use ethtool to provide this type of functionality, but the buffers in the ibmveth driver can have an arbitrary size (not only the regular, mini, and jumbo sizes that ethtool can change), and ibmveth can also have an arbitrary number of buffer pools.

Under heavy load we have seen dropped packets, which obviously kills TCP performance. We have created several fixes that mitigate this issue, but we definitely need a way of changing the number of buffers for an adapter dynamically. Changing the size of the buffers also allows users to set the MTU to something big (bigger than a jumbo frame), greatly improving performance on partition-to-partition transfers.

The patch creates directories pool1...pool4 in the device directory in sysfs, each with the files num, size, and active (which default to the values in the mainline version).

Comments and suggestions are welcome...

--
Santiago A. Leon
Power Linux Development
IBM Linux Technology Center

Signed-off-by: Jeff Garzik <[email protected]>
1 parent 7b32a31 commit 860f242
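Editor's note: since IbmVethNumBufferPools is 5 and ibmveth_probe() names each kobject with sprintf(kobj->name, "pool%d", i) for i = 0..4, the directories that actually appear are pool0 through pool4. In the interface, num is the number of buffers in the pool and size is the per-buffer size in bytes. As a usage sketch (not part of the commit), the new attributes can be driven from userspace as below; the sysfs path is hypothetical and depends on the adapter's VIO unit address.

/* Usage sketch (hypothetical path): grow pool3 to 64 KiB buffers and
 * activate it, the C equivalent of:
 *     echo 65536 > /sys/devices/vio/30000002/pool3/size
 *     echo 1     > /sys/devices/vio/30000002/pool3/active
 */
#include <stdio.h>
#include <stdlib.h>

static void write_attr(const char *path, const char *val)
{
    FILE *f = fopen(path, "w");

    if (!f) {
        perror(path);
        exit(1);
    }
    fputs(val, f);  /* veth_pool_store() parses this with simple_strtol() */
    fclose(f);
}

int main(void)
{
    const char *base = "/sys/devices/vio/30000002"; /* hypothetical */
    char path[256];

    snprintf(path, sizeof(path), "%s/pool3/size", base);
    write_attr(path, "65536");  /* per-buffer size in bytes */
    snprintf(path, sizeof(path), "%s/pool3/active", base);
    write_attr(path, "1");      /* bring the pool online */
    return 0;
}

Writing num or size makes the driver close and reopen the interface to rebuild the pool, so expect a brief pause in traffic.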

File tree

2 files changed: +174 -44

drivers/net/ibmveth.c

Lines changed: 168 additions & 43 deletions
@@ -96,6 +96,7 @@ static void ibmveth_proc_register_adapter(struct ibmveth_adapter *adapter);
 static void ibmveth_proc_unregister_adapter(struct ibmveth_adapter *adapter);
 static irqreturn_t ibmveth_interrupt(int irq, void *dev_instance, struct pt_regs *regs);
 static inline void ibmveth_rxq_harvest_buffer(struct ibmveth_adapter *adapter);
+static struct kobj_type ktype_veth_pool;
 
 #ifdef CONFIG_PROC_FS
 #define IBMVETH_PROC_DIR "net/ibmveth"
@@ -133,12 +134,13 @@ static inline int ibmveth_rxq_frame_length(struct ibmveth_adapter *adapter)
 }
 
 /* setup the initial settings for a buffer pool */
-static void ibmveth_init_buffer_pool(struct ibmveth_buff_pool *pool, u32 pool_index, u32 pool_size, u32 buff_size)
+static void ibmveth_init_buffer_pool(struct ibmveth_buff_pool *pool, u32 pool_index, u32 pool_size, u32 buff_size, u32 pool_active)
 {
     pool->size = pool_size;
     pool->index = pool_index;
     pool->buff_size = buff_size;
     pool->threshold = pool_size / 2;
+    pool->active = pool_active;
 }
 
 /* allocate and setup an buffer pool - called during open */
@@ -180,7 +182,6 @@ static int ibmveth_alloc_buffer_pool(struct ibmveth_buff_pool *pool)
     atomic_set(&pool->available, 0);
     pool->producer_index = 0;
     pool->consumer_index = 0;
-    pool->active = 0;
 
     return 0;
 }
@@ -301,7 +302,6 @@ static void ibmveth_free_buffer_pool(struct ibmveth_adapter *adapter, struct ibm
         kfree(pool->skbuff);
         pool->skbuff = NULL;
     }
-    pool->active = 0;
 }
 
 /* remove a buffer from a pool */
@@ -433,7 +433,9 @@ static void ibmveth_cleanup(struct ibmveth_adapter *adapter)
     }
 
     for(i = 0; i<IbmVethNumBufferPools; i++)
-        ibmveth_free_buffer_pool(adapter, &adapter->rx_buff_pool[i]);
+        if (adapter->rx_buff_pool[i].active)
+            ibmveth_free_buffer_pool(adapter,
+                                     &adapter->rx_buff_pool[i]);
 }
 
 static int ibmveth_open(struct net_device *netdev)
@@ -489,9 +491,6 @@ static int ibmveth_open(struct net_device *netdev)
     adapter->rx_queue.num_slots = rxq_entries;
     adapter->rx_queue.toggle = 1;
 
-    /* call change_mtu to init the buffer pools based in initial mtu */
-    ibmveth_change_mtu(netdev, netdev->mtu);
-
     memcpy(&mac_address, netdev->dev_addr, netdev->addr_len);
     mac_address = mac_address >> 16;
@@ -522,6 +521,17 @@ static int ibmveth_open(struct net_device *netdev)
         return -ENONET;
     }
 
+    for(i = 0; i<IbmVethNumBufferPools; i++) {
+        if(!adapter->rx_buff_pool[i].active)
+            continue;
+        if (ibmveth_alloc_buffer_pool(&adapter->rx_buff_pool[i])) {
+            ibmveth_error_printk("unable to alloc pool\n");
+            adapter->rx_buff_pool[i].active = 0;
+            ibmveth_cleanup(adapter);
+            return -ENOMEM ;
+        }
+    }
+
     ibmveth_debug_printk("registering irq 0x%x\n", netdev->irq);
     if((rc = request_irq(netdev->irq, &ibmveth_interrupt, 0, netdev->name, netdev)) != 0) {
         ibmveth_error_printk("unable to request irq 0x%x, rc %d\n", netdev->irq, rc);
@@ -550,7 +560,8 @@ static int ibmveth_close(struct net_device *netdev)
 
     ibmveth_debug_printk("close starting\n");
 
-    netif_stop_queue(netdev);
+    if (!adapter->pool_config)
+        netif_stop_queue(netdev);
 
     free_irq(netdev->irq, netdev);
 
@@ -876,46 +887,22 @@ static void ibmveth_set_multicast_list(struct net_device *netdev)
 static int ibmveth_change_mtu(struct net_device *dev, int new_mtu)
 {
     struct ibmveth_adapter *adapter = dev->priv;
+    int new_mtu_oh = new_mtu + IBMVETH_BUFF_OH;
     int i;
-    int prev_smaller = 1;
 
-    if ((new_mtu < 68) ||
-        (new_mtu > (pool_size[IbmVethNumBufferPools-1]) - IBMVETH_BUFF_OH))
+    if (new_mtu < IBMVETH_MAX_MTU)
         return -EINVAL;
 
+    /* Look for an active buffer pool that can hold the new MTU */
     for(i = 0; i<IbmVethNumBufferPools; i++) {
-        int activate = 0;
-        if (new_mtu > (pool_size[i] - IBMVETH_BUFF_OH)) {
-            activate = 1;
-            prev_smaller= 1;
-        } else {
-            if (prev_smaller)
-                activate = 1;
-            prev_smaller= 0;
+        if (!adapter->rx_buff_pool[i].active)
+            continue;
+        if (new_mtu_oh < adapter->rx_buff_pool[i].buff_size) {
+            dev->mtu = new_mtu;
+            return 0;
         }
-
-        if (activate && !adapter->rx_buff_pool[i].active) {
-            struct ibmveth_buff_pool *pool =
-                &adapter->rx_buff_pool[i];
-            if(ibmveth_alloc_buffer_pool(pool)) {
-                ibmveth_error_printk("unable to alloc pool\n");
-                return -ENOMEM;
-            }
-            adapter->rx_buff_pool[i].active = 1;
-        } else if (!activate && adapter->rx_buff_pool[i].active) {
-            adapter->rx_buff_pool[i].active = 0;
-            h_free_logical_lan_buffer(adapter->vdev->unit_address,
-                                      (u64)pool_size[i]);
-        }
-
     }
-
-    /* kick the interrupt handler so that the new buffer pools get
-       replenished or deallocated */
-    ibmveth_interrupt(dev->irq, dev, NULL);
-
-    dev->mtu = new_mtu;
-    return 0;
+    return -EINVAL;
 }
 
 static int __devinit ibmveth_probe(struct vio_dev *dev, const struct vio_device_id *id)
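The rewritten ibmveth_change_mtu() no longer activates or frees pools itself; it only checks that some already-active pool can hold the new MTU plus overhead. A standalone sketch (editor's illustration, not driver code) of that fit test against the default tables from ibmveth.h:

/* Fit test from ibmveth_change_mtu(), recreated in userspace: an MTU
 * fits when new_mtu + IBMVETH_BUFF_OH is strictly smaller than the
 * buffer size of some active pool. With only the default pools active,
 * MTU 1500 fits (1522 < 2048) but MTU 9000 needs a bigger pool first.
 */
#include <stdio.h>

#define IBMVETH_BUFF_OH 22  /* 14 ethernet header + 8 opaque handle */

static const int buff_size[] = { 512, 2 * 1024, 16 * 1024, 32 * 1024, 64 * 1024 };
static const int active[]    = { 1, 1, 0, 0, 0 };  /* pool_active[] defaults */

static int mtu_fits(int new_mtu)
{
    int i;

    for (i = 0; i < 5; i++)
        if (active[i] && new_mtu + IBMVETH_BUFF_OH < buff_size[i])
            return 1;   /* change_mtu would return 0 */
    return 0;           /* change_mtu would return -EINVAL */
}

int main(void)
{
    printf("MTU 1500: %s\n", mtu_fits(1500) ? "fits" : "rejected");
    printf("MTU 9000: %s\n", mtu_fits(9000) ? "fits" : "rejected");
    return 0;
}

To run a big MTU, the user now activates a large pool through sysfs first and then changes the MTU, instead of the driver silently reshaping pools as before.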
@@ -960,6 +947,7 @@ static int __devinit ibmveth_probe(struct vio_dev *dev, const struct vio_device_
     adapter->vdev = dev;
     adapter->netdev = netdev;
     adapter->mcastFilterSize= *mcastFilterSize_p;
+    adapter->pool_config = 0;
 
     /* Some older boxes running PHYP non-natively have an OF that
        returns a 8-byte local-mac-address field (and the first
@@ -994,9 +982,16 @@ static int __devinit ibmveth_probe(struct vio_dev *dev, const struct vio_device_
 
     memcpy(&netdev->dev_addr, &adapter->mac_addr, netdev->addr_len);
 
-    for(i = 0; i<IbmVethNumBufferPools; i++)
+    for(i = 0; i<IbmVethNumBufferPools; i++) {
+        struct kobject *kobj = &adapter->rx_buff_pool[i].kobj;
         ibmveth_init_buffer_pool(&adapter->rx_buff_pool[i], i,
-                                 pool_count[i], pool_size[i]);
+                                 pool_count[i], pool_size[i],
+                                 pool_active[i]);
+        kobj->parent = &dev->dev.kobj;
+        sprintf(kobj->name, "pool%d", i);
+        kobj->ktype = &ktype_veth_pool;
+        kobject_register(kobj);
+    }
 
     ibmveth_debug_printk("adapter @ 0x%p\n", adapter);
 
@@ -1025,6 +1020,10 @@ static int __devexit ibmveth_remove(struct vio_dev *dev)
 {
     struct net_device *netdev = dev->dev.driver_data;
     struct ibmveth_adapter *adapter = netdev->priv;
+    int i;
+
+    for(i = 0; i<IbmVethNumBufferPools; i++)
+        kobject_unregister(&adapter->rx_buff_pool[i].kobj);
 
     unregister_netdev(netdev);
 
@@ -1169,6 +1168,132 @@ static void ibmveth_proc_unregister_driver(void)
 }
 #endif /* CONFIG_PROC_FS */
 
+static struct attribute veth_active_attr;
+static struct attribute veth_num_attr;
+static struct attribute veth_size_attr;
+
+static ssize_t veth_pool_show(struct kobject * kobj,
+                              struct attribute * attr, char * buf)
+{
+    struct ibmveth_buff_pool *pool = container_of(kobj,
+                                                  struct ibmveth_buff_pool,
+                                                  kobj);
+
+    if (attr == &veth_active_attr)
+        return sprintf(buf, "%d\n", pool->active);
+    else if (attr == &veth_num_attr)
+        return sprintf(buf, "%d\n", pool->size);
+    else if (attr == &veth_size_attr)
+        return sprintf(buf, "%d\n", pool->buff_size);
+    return 0;
+}
+
+static ssize_t veth_pool_store(struct kobject * kobj, struct attribute * attr,
+                               const char * buf, size_t count)
+{
+    struct ibmveth_buff_pool *pool = container_of(kobj,
+                                                  struct ibmveth_buff_pool,
+                                                  kobj);
+    struct net_device *netdev =
+        container_of(kobj->parent, struct device, kobj)->driver_data;
+    struct ibmveth_adapter *adapter = netdev->priv;
+    long value = simple_strtol(buf, NULL, 10);
+    long rc;
+
+    if (attr == &veth_active_attr) {
+        if (value && !pool->active) {
+            if(ibmveth_alloc_buffer_pool(pool)) {
+                ibmveth_error_printk("unable to alloc pool\n");
+                return -ENOMEM;
+            }
+            pool->active = 1;
+            adapter->pool_config = 1;
+            ibmveth_close(netdev);
+            adapter->pool_config = 0;
+            if ((rc = ibmveth_open(netdev)))
+                return rc;
+        } else if (!value && pool->active) {
+            int mtu = netdev->mtu + IBMVETH_BUFF_OH;
+            int i;
+            /* Make sure there is a buffer pool with buffers that
+               can hold a packet of the size of the MTU */
+            for(i = 0; i<IbmVethNumBufferPools; i++) {
+                if (pool == &adapter->rx_buff_pool[i])
+                    continue;
+                if (!adapter->rx_buff_pool[i].active)
+                    continue;
+                if (mtu < adapter->rx_buff_pool[i].buff_size) {
+                    pool->active = 0;
+                    h_free_logical_lan_buffer(adapter->
+                                              vdev->
+                                              unit_address,
+                                              pool->
+                                              buff_size);
+                }
+            }
+            if (pool->active) {
+                ibmveth_error_printk("no active pool >= MTU\n");
+                return -EPERM;
+            }
+        }
+    } else if (attr == &veth_num_attr) {
+        if (value <= 0 || value > IBMVETH_MAX_POOL_COUNT)
+            return -EINVAL;
+        else {
+            adapter->pool_config = 1;
+            ibmveth_close(netdev);
+            adapter->pool_config = 0;
+            pool->size = value;
+            if ((rc = ibmveth_open(netdev)))
+                return rc;
+        }
+    } else if (attr == &veth_size_attr) {
+        if (value <= IBMVETH_BUFF_OH || value > IBMVETH_MAX_BUF_SIZE)
+            return -EINVAL;
+        else {
+            adapter->pool_config = 1;
+            ibmveth_close(netdev);
+            adapter->pool_config = 0;
+            pool->buff_size = value;
+            if ((rc = ibmveth_open(netdev)))
+                return rc;
+        }
+    }
+
+    /* kick the interrupt handler to allocate/deallocate pools */
+    ibmveth_interrupt(netdev->irq, netdev, NULL);
+    return count;
+}
+
+
+#define ATTR(_name, _mode)                              \
+    struct attribute veth_##_name##_attr = {            \
+        .name = __stringify(_name), .mode = _mode, .owner = THIS_MODULE \
+    };
+
+static ATTR(active, 0644);
+static ATTR(num, 0644);
+static ATTR(size, 0644);
+
+static struct attribute * veth_pool_attrs[] = {
+    &veth_active_attr,
+    &veth_num_attr,
+    &veth_size_attr,
+    NULL,
+};
+
+static struct sysfs_ops veth_pool_ops = {
+    .show   = veth_pool_show,
+    .store  = veth_pool_store,
+};
+
+static struct kobj_type ktype_veth_pool = {
+    .release        = NULL,
+    .sysfs_ops      = &veth_pool_ops,
+    .default_attrs  = veth_pool_attrs,
+};
+
+
 static struct vio_device_id ibmveth_device_table[] __devinitdata= {
     { "network", "IBM,l-lan"},
     { "", "" }

drivers/net/ibmveth.h

Lines changed: 6 additions & 1 deletion
@@ -75,10 +75,13 @@
 
 #define IbmVethNumBufferPools 5
 #define IBMVETH_BUFF_OH 22 /* Overhead: 14 ethernet header + 8 opaque handle */
+#define IBMVETH_MAX_MTU 68
+#define IBMVETH_MAX_POOL_COUNT 4096
+#define IBMVETH_MAX_BUF_SIZE (1024 * 128)
 
-/* pool_size should be sorted */
 static int pool_size[] = { 512, 1024 * 2, 1024 * 16, 1024 * 32, 1024 * 64 };
 static int pool_count[] = { 256, 768, 256, 256, 256 };
+static int pool_active[] = { 1, 1, 0, 0, 0};
 
 #define IBM_VETH_INVALID_MAP ((u16)0xffff)
 
@@ -94,6 +97,7 @@ struct ibmveth_buff_pool {
     dma_addr_t *dma_addr;
     struct sk_buff **skbuff;
     int active;
+    struct kobject kobj;
 };
 
 struct ibmveth_rx_q {
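Editor's note on the defaults above: pool_count[i] is the number of buffers and pool_size[i] the per-buffer size passed to ibmveth_init_buffer_pool(), and only pools 0 and 1 start active. Despite its name, IBMVETH_MAX_MTU (68) is used as the lower bound in the new ibmveth_change_mtu() check. A small compilable check of the buffer memory each pool implies:

/* Editor's sketch: receive-buffer memory implied by the default tables.
 * Active at probe time: pool0 (256 x 512 B = 128 KiB) and
 * pool1 (768 x 2 KiB = 1536 KiB); pools 2-4 cost nothing until
 * activated through sysfs.
 */
#include <stdio.h>

static int pool_size[]   = { 512, 1024 * 2, 1024 * 16, 1024 * 32, 1024 * 64 };
static int pool_count[]  = { 256, 768, 256, 256, 256 };
static int pool_active[] = { 1, 1, 0, 0, 0 };

int main(void)
{
    long total = 0;
    int i;

    for (i = 0; i < 5; i++) {
        long bytes = (long)pool_count[i] * pool_size[i];

        printf("pool%d: %d x %d = %ld KiB%s\n", i, pool_count[i],
               pool_size[i], bytes / 1024,
               pool_active[i] ? " (active)" : "");
        if (pool_active[i])
            total += bytes;
    }
    printf("active total: %ld KiB\n", total / 1024);
    return 0;
}

So an adapter starts with roughly 1.6 MiB of receive buffers; the 16 to 64 KiB pools only consume memory once a user activates them.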
@@ -118,6 +122,7 @@ struct ibmveth_adapter {
     dma_addr_t filter_list_dma;
     struct ibmveth_buff_pool rx_buff_pool[IbmVethNumBufferPools];
     struct ibmveth_rx_q rx_queue;
+    int pool_config;
 
     /* adapter specific stats */
     u64 replenish_task_cycles;
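The pool_config flag added above exists so veth_pool_store() can bounce the interface without stopping the queue: while it is set, ibmveth_close() skips netif_stop_queue(). A hypothetical helper (editor's kernel-context sketch, not part of the commit) distilling the sequence the store handler uses for the num attribute:

/* Hypothetical helper, not in the patch: the close/reopen pattern
 * veth_pool_store() uses to apply new pool geometry. Setting
 * pool_config makes ibmveth_close() skip netif_stop_queue(), so
 * the stack's queue state survives the bounce.
 */
static int ibmveth_apply_pool_count(struct net_device *netdev,
                                    struct ibmveth_buff_pool *pool,
                                    long value)
{
    struct ibmveth_adapter *adapter = netdev->priv;

    adapter->pool_config = 1;    /* suppress netif_stop_queue() */
    ibmveth_close(netdev);       /* frees the current pool buffers */
    adapter->pool_config = 0;
    pool->size = value;          /* new number of buffers in the pool */
    return ibmveth_open(netdev); /* reallocates every active pool */
}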
