中断亲和性和进程亲和性配置-Nginx

本文阅读量 Posted by Kird on 2020-06-11

本文总结平时工作中用到的CPU亲和性设置,主要整理用到的配置和验证方法。刚接触这块内容时,总要东一篇西一篇地查找文章,所以这里以Nginx的亲和性调优为例做个汇总,希望对新手有所帮助。
性能优化目标为下图:

网卡RSS

关于网卡相关的调优,可以系统地学习下这两篇文章:《Linux 网络栈监控和调优:接收数据》和《深入了解10G网卡》。

查看网卡多队列:

1
2
3
4
5
6
7
8
9
10
11
12
# ethtool -l  eth0
Channel parameters for eth0:
Pre-set maximums:
RX: 0
TX: 0
Other: 1
Combined: 63
Current hardware settings:
RX: 0
TX: 0
Other: 1
Combined: 20

设置多队列和CPU数量相同:

1
2
3
4
5
6
7
8
9
10
11
12
# ethtool -L  eth0 combined 24  #会中断下网络
Channel parameters for eth0:
Pre-set maximums:
RX: 0
TX: 0
Other: 1
Combined: 63
Current hardware settings:
RX: 0
TX: 0
Other: 1
Combined: 24

查看网卡多队列对应的irq数量:

1
cat /proc/interrupts |grep eth0- |wc -l

绑定网卡中断irq到cpu

set_aff.sh:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
#!/bin/bash
#
# Copyright (c) 2015, Intel Corporation
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# * Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of Intel Corporation nor the names of its contributors
# may be used to endorse or promote products derived from this software
# without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# Affinitize interrupts to cores
#
# typical usage is (as root):
# set_irq_affinity -x local eth1 <eth2> <eth3>
#
# to get help:
# set_irq_affinity

# Print the command-line help to stdout and terminate the script with
# exit status 1.
usage()
{
  # One array element per output line; the empty first and last entries
  # reproduce the blank lines that frame the help text.
  local -a help_lines=(
    ""
    "Usage: $0 [-x|-X] {all|local|remote|one|custom} [ethX] <[ethY]>"
    " options: -x Configure XPS as well as smp_affinity"
    " options: -X Disable XPS but set smp_affinity"
    " options: {remote|one} can be followed by a specific node number"
    " Ex: $0 local eth0"
    " Ex: $0 remote 1 eth0"
    " Ex: $0 custom eth0 eth1"
    " Ex: $0 0-7,16-23 eth0"
    ""
  )

  printf '%s\n' "${help_lines[@]}"
  exit 1
}

# Report that -x and -X were combined, then terminate the script with
# exit status 1. The message goes to stdout.
usageX()
{
  local -r conflict_msg="options -x and -X cannot both be specified, pick one"

  printf '%s\n' "$conflict_msg"
  exit 1
}

# ---- XPS option handling ---------------------------------------------
# A leading -x requests XPS configuration (XPS_ENA=1); a leading -X
# requests that XPS be disabled (XPS_DIS=2). Each flag is consumed from
# the positional parameters once recognised.
[[ $1 == "-x" ]] && { XPS_ENA=1; shift; }

if [[ $1 == "-X" ]]; then
  # -x was already seen: the two flags are mutually exclusive.
  [[ -n $XPS_ENA ]] && usageX
  XPS_DIS=2
  shift
fi

# Catches the reverse order ("-X -x").
[[ $1 == -x ]] && usageX

[[ -n $XPS_ENA && -n $XPS_DIS ]] && usageX

# When -x was not given, XPS_ENA takes over the value of XPS_DIS
# (2 after -X, otherwise empty).
: "${XPS_ENA:=$XPS_DIS}"

# ---- affinity mode ---------------------------------------------------
num='^[0-9]+$'
# Vars
AFF=$1
shift

case "$AFF" in
  remote)
    # optional numeric argument after "remote"
    if [[ $1 =~ $num ]]; then
      rnode=$1
      shift
    fi
    ;;
  one)
    # optional numeric argument after "one"
    if [[ $1 =~ $num ]]; then
      cnt=$1
      shift
    fi
    ;;
  all|local|custom|[0-9]*)
    ;;
  -h|--help|"")
    usage
    ;;
  *)
    # Backwards compat mode: the first word is an interface name
    IFACES=$AFF
    AFF=all
    ;;
esac

# ---- interface list --------------------------------------------------
# every remaining argument is appended, each preceded by a space
until (( $# == 0 )); do
  IFACES+=" $1"
  shift
done

# for now the user must specify interfaces
[[ -n $IFACES ]] || {
  usage
  exit 1
}

# support functions

# Bind one interrupt to one CPU core and, depending on XPS_ENA, set or
# clear the XPS mask of the matching transmit queue.
#
# Globals read:
#   core     CPU number to bind to (non-negative decimal integer)
#   IRQ      interrupt number; selects /proc/irq/$IRQ/smp_affinity
#   IFACE    interface name (used for reporting and for the XPS path)
#   n        1-based counter; tx queue $((n-1)) is used for XPS
#   XPS_ENA  1 = write the same mask to xps_cpus, 2 = write 0 to
#            xps_cpus, anything else = leave XPS untouched
# Outputs:
#   stdout  one "IFACE CORE MASK -> FILE" line per successful write
#   stderr  one error line per failed write
# Returns:
#   0 when every attempted write succeeded, 1 otherwise
set_affinity()
{
  local cpu=$core
  local irq_file="/proc/irq/$IRQ/smp_affinity"
  local mask fill="" word words bit
  local xps_mask xps_file queue
  local status=0

  if ! [[ $cpu =~ ^[0-9]+$ ]]; then
    printf 'Error: invalid core "%s" for %s IRQ %s\n' "$cpu" "$IFACE" "$IRQ" >&2
    return 1
  fi
  cpu=$(( 10#$cpu ))   # force base 10 so "08"/"09" are not read as octal

  # The mask is a single set bit written in hex, followed by one
  # ",00000000" group for every complete block of 32 cores below it.
  words=$(( cpu / 32 ))
  bit=$(( cpu % 32 ))
  for (( word = 0; word < words; word++ )); do
    fill+=",00000000"
  done
  printf -v mask '%X%s' "$(( 1 << bit ))" "$fill"

  # Only report the binding when the write actually went through.
  if printf '%s' "$mask" > "$irq_file"; then
    printf '%s %d %s -> %s\n' "$IFACE" "$cpu" "$mask" "$irq_file"
  else
    printf 'Error: could not write %s to %s\n' "$mask" "$irq_file" >&2
    status=1
  fi

  case "$XPS_ENA" in
    1) xps_mask=$mask ;;
    2) xps_mask=0 ;;
    *) return "$status" ;;
  esac

  queue=$(( n - 1 ))
  xps_file="/sys/class/net/$IFACE/queues/tx-$queue/xps_cpus"
  if printf '%s' "$xps_mask" > "$xps_file"; then
    printf '%s %d %s -> %s\n' "$IFACE" "$cpu" "$xps_mask" "$xps_file"
  else
    printf 'Error: could not write %s to %s\n' "$xps_mask" "$xps_file" >&2
    status=1
  fi

  return "$status"
}

# Allow usage of , or -
#
# Expand a core specification into a space-separated list of core
# numbers printed on one line.
#
# Arguments:
#   any number of words, each a comma-separated list of items; an item is
#   a single number ("5"), a range ("0-7" or "7-0") or a range with a
#   step ("0-6-2"). ".." is accepted in place of "-".
# Outputs:
#   stdout  the expanded list, e.g. "0-3,8" -> "0 1 2 3 8"
#   stderr  a warning for every item that is not a number or a range
# Returns:
#   0 when every item was valid, 1 when at least one item was ignored
#
# The items are validated and expanded arithmetically; nothing from the
# input is ever handed to eval, so shell syntax inside a specification
# is never executed.
parse_range () {
  local IFS=$' \t\n'      # make "$*" joining and read splitting predictable
  local spec="$*"
  local -a tokens=() cores=()
  local tok first last step v rc=0

  spec=${spec//,/ }
  # read returns non-zero at end of input; the array is filled regardless
  read -r -d '' -a tokens <<< "$spec"

  for tok in "${tokens[@]}"; do
    tok=${tok//../-}
    if [[ $tok =~ ^[0-9]+$ ]]; then
      cores+=("$(( 10#$tok ))")
    elif [[ $tok =~ ^([0-9]+)-([0-9]+)(-([0-9]+))?$ ]]; then
      first=$(( 10#${BASH_REMATCH[1]} ))
      last=$(( 10#${BASH_REMATCH[2]} ))
      step=$(( 10#${BASH_REMATCH[4]:-1} ))
      (( step > 0 )) || step=1
      if (( first <= last )); then
        for (( v = first; v <= last; v += step )); do
          cores+=("$v")
        done
      else
        for (( v = first; v >= last; v -= step )); do
          cores+=("$v")
        done
      fi
    else
      printf 'Warning: ignoring invalid core specification "%s"\n' "$tok" >&2
      rc=1
    fi
  done

  printf '%s\n' "${cores[*]}"
  return "$rc"
}

# Affinitize interrupts
#
# Distribute the interrupts of $IFACE round-robin over the cores listed
# in $CORES by calling set_affinity once per interrupt.
#
# Globals read:    IFACE, CORES
# Globals written: CORES (replaced by its expanded form), and IRQ, core
#                  and n, which set_affinity reads
# Outputs:
#   stdout  a two-line table header followed by whatever set_affinity
#           prints
#   stderr  an error when no cores or no interrupts are available
# Returns:
#   1 when there is nothing to bind, otherwise the status of the loop
#
# NOTE(review): n restarts at 1 once it exceeds the number of cores, and
# set_affinity derives the tx queue index from n, so with more
# interrupts than cores the XPS queue index wraps as well - confirm this
# is intended.
setaff()
{
  local -a core_list=() irq_list=()
  local ncores irqs msi_path msi_names name

  # Unquoted on purpose: CORES may hold several whitespace-separated items.
  CORES=$(parse_range $CORES)
  IFS=$' \t\n' read -r -d '' -a core_list <<< "$CORES"
  ncores=${#core_list[@]}
  if (( ncores == 0 )); then
    echo "Error: No cores available for $IFACE" >&2
    return 1
  fi

  # this script only supports interrupt vectors in pairs,
  # modification would be required to support a single Tx or Rx queue
  # per interrupt vector

  # 1st attempt: vectors named "<iface>-...TxRx"
  irqs=$(grep -- "${IFACE}-.*TxRx" /proc/interrupts | cut -f1 -d:)
  # 2nd attempt: any line mentioning the interface
  [[ -n $irqs ]] || irqs=$(grep -- "$IFACE" /proc/interrupts | cut -f1 -d:)
  # 3rd attempt: look up the numbers listed under the device's msi_irqs
  if [[ -z $irqs ]]; then
    msi_names=$(
      for msi_path in "/sys/class/net/$IFACE/device/msi_irqs"/*; do
        [[ -e $msi_path ]] && printf '%s\n' "${msi_path##*/}"
      done | sort -n
    )
    for name in $msi_names; do
      # anchored so that e.g. "68" does not also select IRQ 168
      irqs+=" $(grep -E -- "^ *${name}:.*TxRx" /proc/interrupts \
        | grep -v fdir | cut -f1 -d:)"
    done
  fi

  IFS=$' \t\n' read -r -d '' -a irq_list <<< "$irqs"
  if (( ${#irq_list[@]} == 0 )); then
    echo "Error: Could not find interrupts for $IFACE" >&2
    return 1
  fi

  echo "IFACE CORE MASK -> FILE"
  echo "======================="
  n=1
  for IRQ in "${irq_list[@]}"; do
    # start over with the first core once the list is exhausted
    (( n > ncores )) && n=1
    core=${core_list[n-1]}
    set_affinity
    ((n++))
  done
}

# now the actual useful bits of code

# these next 2 lines would allow script to auto-determine interfaces
#[ -z "$IFACES" ] && IFACES=$(ls /sys/class/net)
#[ -z "$IFACES" ] && echo "Error: No interfaces up" && exit 1

# echo IFACES is $IFACES

# Default core list: the online CPUs from sysfs, or, when that yields
# nothing, the processor numbers listed in /proc/cpuinfo.
CORES=""
online_file=/sys/devices/system/cpu/online
[[ -r $online_file ]] && CORES=$(<"$online_file")
[ "$CORES" ] || CORES=$(grep ^proc /proc/cpuinfo | cut -f2 -d:)
# setaff overwrites CORES on every call, so keep the full list aside.
all_cores=$CORES

# Core list for each node from sysfs
node_dir=/sys/devices/system/node
for node_path in "$node_dir"/node[0-9]*; do
  [[ -r $node_path/cpulist ]] || continue
  node_idx=${node_path##*node}
  corelist[$node_idx]=$(<"$node_path/cpulist")
done

for IFACE in $IFACES; do
  # echo $IFACE being modified

  # Node of this interface; determined afresh for every interface so a
  # device without a numa_node file does not inherit the previous value.
  # Empty, negative or non-numeric values fall back to node 0.
  dev_dir=/sys/class/net/$IFACE/device
  node=0
  [[ -r $dev_dir/numa_node ]] && node=$(<"$dev_dir/numa_node")
  [[ $node =~ ^[0-9]+$ ]] || node=0

  case "$AFF" in
    local)
      CORES=${corelist[$node]}
      ;;
    remote)
      # An explicit node from the command line wins; otherwise pick the
      # "other" node relative to this interface (1 for node 0, else 0).
      remote_node=$rnode
      if [[ -z $remote_node ]]; then
        if (( node == 0 )); then
          remote_node=1
        else
          remote_node=0
        fi
      fi
      CORES=${corelist[$remote_node]}
      ;;
    one)
      CORES=${cnt:-0}
      ;;
    all)
      CORES=$all_cores
      ;;
    custom)
      echo -n "Input cores for $IFACE (ex. 0-7,15-23): "
      read -r CORES
      ;;
    [0-9]*)
      CORES=$AFF
      ;;
    *)
      usage
      exit 1
      ;;
  esac

  if [[ -z ${CORES//[[:space:]]/} ]]; then
    echo "Error: No cores found for $IFACE in mode '$AFF', skipping" >&2
    continue
  fi

  # call the worker function
  setaff
done

# check for irqbalance running (exact process name match)
if pgrep -x irqbalance >/dev/null 2>&1; then
  echo " WARNING: irqbalance is running and will"
  echo " likely override this script's affinitization."
  echo " Please stop the irqbalance service and/or execute"
  echo " 'killall irqbalance'"
fi

运行绑定(脚本使用了 bash 特有的语法,如果系统的 sh 不是 bash,请改用 bash set_aff.sh eth0 执行)。输出结果中的中断号数量对应着网卡多队列的数量:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
[root@]# sh set_aff.sh eth0
IFACE CORE MASK -> FILE
=======================
eth0 0 1 -> /proc/irq/68/smp_affinity
eth0 1 2 -> /proc/irq/69/smp_affinity
eth0 2 4 -> /proc/irq/70/smp_affinity
eth0 3 8 -> /proc/irq/71/smp_affinity
eth0 4 10 -> /proc/irq/74/smp_affinity
eth0 5 20 -> /proc/irq/75/smp_affinity
eth0 6 40 -> /proc/irq/76/smp_affinity
eth0 7 80 -> /proc/irq/77/smp_affinity
eth0 8 100 -> /proc/irq/78/smp_affinity
eth0 9 200 -> /proc/irq/79/smp_affinity
eth0 10 400 -> /proc/irq/80/smp_affinity
eth0 11 800 -> /proc/irq/81/smp_affinity
eth0 12 1000 -> /proc/irq/82/smp_affinity
eth0 13 2000 -> /proc/irq/83/smp_affinity
eth0 14 4000 -> /proc/irq/84/smp_affinity
eth0 15 8000 -> /proc/irq/85/smp_affinity
eth0 16 10000 -> /proc/irq/86/smp_affinity
eth0 17 20000 -> /proc/irq/87/smp_affinity
eth0 18 40000 -> /proc/irq/88/smp_affinity
eth0 19 80000 -> /proc/irq/89/smp_affinity
eth0 20 100000 -> /proc/irq/90/smp_affinity
eth0 21 200000 -> /proc/irq/91/smp_affinity
eth0 22 400000 -> /proc/irq/92/smp_affinity
eth0 23 800000 -> /proc/irq/93/smp_affinity

绑定nginx worker进程到cpu

查看进程对应的cpu

top和ps均可以查看,top通过编辑设置添加列动态查看。
ps 可以直接输出过滤

1
# ps -eo user,stat,pid,psr,%cpu,%mem,comm |grep nginx

nginx默认worker_cpu_affinity 下进程运行在CPU的情况

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
[root@]# ps -eo user,stat,pid,psr,%cpu,%mem,comm |grep nginx
root Ss 17931 8 0.0 0.0 nginx
nginx S 27740 3 0.0 0.0 nginx
nginx S 27741 23 0.0 0.0 nginx
nginx S 27742 4 0.0 0.0 nginx
nginx S 27743 22 0.0 0.0 nginx
nginx S 27744 5 0.0 0.0 nginx
nginx S 27745 21 0.0 0.0 nginx
nginx S 27746 1 0.0 0.0 nginx
nginx S 27747 2 0.0 0.0 nginx
nginx S 27748 8 0.0 0.0 nginx
nginx S 27749 0 0.0 0.0 nginx
nginx S 27750 15 0.0 0.0 nginx
nginx S 27751 10 0.0 0.0 nginx
nginx S 27752 16 0.0 0.0 nginx
nginx S 27753 17 0.0 0.0 nginx
nginx S 27754 7 0.0 0.0 nginx
nginx S 27755 2 0.0 0.0 nginx
nginx S 27756 4 0.0 0.0 nginx
nginx S 27757 6 0.0 0.0 nginx
nginx S 27758 5 0.0 0.0 nginx
nginx S 27759 3 0.0 0.0 nginx
nginx S 27760 11 0.0 0.0 nginx
nginx S 27761 14 0.0 0.0 nginx
nginx S 27762 13 0.0 0.0 nginx
nginx S 27763 9 0.0 0.0 nginx
[root@]# ps -eo user,stat,pid,psr,%cpu,%mem,comm |grep nginx
root Ss 17931 8 0.0 0.0 nginx
nginx S 27740 0 0.0 0.0 nginx
nginx S 27741 9 0.0 0.0 nginx
nginx S 27742 2 0.0 0.0 nginx
nginx S 27743 12 0.0 0.0 nginx
nginx S 27744 5 0.0 0.0 nginx
nginx S 27745 14 0.0 0.0 nginx
nginx S 27746 12 0.0 0.0 nginx
nginx S 27747 13 0.0 0.0 nginx
nginx S 27748 5 0.0 0.0 nginx
nginx S 27749 9 0.0 0.0 nginx
nginx S 27750 12 0.0 0.0 nginx
nginx S 27751 8 0.0 0.0 nginx
nginx S 27752 16 0.0 0.0 nginx
nginx S 27753 5 0.0 0.0 nginx
nginx S 27754 6 0.0 0.0 nginx
nginx S 27755 12 0.0 0.0 nginx
nginx S 27756 12 0.0 0.0 nginx
nginx S 27757 16 0.0 0.0 nginx
nginx S 27758 1 0.0 0.0 nginx
nginx S 27759 0 0.0 0.0 nginx
nginx S 27760 17 0.0 0.0 nginx
nginx S 27761 3 0.0 0.0 nginx
nginx S 27762 3 0.0 0.0 nginx
nginx S 27763 3 0.0 0.0 nginx

可以看到27741号等进程运行的cpu时刻在变。

nginx绑定进程到CPU

添加配置:

1
worker_cpu_affinity 00000000000000000000001;

reload后查看:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
[root@]# ps -eo user,stat,pid,psr,%cpu,%mem,comm |grep nginx
nginx S 3567 0 0.0 0.0 nginx
nginx S 3568 0 0.0 0.0 nginx
nginx S 3569 0 0.0 0.0 nginx
nginx S 3570 0 0.0 0.0 nginx
nginx S 3571 0 0.0 0.0 nginx
nginx S 3572 0 0.0 0.0 nginx
nginx S 3573 0 0.0 0.0 nginx
nginx S 3574 0 0.0 0.0 nginx
nginx S 3575 0 0.0 0.0 nginx
nginx S 3576 0 0.0 0.0 nginx
nginx S 3577 0 0.0 0.0 nginx
nginx S 3578 0 0.0 0.0 nginx
nginx S 3579 0 0.0 0.0 nginx
nginx S 3580 0 0.0 0.0 nginx
nginx S 3581 0 0.0 0.0 nginx
nginx S 3582 0 0.0 0.0 nginx
nginx S 3583 0 0.0 0.0 nginx
nginx S 3584 0 0.0 0.0 nginx
nginx S 3585 0 0.0 0.0 nginx
nginx S 3586 0 0.0 0.0 nginx
nginx S 3587 0 0.0 0.0 nginx
nginx S 3588 0 0.0 0.0 nginx
nginx S 3589 0 0.0 0.0 nginx
nginx S 3590 0 0.0 0.0 nginx
root Ss 17931 20 0.0 0.0 nginx

可见所有的worker进程都绑定在0号CPU;
同理优化配置为:

1
worker_cpu_affinity 00000000000000000000001 00000000000000000000010 00000000000000000000100 00000000000000000001000 ......... 10000000000000000000000;  ### 太长不写:掩码的个数与CPU个数相同,每个worker绑定一个CPU,也可以按自己的需要组合

其实也可以直接设置为 auto(即 worker_cpu_affinity auto;),让 nginx 自动把各个 worker 进程绑定到可用的CPU上。

其他

开启NUMA



支付宝打赏 微信打赏

赞赏支持一下