Skip to content

Commit 38f07ca

Browse files
committed
(re)-tackle CNI concurrency issues
Signed-off-by: apostasie <spam_blackhole@farcloser.world>
1 parent e94e9c6 commit 38f07ca

File tree

3 files changed

+153
-97
lines changed

3 files changed

+153
-97
lines changed

pkg/netutil/netutil.go

+50-96
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,6 @@ import (
3838

3939
"github.com/containerd/nerdctl/v2/pkg/api/types"
4040
"github.com/containerd/nerdctl/v2/pkg/labels"
41-
"github.com/containerd/nerdctl/v2/pkg/lockutil"
4241
"github.com/containerd/nerdctl/v2/pkg/netutil/nettype"
4342
subnetutil "github.com/containerd/nerdctl/v2/pkg/netutil/subnet"
4443
"github.com/containerd/nerdctl/v2/pkg/strutil"
@@ -53,14 +52,7 @@ type CNIEnv struct {
5352
type CNIEnvOpt func(e *CNIEnv) error
5453

5554
func (e *CNIEnv) ListNetworksMatch(reqs []string, allowPseudoNetwork bool) (list map[string][]*NetworkConfig, errs []error) {
56-
var err error
57-
58-
var networkConfigs []*NetworkConfig
59-
// NOTE: we cannot lock NetconfPath directly, as Cilium (maybe others) are also locking it.
60-
err = lockutil.WithDirLock(filepath.Join(e.NetconfPath, ".nerdctl.lock"), func() error {
61-
networkConfigs, err = e.networkConfigList()
62-
return err
63-
})
55+
networkConfigs, err := fsRead(e)
6456
if err != nil {
6557
return nil, []error{err}
6658
}
@@ -188,7 +180,8 @@ func WithDefaultNetwork(bridgeIP string) CNIEnvOpt {
188180

189181
func WithNamespace(namespace string) CNIEnvOpt {
190182
return func(e *CNIEnv) error {
191-
if err := os.MkdirAll(filepath.Join(e.NetconfPath, namespace), 0755); err != nil {
183+
err := fsEnsureRoot(e, namespace)
184+
if err != nil {
192185
return err
193186
}
194187
e.Namespace = namespace
@@ -201,7 +194,8 @@ func NewCNIEnv(cniPath, cniConfPath string, opts ...CNIEnvOpt) (*CNIEnv, error)
201194
Path: cniPath,
202195
NetconfPath: cniConfPath,
203196
}
204-
if err := os.MkdirAll(e.NetconfPath, 0755); err != nil {
197+
198+
if err := fsEnsureRoot(&e, ""); err != nil {
205199
return nil, err
206200
}
207201

@@ -215,25 +209,17 @@ func NewCNIEnv(cniPath, cniConfPath string, opts ...CNIEnvOpt) (*CNIEnv, error)
215209
}
216210

217211
func (e *CNIEnv) NetworkList() ([]*NetworkConfig, error) {
218-
var netConfigList []*NetworkConfig
219-
var err error
220-
fn := func() error {
221-
netConfigList, err = e.networkConfigList()
222-
return err
223-
}
224-
err = lockutil.WithDirLock(filepath.Join(e.NetconfPath, ".nerdctl.lock"), fn)
225-
226-
return netConfigList, err
212+
return fsRead(e)
227213
}
228214

229215
func (e *CNIEnv) NetworkMap() (map[string]*NetworkConfig, error) { //nolint:revive
230-
networks, err := e.networkConfigList()
216+
netConfigList, err := fsRead(e)
231217
if err != nil {
232218
return nil, err
233219
}
234220

235-
m := make(map[string]*NetworkConfig, len(networks))
236-
for _, n := range networks {
221+
m := make(map[string]*NetworkConfig, len(netConfigList))
222+
for _, n := range netConfigList {
237223
if original, exists := m[n.Name]; exists {
238224
log.L.Warnf("duplicate network name %q, %#v will get superseded by %#v", n.Name, original, n)
239225
}
@@ -243,12 +229,12 @@ func (e *CNIEnv) NetworkMap() (map[string]*NetworkConfig, error) { //nolint:revi
243229
}
244230

245231
func (e *CNIEnv) NetworkByNameOrID(key string) (*NetworkConfig, error) {
246-
networks, err := e.networkConfigList()
232+
netConfigList, err := fsRead(e)
247233
if err != nil {
248234
return nil, err
249235
}
250236

251-
for _, n := range networks {
237+
for _, n := range netConfigList {
252238
if n.Name == key {
253239
return n, nil
254240
}
@@ -261,36 +247,31 @@ func (e *CNIEnv) NetworkByNameOrID(key string) (*NetworkConfig, error) {
261247
}
262248

263249
func (e *CNIEnv) filterNetworks(filterf func(*NetworkConfig) bool) ([]*NetworkConfig, error) {
264-
networkConfigs, err := e.networkConfigList()
250+
netConfigList, err := fsRead(e)
265251
if err != nil {
266252
return nil, err
267253
}
268254
result := []*NetworkConfig{}
269-
for _, networkConfig := range networkConfigs {
255+
for _, networkConfig := range netConfigList {
270256
if filterf(networkConfig) {
271257
result = append(result, networkConfig)
272258
}
273259
}
274260
return result, nil
275261
}
276262

277-
func (e *CNIEnv) getConfigPathForNetworkName(netName string) string {
278-
if netName == DefaultNetworkName || e.Namespace == "" {
279-
return filepath.Join(e.NetconfPath, "nerdctl-"+netName+".conflist")
280-
}
281-
return filepath.Join(e.NetconfPath, e.Namespace, "nerdctl-"+netName+".conflist")
282-
}
283-
284263
func (e *CNIEnv) usedSubnets() ([]*net.IPNet, error) {
285264
usedSubnets, err := subnetutil.GetLiveNetworkSubnets()
286265
if err != nil {
287266
return nil, err
288267
}
289-
networkConfigs, err := e.networkConfigList()
268+
269+
netConfigList, err := fsRead(e)
290270
if err != nil {
291271
return nil, err
292272
}
293-
for _, netConf := range networkConfigs {
273+
274+
for _, netConf := range netConfigList {
294275
usedSubnets = append(usedSubnets, netConf.subnets()...)
295276
}
296277
return usedSubnets, nil
@@ -314,44 +295,39 @@ type cniNetworkConfig struct {
314295
func (e *CNIEnv) CreateNetwork(opts types.NetworkCreateOptions) (*NetworkConfig, error) { //nolint:revive
315296
var netConf *NetworkConfig
316297

317-
fn := func() error {
318-
netMap, err := e.NetworkMap()
319-
if err != nil {
320-
return err
321-
}
298+
netMap, err := e.NetworkMap()
299+
if err != nil {
300+
return nil, err
301+
}
322302

323-
if _, ok := netMap[opts.Name]; ok {
324-
return errdefs.ErrAlreadyExists
325-
}
326-
ipam, err := e.generateIPAM(opts.IPAMDriver, opts.Subnets, opts.Gateway, opts.IPRange, opts.IPAMOptions, opts.IPv6)
327-
if err != nil {
328-
return err
329-
}
330-
plugins, err := e.generateCNIPlugins(opts.Driver, opts.Name, ipam, opts.Options, opts.IPv6)
331-
if err != nil {
332-
return err
333-
}
334-
netConf, err = e.generateNetworkConfig(opts.Name, opts.Labels, plugins)
335-
if err != nil {
336-
return err
337-
}
338-
return e.writeNetworkConfig(netConf)
303+
// See note in fsWrite. Just because it does not exist now does not guarantee it will still not exist later.
304+
// This is more a perf optimization at this point than a true check.
305+
if _, ok := netMap[opts.Name]; ok {
306+
return nil, errdefs.ErrAlreadyExists
339307
}
340-
err := lockutil.WithDirLock(filepath.Join(e.NetconfPath, ".nerdctl.lock"), fn)
308+
ipam, err := e.generateIPAM(opts.IPAMDriver, opts.Subnets, opts.Gateway, opts.IPRange, opts.IPAMOptions, opts.IPv6)
341309
if err != nil {
342310
return nil, err
343311
}
312+
plugins, err := e.generateCNIPlugins(opts.Driver, opts.Name, ipam, opts.Options, opts.IPv6)
313+
if err != nil {
314+
return nil, err
315+
}
316+
netConf, err = e.generateNetworkConfig(opts.Name, opts.Labels, plugins)
317+
if err != nil {
318+
return nil, err
319+
}
320+
err = fsWrite(e, netConf)
321+
322+
// See note above. If it exists, we got raced out by another process. Consider this to NOT be a hard error.
323+
if err != nil && !errdefs.IsAlreadyExists(err) {
324+
return nil, err
325+
}
344326
return netConf, nil
345327
}
346328

347329
func (e *CNIEnv) RemoveNetwork(net *NetworkConfig) error {
348-
fn := func() error {
349-
if err := os.RemoveAll(net.File); err != nil {
350-
return err
351-
}
352-
return net.clean()
353-
}
354-
return lockutil.WithDirLock(filepath.Join(e.NetconfPath, ".nerdctl.lock"), fn)
330+
return fsRemove(e, net)
355331
}
356332

357333
// GetDefaultNetworkConfig checks whether the default network exists
@@ -394,8 +370,8 @@ func (e *CNIEnv) GetDefaultNetworkConfig() (*NetworkConfig, error) {
394370

395371
// Warn the user if the default network was not created by nerdctl.
396372
match := nameMatches[0]
397-
_, statErr := os.Stat(e.getConfigPathForNetworkName(DefaultNetworkName))
398-
if match.NerdctlID == nil || statErr != nil {
373+
exists, statErr := fsExists(e, DefaultNetworkName)
374+
if match.NerdctlID == nil || statErr != nil || !exists {
399375
log.L.Warnf("default network named %q does not have an internal nerdctl ID or nerdctl-managed config file, it was most likely NOT created by nerdctl", DefaultNetworkName)
400376
}
401377

@@ -419,9 +395,12 @@ func (e *CNIEnv) ensureDefaultNetworkConfig(bridgeIP string) error {
419395
}
420396

421397
func (e *CNIEnv) createDefaultNetworkConfig(bridgeIP string) error {
422-
filename := e.getConfigPathForNetworkName(DefaultNetworkName)
423-
if _, err := os.Stat(filename); err == nil {
424-
return fmt.Errorf("already found existing network config at %q, cannot create new network named %q", filename, DefaultNetworkName)
398+
exist, err := fsExists(e, DefaultNetworkName)
399+
if err != nil && !os.IsNotExist(err) {
400+
return err
401+
}
402+
if exist {
403+
return fmt.Errorf("already found existing network config, cannot create new network named %q", DefaultNetworkName)
425404
}
426405

427406
bridgeCIDR := DefaultCIDR
@@ -443,7 +422,7 @@ func (e *CNIEnv) createDefaultNetworkConfig(bridgeIP string) error {
443422
Labels: []string{fmt.Sprintf("%s=true", labels.NerdctlDefaultNetwork)},
444423
}
445424

446-
_, err := e.CreateNetwork(opts)
425+
_, err = e.CreateNetwork(opts)
447426
if err != nil && !errdefs.IsAlreadyExists(err) {
448427
return err
449428
}
@@ -490,31 +469,6 @@ func (e *CNIEnv) generateNetworkConfig(name string, labels []string, plugins []C
490469
}, nil
491470
}
492471

493-
// writeNetworkConfig writes NetworkConfig file to cni config path.
494-
func (e *CNIEnv) writeNetworkConfig(net *NetworkConfig) error {
495-
filename := e.getConfigPathForNetworkName(net.Name)
496-
if _, err := os.Stat(filename); err == nil {
497-
return errdefs.ErrAlreadyExists
498-
}
499-
return os.WriteFile(filename, net.Bytes, 0644)
500-
}
501-
502-
// networkConfigList loads config from dir if dir exists.
503-
func (e *CNIEnv) networkConfigList() ([]*NetworkConfig, error) {
504-
common, err := libcni.ConfFiles(e.NetconfPath, []string{".conf", ".conflist", ".json"})
505-
if err != nil {
506-
return nil, err
507-
}
508-
namespaced := []string{}
509-
if e.Namespace != "" {
510-
namespaced, err = libcni.ConfFiles(filepath.Join(e.NetconfPath, e.Namespace), []string{".conf", ".conflist", ".json"})
511-
if err != nil {
512-
return nil, err
513-
}
514-
}
515-
return cniLoad(append(common, namespaced...))
516-
}
517-
518472
func wrapCNIError(fileName string, err error) error {
519473
return fmt.Errorf("failed marshalling json out of network configuration file %q: %w\n"+
520474
"For details on the schema, see https://pkg.go.dev/github.com/containernetworking/cni/libcni#NetworkConfigList", fileName, err)

pkg/netutil/store.go

+100
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,100 @@
1+
/*
2+
Copyright The containerd Authors.
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
*/
16+
17+
package netutil
18+
19+
import (
20+
"os"
21+
"path/filepath"
22+
23+
"github.com/containernetworking/cni/libcni"
24+
25+
"github.com/containerd/errdefs"
26+
27+
"github.com/containerd/nerdctl/v2/pkg/lockutil"
28+
)
29+
30+
// NOTE: libcni is not safe to use concurrently - or at least delegates concurrency management to the consumer.
31+
// Furthermore, CNIEnv (prior to this) is assuming the filesystem is ACID and other TOCTOU faults.
32+
// This small set of methods here are meant to isolate CNIEnv entirely from the filesystem.
33+
// This is NOT proper - we should instead use the Store implementation, which is the generic abstraction for ACID
34+
// operations - but for now that will do, waiting for a full rewrite of CNIEnv.
35+
36+
func fsEnsureRoot(e *CNIEnv, namespace string) error {
37+
path := e.NetconfPath
38+
if namespace != "" {
39+
path = filepath.Join(e.NetconfPath, namespace)
40+
}
41+
return os.MkdirAll(path, 0755)
42+
}
43+
44+
func fsRemove(e *CNIEnv, net *NetworkConfig) error {
45+
fn := func() error {
46+
if err := os.RemoveAll(net.File); err != nil {
47+
return err
48+
}
49+
return net.clean()
50+
}
51+
return lockutil.WithDirLock(filepath.Join(e.NetconfPath, ".nerdctl.lock"), fn)
52+
}
53+
54+
func fsExists(e *CNIEnv, name string) (bool, error) {
55+
fi, err := os.Stat(getConfigPathForNetworkName(e, name))
56+
return !os.IsNotExist(err) && !fi.IsDir(), err
57+
}
58+
59+
func fsWrite(e *CNIEnv, net *NetworkConfig) error {
60+
filename := getConfigPathForNetworkName(e, net.Name)
61+
// FIXME: note that this is still problematic.
62+
// Concurrent access may independently first figure out that a given network is missing, and while the lock
63+
// here will prevent concurrent writes, one of the routines will fail.
64+
// Consuming code MUST account for that scenario.
65+
return lockutil.WithDirLock(filepath.Join(e.NetconfPath, ".nerdctl.lock"), func() error {
66+
if _, err := os.Stat(filename); err == nil {
67+
return errdefs.ErrAlreadyExists
68+
}
69+
return os.WriteFile(filename, net.Bytes, 0644)
70+
})
71+
}
72+
73+
func fsRead(e *CNIEnv) ([]*NetworkConfig, error) {
74+
var nc []*NetworkConfig
75+
var err error
76+
err = lockutil.WithDirLock(filepath.Join(e.NetconfPath, ".nerdctl.lock"), func() error {
77+
namespaced := []string{}
78+
var common []string
79+
common, err = libcni.ConfFiles(e.NetconfPath, []string{".conf", ".conflist", ".json"})
80+
if err != nil {
81+
return err
82+
}
83+
if e.Namespace != "" {
84+
namespaced, err = libcni.ConfFiles(filepath.Join(e.NetconfPath, e.Namespace), []string{".conf", ".conflist", ".json"})
85+
if err != nil {
86+
return err
87+
}
88+
}
89+
nc, err = cniLoad(append(common, namespaced...))
90+
return err
91+
})
92+
return nc, err
93+
}
94+
95+
func getConfigPathForNetworkName(e *CNIEnv, netName string) string {
96+
if netName == DefaultNetworkName || e.Namespace == "" {
97+
return filepath.Join(e.NetconfPath, "nerdctl-"+netName+".conflist")
98+
}
99+
return filepath.Join(e.NetconfPath, e.Namespace, "nerdctl-"+netName+".conflist")
100+
}

pkg/ocihook/ocihook.go

+3-1
Original file line numberDiff line numberDiff line change
@@ -103,11 +103,13 @@ func Run(stdin io.Reader, stderr io.Writer, event, dataStore, cniPath, cniNetcon
103103
// This below is a stopgap solution that just enforces a global lock
104104
// Note this here is probably not enough, as concurrent CNI operations may happen outside of the scope of ocihooks
105105
// through explicit calls to Remove, etc.
106+
// Finally note that this is not the same (albeit similar) as libcni filesystem manipulation locking,
107+
// hence the independent lock
106108
err = os.MkdirAll(cniNetconfPath, 0o700)
107109
if err != nil {
108110
return err
109111
}
110-
lock, err := lockutil.Lock(filepath.Join(cniNetconfPath, ".nerdctl.lock"))
112+
lock, err := lockutil.Lock(filepath.Join(cniNetconfPath, ".cni-concurrency.lock"))
111113
if err != nil {
112114
return err
113115
}

0 commit comments

Comments
 (0)