Go中的网络轮询器(1)--Epoll在Go中的抽象

2年前 (2022) 程序员胖胖胖虎阿
287 0 0

Go中的网络轮询器(1)--Epoll在Go中的抽象

Go用netpoll实现对不同操作系统的I/O多路复用的抽象(或者说是封装),Go中多路复用与平台无关。编译器在编译时根据平台的不同使用不同的多路复用器进行编译。

Go中的网络轮询器(1)--Epoll在Go中的抽象

有关netpoll的函数

epollkqueuesolaries 等多路复用模块都要实现以下五个函数

// 新建多路复用器
// Initialize the poller. Only called once.
// func netpollinit()

// 监听文件描述符上的边缘触发事件,创建事件并加入监听
// Disable notifications for fd. Return an errno value.
// func netpollopen(fd uintptr, pd *pollDesc) int32

// 监听多路复用器,返回一组已经就绪的goroutine
// func netpoll(delta int64) gList
//     Poll the network. If delta < 0, block indefinitely. If delta == 0,
//     poll without blocking. If delta > 0, block for up to delta nanoseconds.
//     Return a list of goroutines built by calling netpollready.

// 唤醒多路复用器
// func netpollBreak()
//     Wake up the network poller, assumed to be blocked in netpoll.

// 判断文件描述符是否被多路复用器使用
// func netpollIsPollDescriptor(fd uintptr) bool
//     Reports whether fd is a file descriptor used by the poller.

下面是Linux上的epoll的相关实现

数据结构

操作系统中的I/O多路复用器会监控文件描述符的可读可写,而Go中的netpoll会监控pollDesc结构体的状态,它封装了操作系统的文件描述符。

// Network poller descriptor.
//
// No heap pointers.
//
//go:notinheap
// socket详细信息
type pollDesc struct {
    link *pollDesc // in pollcache, protected by pollcache.lock
    // 文件描述符
    fd   uintptr   // constant for pollDesc usage lifetime

    // atomicInfo holds bits from closing, rd, and wd,
    // which are only ever written while holding the lock,
    // summarized for use by netpollcheckerr,
    // which cannot acquire the lock.
    // After writing these fields under lock in a way that
    // might change the summary, code must call publishInfo
    // before releasing the lock.
    // Code that changes fields and then calls netpollunblock
    // (while still holding the lock) must call publishInfo
    // before calling netpollunblock, because publishInfo is what
    // stops netpollblock from blocking anew
    // (by changing the result of netpollcheckerr).
    // atomicInfo also holds the eventErr bit,
    // recording whether a poll event on the fd got an error;
    // atomicInfo is the only source of truth for that bit.
    atomicInfo atomic.Uint32 // atomic pollInfo

    // rg, wg are accessed atomically and hold g pointers.
    // (Using atomic.Uintptr here is similar to using guintptr elsewhere.)
    // rg,wg为二进制的信号量
    rg atomic.Uintptr // pdReady, pdWait, G waiting for read or nil
    wg atomic.Uintptr // pdReady, pdWait, G waiting for write or nil
    
    // 互斥锁
    lock    mutex // protects the following fields
    closing bool
    user    uint32    // user settable cookie
    // 文件描述被重用
    rseq    uintptr   // protects from stale read timers
    // 等待文件描述符的计时器
    rt      timer     // read deadline timer (set if rt.f != nil)
    // 等待文件描述符可读的截止日期
    rd      int64     // read deadline (a nanotime in the future, -1 when expired)
    
    // 计时器被重置
    wseq    uintptr   // protects from stale write timers
    // 等待文件描述符的计时器
    wt      timer     // write deadline timer
    // 等待文件描述符可写的截止日期
    wd      int64     // write deadline (a nanotime in the future, -1 when expired)
    self    *pollDesc // storage for indirect interface. See (*pollDesc).makeArg.
}

netpollinit

  1. 调用epollcreate1创建epoll文件描述符,该描述符会在整个程序的生命周期中使用
  2. 调用nonblockingPipe 创建一个用于通信的管道
  3. 注册事件
  4. 使用 epollctl 将用于读取数据的文件描述符打包成 epollevent 事件加入监听
func netpollinit() {
    // 创建epoll文件描述符,该描述符会在整个程序的生命周期中使用
    epfd = epollcreate1(_EPOLL_CLOEXEC)
    if epfd < 0 {
        // 拿到epollId
        epfd = epollcreate(1024)
        if epfd < 0 {
            println("runtime: epollcreate failed with", -epfd)
            throw("runtime: netpollinit failed")
        }
        closeonexec(epfd)
    }
    // 创建Linux中的管道,目的是关闭epoll
    r, w, errno := nonblockingPipe()
    if errno != 0 {
        println("runtime: pipe failed with", -errno)
        throw("runtime: pipe failed")
    }
    // 注册事件
    ev := epollevent{
        events: _EPOLLIN,
    }
    *(**uintptr)(unsafe.Pointer(&ev.data)) = &netpollBreakRd
    // 使用 epollctl 将用于读取数据的文件描述符打包成 epollevent 事件加入监听
    errno = epollctl(epfd, _EPOLL_CTL_ADD, r, &ev)
    if errno != 0 {
        println("runtime: epollctl failed with", -errno)
        throw("runtime: epollctl failed")
    }
    // 管道的读写
    netpollBreakRd = uintptr(r)
    netpollBreakWr = uintptr(w)
}

netpollBreak

初始化的管道为我们提供了中断多路复用等待文件描述符中事件的方法,`netpollBreak会向管道中写入数据唤醒 epoll

因为目前的计时器由网络轮询器管理和触发,它能够让网络轮询器立刻返回并让运行时检查是否有需要触发的计时器。

func netpollBreak() {
    if atomic.Cas(&netpollWakeSig, 0, 1) {
        for {
            var b byte
             // 向管道写入数据
            n := write(netpollBreakWr, unsafe.Pointer(&b), 1)
            if n == 1 {
                break
            }
            if n == -_EINTR {
                continue
            }
            if n == -_EAGAIN {
                return
            }
            println("runtime: netpollBreak write failed with", -n)
            throw("runtime: netpollBreak write failed")
        }
    }
}

netpollopen

// 将socket可读,可写,断开事件注册到Epoll中
func netpollopen(fd uintptr, pd *pollDesc) int32 {
    var ev epollevent
    ev.events = _EPOLLIN | _EPOLLOUT | _EPOLLRDHUP | _EPOLLET
    *(**pollDesc)(unsafe.Pointer(&ev.data)) = pd
    // 调用 epollctl 向全局的轮询文件描述符 epfd 中加入新的轮询事件监听文件描述符的可读和可写状态:
    return -epollctl(epfd, _EPOLL_CTL_ADD, int32(fd), &ev)
}

netpollclose

func netpollclose(fd uintptr) int32 {
    var ev epollevent
    // 从全局的epfd中删除监听的文件描述符
    return -epollctl(epfd, _EPOLL_CTL_DEL, int32(fd), &ev)
}

netpoll

netpoll轮询网络查询发生了什么事件。

  1. 根据传入的 delay 计算 epoll 系统调用需要等待的时间;
  2. 调用 epollwait 等待可读或者可写事件的发生;
  3. 在循环中依次处理 epollevent 事件;
// netpoll checks for ready network connections.
// Returns list of goroutines that become runnable.
// delay < 0: blocks indefinitely
// delay == 0: does not block, just polls
// delay > 0: block for up to that many nanoseconds
// 查询发生了什么事件
// 返回协程链表
func netpoll(delay int64) gList {
    if epfd == -1 {
        return gList{}
    }
    // delay的单位是纳秒所以需要将纳秒转换成毫秒
    var waitms int32
    if delay < 0 {
        waitms = -1
    } else if delay == 0 {
        waitms = 0
    } else if delay < 1e6 {
        waitms = 1
    } else if delay < 1e15 {
        waitms = int32(delay / 1e6)
    } else {
        // An arbitrary cap on how long to wait for a timer.
        // 1e9 ms == ~11.5 days.
        waitms = 1e9
    }
    
    var events [128]epollevent
retry:
    // 调用epollwait等待文件描述符转换成可读或者可写
    n := epollwait(epfd, &events[0], int32(len(events)), waitms)
    if n < 0 {
        if n != -_EINTR {
            println("runtime: epollwait on fd", epfd, "failed with", -n)
            throw("runtime: netpoll failed")
        }
        // If a timed sleep was interrupted, just return to
        // recalculate how long we should sleep now.
        if waitms > 0 {
            return gList{}
        }
        goto retry
    }
    // 协程链表
    var toRun gList
    // n > 0 ==> 出现了待处理的事件
    // 在循环中依次处理事件
    for i := int32(0); i < n; i++ {
        ev := &events[i]
        if ev.events == 0 {
            continue
        }

        if *(**uintptr)(unsafe.Pointer(&ev.data)) == &netpollBreakRd {
            if ev.events != _EPOLLIN {
                println("runtime: netpoll: break fd ready for", ev.events)
                throw("runtime: netpoll: break fd ready for something unexpected")
            }
            if delay != 0 {
                // netpollBreak could be picked up by a
                // nonblocking poll. Only read the byte
                // if blocking.
                var tmp [16]byte
                read(int32(netpollBreakRd), noescape(unsafe.Pointer(&tmp[0])), int32(len(tmp)))
                atomic.Store(&netpollWakeSig, 0)
            }
            continue
        }

        var mode int32
         // 可读
        if ev.events&(_EPOLLIN|_EPOLLRDHUP|_EPOLLHUP|_EPOLLERR) != 0 {
            mode += 'r'
        }
         // 可写 
        if ev.events&(_EPOLLOUT|_EPOLLHUP|_EPOLLERR) != 0 {
            mode += 'w'
        }
        if mode != 0 {
            pd := *(**pollDesc)(unsafe.Pointer(&ev.data))
            pd.setEventErr(ev.events == _EPOLLERR)
            netpollready(&toRun, pd, mode)
        }
    }
    return toRun
}

func EpollWait(epfd int, events []EpollEvent, msec int) (n int, err error) {
    var _p0 unsafe.Pointer
    if len(events) > 0 {
        _p0 = unsafe.Pointer(&events[0])
    } else {
        _p0 = unsafe.Pointer(&_zero)
    }
    r0, _, e1 := Syscall6(SYS_EPOLL_WAIT, uintptr(epfd), uintptr(_p0), uintptr(len(events)), uintptr(msec), 0, 0)
    n = int(r0)
    if e1 != 0 {
        err = errnoErr(e1)
    }
    return
}

// netpollready is called by the platform-specific netpoll function.
// It declares that the fd associated with pd is ready for I/O.
// The toRun argument is used to build a list of goroutines to return
// from netpoll. The mode argument is 'r', 'w', or 'r'+'w' to indicate
// whether the fd is ready for reading or writing or both.
//
// This may run while the world is stopped, so write barriers are not allowed.
//go:nowritebarrier
func netpollready(toRun *gList, pd *pollDesc, mode int32) {
    var rg, wg *g
    if mode == 'r' || mode == 'r'+'w' {
        rg = netpollunblock(pd, 'r', true)
    }
    if mode == 'w' || mode == 'r'+'w' {
        wg = netpollunblock(pd, 'w', true)
    }
    if rg != nil {
        toRun.push(rg)
    }
    if wg != nil {
        toRun.push(wg)
    }
}
func netpollunblock(pd *pollDesc, mode int32, ioready bool) *g {
    gpp := &pd.rg
    if mode == 'w' {
        gpp = &pd.wg
    }
    // 将pollDesc中的读或者写信号量转换成pdReady 并返回其中存储的Goroutine;
    for {
        old := gpp.Load()
        if old == pdReady {
            return nil
        }
        if old == 0 && !ioready {
            // Only set pdReady for ioready. runtime_pollWait
            // will check for timeout/cancel before waiting.
            return nil
        }
        var new uintptr
        if ioready {
            new = pdReady
        }
        if gpp.CompareAndSwap(old, new) {
            if old == pdWait {
                old = 0
            }
            return (*g)(unsafe.Pointer(old))
        }
    }
}
版权声明:程序员胖胖胖虎阿 发表于 2022年11月2日 上午12:32。
转载请注明:Go中的网络轮询器(1)--Epoll在Go中的抽象 | 胖虎的工具箱-编程导航

相关文章

暂无评论

暂无评论...