Compile the following code
package main
// main is the whole sample program: it prints a single line with the
// built-in println, so the binary contains little beyond the Go runtime.
func main() {
println("Hello Go")
}
Find the Entry Point of the Go Executable#
Use readelf to view the entry point address of the executable binary, then use the nm tool from the Go toolchain to find the function name in the symbol table that corresponds to that address.
$ readelf -h ./hello
ELF Header:
...
Entry point address: 0x454020
...
$ go tool nm ./hello | grep 454020
454020 T _rt0_amd64_linux
Startup Phase#
In the Go source code, you can find the file where _rt0_amd64_linux is defined. The work done at the start of the program is written in the assembly code for the corresponding platform, mainly in two files: runtime/rt0_linux_amd64.s and runtime/asm_amd64.s.
// _rt0_amd64_linux is the ELF entry point found above; it does nothing except
// jump to the OS-independent amd64 startup routine _rt0_amd64.
TEXT _rt0_amd64_linux(SB),NOSPLIT,$-8
JMP _rt0_amd64(SB)
// _rt0_amd64 is the common amd64 startup code. It loads argc (the word at the
// top of the initial stack) into DI and the address of argv (the word right
// after it) into SI, then jumps to runtime·rt0_go.
TEXT _rt0_amd64(SB),NOSPLIT,$-8
MOVQ 0(SP), DI // argc
LEAQ 8(SP), SI // argv
JMP runtime·rt0_go(SB)
// runtime·rt0_go is the bootstrap routine reached from _rt0_amd64.
// In the order shown in this excerpt it: sets up g0's stack bounds, installs
// TLS for m0, runs runtime·check, passes argc/argv to runtime·args, calls
// osinit and schedinit, queues runtime.main through newproc, and finally
// enters mstart, which is not expected to return.
TEXT runtime·rt0_go(SB),NOSPLIT|NOFRAME|TOPFRAME,$0
// ... (prologue elided in this excerpt)
// Set up the stack space for g0:
// BX = SP - 64 KiB is stored as both stack guards and as stack.lo,
// and the current SP is stored as stack.hi
MOVQ $runtime·g0(SB), DI
LEAQ (-64*1024)(SP), BX
MOVQ BX, g_stackguard0(DI)
MOVQ BX, g_stackguard1(DI)
MOVQ BX, (g_stack+stack_lo)(DI)
MOVQ SP, (g_stack+stack_hi)(DI)
// Use arch_prctl(ARCH_SET_FS) to set the TLS base address for m0 thread to m0.tls
LEAQ runtime·m0+m_tls(SB), DI
CALL runtime·settls(SB)
// Perform basic correctness and safety checks
// Including: size of basic types, size of platform pointer types, CAS correctness checks
CALL runtime·check(SB)
// Copy argc and argv from 24(SP)/32(SP) to 0(SP)/8(SP), where runtime·args
// takes its two arguments.
// NOTE(review): the values at 24(SP)/32(SP) are presumably saved by the
// elided prologue — confirm against runtime/asm_amd64.s.
MOVL 24(SP), AX // copy argc
MOVL AX, 0(SP)
MOVQ 32(SP), AX // copy argv
MOVQ AX, 8(SP)
CALL runtime·args(SB)
CALL runtime·osinit(SB)
CALL runtime·schedinit(SB)
// Pass the address of the runtime.main function to newproc
// This adds runtime.main to the run queue of a p
// The entry address is pushed as newproc's argument and popped again after the call
MOVQ $runtime·mainPC(SB), AX // entry
PUSHQ AX
CALL runtime·newproc(SB)
POPQ AX
// The main thread executes the schedule dispatch loop
// runtime.main will be scheduled for execution
// runtime.main will internally call main.main
CALL runtime·mstart(SB)
// mstart should not return, so an error is raised here
CALL runtime·abort(SB)
RET
// runtime·mstart is the assembly entry point for starting an M; it simply
// calls the Go function mstart0. The trailing RET is not expected to execute.
TEXT runtime·mstart(SB),NOSPLIT|TOPFRAME|NOFRAME,$0
CALL runtime·mstart0(SB)
RET // not reached
runtime.args#
// runtime/runtime1.go
// args records the process argument count c and argument vector v in the
// runtime globals argc and argv, then hands both to sysargs for
// platform-specific processing.
func args(c int32, v **byte) {
argc = c
argv = v
sysargs(c, v)
}
This function mainly sets the two global variables argc and argv. In sysargs, it walks past argv (and the environment block that follows it) to the auxiliary vector, and from there retrieves/sets:
- startupRandomData, a 16-byte random data buffer set by the kernel (ld-linux.so)
- physPageSize, the system's physical page size
- Reads the ELF file header to obtain string tables, symbol tables, dynamic linking, and vdso information
- On Linux, it reads the symbol table to set two special pointers for vdso calls
// vdsoSymbolKeys lists the vDSO symbols the runtime looks up. Each entry holds
// the symbol name, two numeric constants (presumably precomputed hashes of the
// name used for the symbol-table lookup — TODO confirm), and the address of
// the variable that receives the resolved symbol.
var vdsoSymbolKeys = []vdsoSymbolKey{
{"__vdso_gettimeofday", 0x315ca59, 0xb01bca00, &vdsoGettimeofdaySym},
{"__vdso_clock_gettime", 0xd35ec75, 0x6e43a318, &vdsoClockgettimeSym},
}
runtime.osinit#
// runtime/os_linux.go
// osinit performs the Linux-specific part of early runtime initialization:
// it stores the processor count in ncpu and the huge page size in
// physHugePageSize, handles the cgo case, and finally calls osArchInit.
func osinit() {
ncpu = getproccount()
physHugePageSize = getHugePageSize()
if iscgo {
// ... handle cgo signal related setup (elided in this excerpt)
}
osArchInit()
}
The osinit function on Linux is relatively simple:
- Retrieves the number of available processors via sched_getaffinity
- Obtains the physical size of transparent huge pages via /sys/kernel/mm/transparent_hugepage/hpage_pmd_size
runtime.schedinit#
This function performs the initialization work for the Go coroutine scheduler. Only the key parts of the code are shown here; calls to functions that are currently empty (not yet implemented) are omitted.
// runtime/proc.go
// schedinit bootstraps the scheduler on the initial thread: it runs the
// one-time initializers listed below in order and then sizes the set of Ps
// with procresize, using ncpu unless GOMAXPROCS holds a positive integer.
func schedinit() {
gp := getg()
// Upper bound on the number of Ms (threads)
sched.maxmcount = 10000
moduledataverify() // module data verification
stackinit() // coroutine stack memory pool initialization
mallocinit() // memory allocator initialization
alginit() // initialize hashing, including detection of hardware AES support
fastrandinit() // initialize random seed, using the previous startupRandomData
mcommoninit(gp.m, -1) // initialize the M: assign its incrementing ID, its signal-handling coroutine, and its fast random seed
modulesinit() // read each module to initialize the sizes of global variables scanned by GC
typelinksinit() // read each module to collect type link information
itabsinit() // initialize itab table based on typelink
stkobjinit() // GC-related initialization for stack objects
sigsave(&gp.m.sigmask) // save thread signal mask
goargs() // set global variable argslice
goenvs() // set global variable envs
parsedebugvars() // read the GODEBUG environment variable and apply debug/traceback settings
gcinit() // GC initialization
// Initialize allp based on the number of physical processors or GOMAXPROCS
procs := ncpu
if n, ok := atoi32(gogetenv("GOMAXPROCS")); ok && n > 0 {
procs = n
}
// During bootstrap procresize must not return any P with runnable work
if procresize(procs) != nil {
throw("unknown runnable goroutine during bootstrap")
}
}
runtime.getg#
The runtime.getg function is not compiled as a real call: during compilation it is replaced with a single assembly instruction that loads the current g pointer from thread-local storage (TLS). The TLS slot is set to the current coroutine data pointer, i.e., *g, when the coroutine is switched in (see gogo below).
// Compiler lowering of getg:
// ir.OGETG -> ssa.OpGetG -> ssa.OpAMD64LoweredGetG -> MOVQ (TLS), r
// runtime·gogo is the exported entry point; after the elided setup it jumps
// to the file-local gogo<> routine.
TEXT runtime·gogo(SB), NOSPLIT, $0-8
// ...
JMP gogo<>(SB)
// gogo<> fetches the TLS base into CX and stores DX into the g slot of TLS,
// the same slot that the lowered getg instruction reads.
// NOTE(review): DX presumably holds the *g being resumed, loaded by the
// elided code above — confirm against runtime/asm_amd64.s.
TEXT gogo<>(SB), NOSPLIT, $0
get_tls(CX)
MOVQ DX, g(CX)
runtime.procresize#
// runtime/proc.go
// procresize changes the number of Ps to nprocs. It creates and initializes
// any missing Ps, makes sure the calling M holds a P that survives the resize,
// destroys surplus Ps, and returns a linked list (chained through p.link) of
// the Ps that have runnable work on their local queues.
//
// NOTE: this excerpt omits the first half of the function; the variables old
// and now used below are defined there (old is presumably the previous P
// count and now a timestamp — confirm against the full source).
func procresize(nprocs int32) *p {
// The first half mainly modifies:
// 1. allp []*p
// 2. idlepMask, idle P mask where each bit indicates whether the corresponding index in allp is idle
// 3. timerpMask, mask for P that may have timers, where each bit indicates whether the corresponding index in allp has a timer
// When expanding the number of procs, initialize the newly created P
// At the start of the program, allp is empty, here it will create and initialize all P
for i := old; i < nprocs; i++ {
pp := allp[i]
if pp == nil {
pp = new(p)
}
pp.init(i)
// Publish the P with an atomic pointer store
atomicstorep(unsafe.Pointer(&allp[i]), unsafe.Pointer(pp))
}
// When the number of P shrinks, the excess P are destroyed below
// So here we first ensure that the current P is not the one to be destroyed
// If it is (or if there is no current P), switch to allp[0]
gp := getg()
if gp.m.p != 0 && gp.m.p.ptr().id < nprocs {
// Continue using the current P
gp.m.p.ptr().status = _Prunning
gp.m.p.ptr().mcache.prepareForSweep()
} else {
// Detach the M from its current P (if any), then acquire allp[0]
if gp.m.p != 0 {
gp.m.p.ptr().m = 0
}
gp.m.p = 0
pp := allp[0]
pp.m = 0
pp.status = _Pidle
acquirep(pp)
}
// When the number of procs shrinks, clean up old P
for i := nprocs; i < old; i++ {
pp := allp[i]
pp.destroy()
}
// Build and return the linked list of P that have runnable work
var runnablePs *p
for i := nprocs - 1; i >= 0; i-- {
pp := allp[i]
// Skip the P held by the current M
if gp.m.p.ptr() == pp {
continue
}
pp.status = _Pidle
if runqempty(pp) {
// If there are no runnable g on the local queue of p, put it on the idle P list
pidleput(pp, now)
} else {
// Find an idle m for p, here m may be empty
pp.m.set(mget())
// Prepend p to the result list
pp.link.set(runnablePs)
runnablePs = pp
}
}
if old != nprocs {
// If the number of procs changes, modify the proc capacity occupied by GC (default takes 25% of the proc count)
gcCPULimiter.resetCapacity(now, nprocs)
}
return runnablePs
}
runtime.newproc#
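newproc takes a function (address), creates a new g to run it, and places that g on the local run queue of the current p. Once runtime.main has started (mainStarted is true), it also calls wakep so that an idle p can be woken up to run the new g.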
During the program startup, the address of runtime.main is passed here.
// newproc creates a new g that will run fn and queues it on the current P.
// The work is done inside systemstack: newproc1 builds the g, runqput places
// it on the P's local run queue, and wakep is called only once mainStarted
// has been set.
func newproc(fn *funcval) {
gp := getg()
// Caller's PC, passed to newproc1 and recorded there as the new g's gopc
pc := getcallerpc()
systemstack(func() {
newg := newproc1(fn, gp, pc)
pp := getg().m.p.ptr()
runqput(pp, newg, true)
if mainStarted {
// mainStarted is set in runtime.main
wakep()
}
})
}
// newproc1 builds a g in state _Grunnable that will start executing fn.
// callergp and callerpc identify the coroutine and code address that
// requested the new g; they are recorded on the new g (parentGoid, gopc,
// ancestors). The returned g has not been queued; the caller does that.
func newproc1(fn *funcval, callergp *g, callerpc uintptr) *g {
// acquirem/releasem bracket the whole construction
mp := acquirem()
pp := mp.p.ptr()
newg := gfget(pp) // Here it first tries to take one from the freeg linked list of p
if newg == nil {
// If not found, create a new g using malg
// The new g's stack has not been initialized, to avoid being scanned by GC, set the state to dead first
// Add it to the global allg
newg = malg(_StackMin)
casgstatus(newg, _Gidle, _Gdead)
allgadd(newg)
}
// Calculate the stack pointer position: reserve an aligned area at the top of the stack
totalSize := uintptr(4*goarch.PtrSize + sys.MinFrameSize) // extra space in case of reads slightly beyond frame
totalSize = alignUp(totalSize, sys.StackAlign)
sp := newg.stack.hi - totalSize
// Set up newg's scheduling data, stack pointer, function address, program counter, caller information, etc.
// The saved context is cleared first so no stale values remain
memclrNoHeapPointers(unsafe.Pointer(&newg.sched), unsafe.Sizeof(newg.sched))
newg.sched.sp = sp
newg.stktopsp = sp
newg.sched.pc = abi.FuncPCABI0(goexit) + sys.PCQuantum // +PCQuantum so that previous instruction is in the same function
newg.sched.g = guintptr(unsafe.Pointer(newg))
// Adjust the saved context so that the g begins execution in fn
gostartcallfn(&newg.sched, fn)
newg.parentGoid = callergp.goid
newg.gopc = callerpc
newg.ancestors = saveAncestors(callergp)
newg.startpc = fn.fn
// Change the state to runnable and add the stack to the GC stack scan
casgstatus(newg, _Gdead, _Grunnable)
gcController.addScannableStack(pp, int64(newg.stack.hi-newg.stack.lo))
// Assign the next goroutine ID from the P's local goid cache
newg.goid = pp.goidcache
pp.goidcache++
releasem(mp)
return newg
}
runtime.mstart0#
// mstart0 is the Go entry point for a newly started M, called from the
// assembly routine mstart. It sets up g0's stack guards, runs mstart1, and
// calls mexit if control ever comes back from mstart1.
func mstart0() {
gp := getg()
// Initialize g0's stackguard for stack overflow and stack expansion checks
gp.stackguard0 = gp.stack.lo + _StackGuard
gp.stackguard1 = gp.stackguard0
mstart1()
// NOTE: osStack is defined in code omitted from this excerpt
mexit(osStack)
}
// mstart1 finishes M startup: it saves a return context in g0.sched, runs
// per-thread initialization (minit, plus mstartm0 on m0), runs the optional
// mstartfn, acquires the P handed over in nextp for any M other than m0, and
// then enters the scheduling loop.
func mstart1() {
gp := getg()
// Set up m.g0.sched as a label returning to just
// after the mstart1 call in mstart0 above, for use by goexit0 and mcall.
// We're never coming back to mstart1 after we call schedule,
// so other calls can reuse the current frame.
// And goexit0 does a gogo that needs to return from mstart1
// and let mstart0 exit the thread.
gp.sched.g = guintptr(unsafe.Pointer(gp))
gp.sched.pc = getcallerpc()
gp.sched.sp = getcallersp()
// Initialize the thread's signal handling coroutine stack and signal mask
minit()
if gp.m == &m0 {
// Set the thread's signal handling function sighandler
mstartm0()
}
// Some internal threads like sysmon start directly here
if fn := gp.m.mstartfn; fn != nil {
fn()
}
// Any M other than m0 takes over the P stored in nextp and clears the field
if gp.m != &m0 {
acquirep(gp.m.nextp.ptr())
gp.m.nextp = 0
}
// Execute the scheduling loop, never returns
// During program startup there is currently only one p and one g,
// so this ends up jumping to runtime.main
schedule()
}
// main is runtime.main, the body of the main coroutine queued during startup.
// It sets the maximum stack size, runs the runtime's and then the user's init
// tasks, calls the user's main.main, waits briefly for panicking coroutines
// to finish their deferred calls, runs the exit hooks, and exits with status 0.
func main() {
mp := getg().m
// Set the maximum stack size: 1 GB (decimal) when pointers are 8 bytes, 250 MB otherwise
if goarch.PtrSize == 8 {
maxstacksize = 1000000000
} else {
maxstacksize = 250000000
}
// From this point on, newproc calls wakep after queuing a new coroutine
mainStarted = true
// Execute the init function under runtime and initialize global variables
doInit(&runtime_inittask)
// Enable GC
gcenable()
// Execute user-level init functions and initialize global variables
doInit(&main_inittask)
// -buildmode=c-archive or c-shared does not execute main
if isarchive || islibrary {
return
}
// Execute the user-level main.main function
fn := main_main
fn()
// Before the main coroutine exits, if there are currently other coroutines handling panic-defer
// It needs to wait for other coroutines to finish first (such as printing panic information, etc.)
// The wait is bounded: it yields at most 1000 times
if runningPanicDefers.Load() != 0 {
for c := 0; c < 1000; c++ {
if runningPanicDefers.Load() == 0 {
break
}
Gosched()
}
}
// Execute registered hooks before the process exits
// For example, output code coverage data in compile -cover mode
runExitHooks(0)
exit(0)
}