在Linux平台上,大家都很熟悉如下经典的Hello world程序 - hello.c。
[root:~/work/v1/hello]# cat hello.c
//File Name: hello.c
//The hello world program
#include<stdio.h>
int main()
{
printf("hello, world\n");
return 0;
}
[root:~/work/v1/hello]#
问题:
假设hello.c源程序经过GCC编译后的可执行程序为hello,请问: hello可执行程序就只是由hello.c编译而成的吗?
hello.c编译和运行的过程如下:
[root:~/work/v1/hello]# gcc hello.c -o hello
[root:~/work/v1/hello]#
[root:~/work/v1/hello]# ./hello
hello, world
[root:~/work/v1/hello]#
一切在预料之中,顺利地编译和运行。问题是,虽然上面编译的时候确实只输入了hello.c文件,但hello可执行程序真的就只是由hello.c这一个文件编译而成的吗?
GCC提供了“-v”选项可以输出详细的编译过程。我们可以通过如下命令查看hello.c详细的编译过程:
[root:~/work/v1/hello]# gcc -v hello.c -o hello
Using built-in specs.
COLLECT_GCC=gcc
COLLECT_LTO_WRAPPER=/usr/lib/gcc/x86_64-linux-gnu/9/lto-wrapper
OFFLOAD_TARGET_NAMES=nvptx-none:hsa
OFFLOAD_TARGET_DEFAULT=1
Target: x86_64-linux-gnu
Configured with: ../src/configure -v --with-pkgversion='Ubuntu 9.4.0-1ubuntu1~20.04.2' --with-bugurl=file:///usr/share/doc/gcc-9/README.Bugs --enable-languages=c,ada,c++,go,brig,d,fortran,objc,obj-c++,gm2 --prefix=/usr --with-gcc-major-version-only --program-suffix=-9 --program-prefix=x86_64-linux-gnu- --enable-shared --enable-linker-build-id --libexecdir=/usr/lib --without-included-gettext --enable-threads=posix --libdir=/usr/lib --enable-nls --enable-clocale=gnu --enable-libstdcxx-debug --enable-libstdcxx-time=yes --with-default-libstdcxx-abi=new --enable-gnu-unique-object --disable-vtable-verify --enable-plugin --enable-default-pie --with-system-zlib --with-target-system-zlib=auto --enable-objc-gc=auto --enable-multiarch --disable-werror --with-arch-32=i686 --with-abi=m64 --with-multilib-list=m32,m64,mx32 --enable-multilib --with-tune=generic --enable-offload-targets=nvptx-none=/build/gcc-9-9QDOt0/gcc-9-9.4.0/debian/tmp-nvptx/usr,hsa --without-cuda-driver --enable-checking=release --build=x86_64-linux-gnu --host=x86_64-linux-gnu --target=x86_64-linux-gnu
Thread model: posix
gcc version 9.4.0 (Ubuntu 9.4.0-1ubuntu1~20.04.2)
COLLECT_GCC_OPTIONS='-v' '-o' 'hello' '-mtune=generic' '-march=x86-64'
/usr/lib/gcc/x86_64-linux-gnu/9/cc1 -quiet -v -imultiarch x86_64-linux-gnu hello.c -quiet -dumpbase hello.c -mtune=generic -march=x86-64 -auxbase hello -version -fasynchronous-unwind-tables -fstack-protector-strong -Wformat -Wformat-security -fstack-clash-protection -fcf-protection -o /tmp/ccdeaBwe.s
GNU C17 (Ubuntu 9.4.0-1ubuntu1~20.04.2) version 9.4.0 (x86_64-linux-gnu)
compiled by GNU C version 9.4.0, GMP version 6.2.0, MPFR version 4.0.2, MPC version 1.1.0, isl version isl-0.22.1-GMP
warning: MPFR header version 4.0.2 differs from library version 4.2.1.
warning: MPC header version 1.1.0 differs from library version 1.3.1.
GGC heuristics: --param ggc-min-expand=100 --param ggc-min-heapsize=131072
ignoring nonexistent directory "/usr/local/include/x86_64-linux-gnu"
ignoring nonexistent directory "/usr/lib/gcc/x86_64-linux-gnu/9/include-fixed"
ignoring nonexistent directory "/usr/lib/gcc/x86_64-linux-gnu/9/../../../../x86_64-linux-gnu/include"
#include "..." search starts here:
#include <...> search starts here:
/usr/lib/gcc/x86_64-linux-gnu/9/include
/usr/local/include
/usr/include/x86_64-linux-gnu
/usr/include
End of search list.
GNU C17 (Ubuntu 9.4.0-1ubuntu1~20.04.2) version 9.4.0 (x86_64-linux-gnu)
compiled by GNU C version 9.4.0, GMP version 6.2.0, MPFR version 4.0.2, MPC version 1.1.0, isl version isl-0.22.1-GMP
warning: MPFR header version 4.0.2 differs from library version 4.2.1.
warning: MPC header version 1.1.0 differs from library version 1.3.1.
GGC heuristics: --param ggc-min-expand=100 --param ggc-min-heapsize=131072
Compiler executable checksum: 01da938ff5dc2163489aa33cb3b747a7
COLLECT_GCC_OPTIONS='-v' '-o' 'hello' '-mtune=generic' '-march=x86-64'
as -v --64 -o /tmp/ccXS77ga.o /tmp/ccdeaBwe.s
GNU assembler version 2.34 (x86_64-linux-gnu) using BFD version (GNU Binutils for Ubuntu) 2.34
COMPILER_PATH=/usr/lib/gcc/x86_64-linux-gnu/9/:/usr/lib/gcc/x86_64-linux-gnu/9/:/usr/lib/gcc/x86_64-linux-gnu/:/usr/lib/gcc/x86_64-linux-gnu/9/:/usr/lib/gcc/x86_64-linux-gnu/
LIBRARY_PATH=/usr/lib/gcc/x86_64-linux-gnu/9/:/usr/lib/gcc/x86_64-linux-gnu/9/../../../x86_64-linux-gnu/:/usr/lib/gcc/x86_64-linux-gnu/9/../../../../lib/:/lib/x86_64-linux-gnu/:/lib/../lib/:/usr/lib/x86_64-linux-gnu/:/usr/lib/../lib/:/usr/lib/gcc/x86_64-linux-gnu/9/../../../:/lib/:/usr/lib/
COLLECT_GCC_OPTIONS='-v' '-o' 'hello' '-mtune=generic' '-march=x86-64'
/usr/lib/gcc/x86_64-linux-gnu/9/collect2 -plugin /usr/lib/gcc/x86_64-linux-gnu/9/liblto_plugin.so -plugin-opt=/usr/lib/gcc/x86_64-linux-gnu/9/lto-wrapper -plugin-opt=-fresolution=/tmp/ccEXwvue.res -plugin-opt=-pass-through=-lgcc -plugin-opt=-pass-through=-lgcc_s -plugin-opt=-pass-through=-lc -plugin-opt=-pass-through=-lgcc -plugin-opt=-pass-through=-lgcc_s --build-id --eh-frame-hdr -m elf_x86_64 --hash-style=gnu --as-needed -dynamic-linker /lib64/ld-linux-x86-64.so.2 -pie -z now -z relro -o hello /usr/lib/gcc/x86_64-linux-gnu/9/../../../x86_64-linux-gnu/Scrt1.o /usr/lib/gcc/x86_64-linux-gnu/9/../../../x86_64-linux-gnu/crti.o /usr/lib/gcc/x86_64-linux-gnu/9/crtbeginS.o -L/usr/lib/gcc/x86_64-linux-gnu/9 -L/usr/lib/gcc/x86_64-linux-gnu/9/../../../x86_64-linux-gnu -L/usr/lib/gcc/x86_64-linux-gnu/9/../../../../lib -L/lib/x86_64-linux-gnu -L/lib/../lib -L/usr/lib/x86_64-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/x86_64-linux-gnu/9/../../.. /tmp/ccXS77ga.o -lgcc --push-state --as-needed -lgcc_s --pop-state -lc -lgcc --push-state --as-needed -lgcc_s --pop-state /usr/lib/gcc/x86_64-linux-gnu/9/crtendS.o /usr/lib/gcc/x86_64-linux-gnu/9/../../../x86_64-linux-gnu/crtn.o
COLLECT_GCC_OPTIONS='-v' '-o' 'hello' '-mtune=generic' '-march=x86-64'
[root:~/work/v1/hello]#
在编译的最后阶段,我们看到如下的过程:
/usr/lib/gcc/x86_64-linux-gnu/9/collect2 -plugin /usr/lib/gcc/x86_64-linux-gnu/9/liblto_plugin.so -plugin-opt=/usr/lib/gcc/x86_64-linux-gnu/9/lto-wrapper -plugin-opt=-fresolution=/tmp/ccEXwvue.res -plugin-opt=-pass-through=-lgcc -plugin-opt=-pass-through=-lgcc_s -plugin-opt=-pass-through=-lc -plugin-opt=-pass-through=-lgcc -plugin-opt=-pass-through=-lgcc_s --build-id --eh-frame-hdr -m elf_x86_64 --hash-style=gnu --as-needed -dynamic-linker /lib64/ld-linux-x86-64.so.2 -pie -z now -z relro -o hello /usr/lib/gcc/x86_64-linux-gnu/9/../../../x86_64-linux-gnu/Scrt1.o /usr/lib/gcc/x86_64-linux-gnu/9/../../../x86_64-linux-gnu/crti.o /usr/lib/gcc/x86_64-linux-gnu/9/crtbeginS.o -L/usr/lib/gcc/x86_64-linux-gnu/9 -L/usr/lib/gcc/x86_64-linux-gnu/9/../../../x86_64-linux-gnu -L/usr/lib/gcc/x86_64-linux-gnu/9/../../../../lib -L/lib/x86_64-linux-gnu -L/lib/../lib -L/usr/lib/x86_64-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/x86_64-linux-gnu/9/../../.. /tmp/ccXS77ga.o -lgcc --push-state --as-needed -lgcc_s --pop-state -lc -lgcc --push-state --as-needed -lgcc_s --pop-state /usr/lib/gcc/x86_64-linux-gnu/9/crtendS.o /usr/lib/gcc/x86_64-linux-gnu/9/../../../x86_64-linux-gnu/crtn.o
可以看到可执行程序hello,并不是由hello.o (编译过程使用临时文件 - /tmp/ccXS77ga.o)一个文件生成的,在链接的时候GCC编译器还给hello程序链接了另外5个.o文件(当然还链接了一些库文件,如-lgcc, -lc等):
• Scrt1.o: /usr/lib/gcc/x86_64-linux-gnu/9/../../../x86_64-linux-gnu/Scrt1.o
• crti.o: /usr/lib/gcc/x86_64-linux-gnu/9/../../../x86_64-linux-gnu/crti.o
• crtbeginS.o: /usr/lib/gcc/x86_64-linux-gnu/9/crtbeginS.o
• crtendS.o: /usr/lib/gcc/x86_64-linux-gnu/9/crtendS.o
• crtn.o: /usr/lib/gcc/x86_64-linux-gnu/9/../../../x86_64-linux-gnu/crtn.o
由于在Linux上,路径中“..”表示上一级目录,把“..”解析之后,上面这些.o文件的路径为:
• Scrt1.o: /usr/lib/x86_64-linux-gnu/Scrt1.o
• crti.o: /usr/lib/x86_64-linux-gnu/crti.o
• crtbeginS.o: /usr/lib/gcc/x86_64-linux-gnu/9/crtbeginS.o
• crtendS.o: /usr/lib/gcc/x86_64-linux-gnu/9/crtendS.o
• crtn.o: /usr/lib/x86_64-linux-gnu/crtn.o
看到这里,你肯定会产生很多疑问,如:
• 为什么要链接这么多.o文件?
• 这些.o文件是做什么的?
• 不链接这些.o文件是否可以?
• 这些.o文件从哪里来?它们的源码在哪里?
• 似乎这些.o文件都是以crt开头的,crt是什么意思?
• ……
CRT(C Runtime)是指C运行时库,它为C和C++程序提供了一组初始化和终止程序的基本构建模块。这些构建模块确保在main()函数执行之前和之后进行适当的初始化和清理工作。
为了更好地理解这些文件是如何工作的,可以考虑它们为程序的生命周期提供了一个基本框架:从程序的开始,到main函数的执行,再到程序的结束,每个阶段都有相应的初始化和清理工作需要完成,这些CRT文件就是为此目的而存在的。
以下是这些运行时组件在链接过程中的一般顺序:
ld Scrt1.o crti.o crtbeginS.o [user_objects] [system_libraries] crtendS.o crtn.o
例如: 对于前面的hello程序,可以使用如下的命令来进行链接。
ld -dynamic-linker /lib64/ld-linux-x86-64.so.2 -o hello /usr/lib/gcc/x86_64-linux-gnu/9/../../../x86_64-linux-gnu/crt1.o /usr/lib/gcc/x86_64-linux-gnu/9/../../../x86_64-linux-gnu/crti.o /usr/lib/gcc/x86_64-linux-gnu/9/crtbegin.o hello.o -lc /usr/lib/gcc/x86_64-linux-gnu/9/crtend.o /usr/lib/gcc/x86_64-linux-gnu/9/../../../x86_64-linux-gnu/crtn.o
例如: 如下是在Ubuntu 20.04.6上的运行情况:
• 首先编译出hello.o目标文件:
[root:~/work/v1/hello]# gcc -c hello.c
[root:~/work/v1/hello]#
• 通过ld命令进行链接:
[root:~/work/v1/hello]# ld -dynamic-linker /lib64/ld-linux-x86-64.so.2 -o hello /usr/lib/gcc/x86_64-linux-gnu/9/../../../x86_64-linux-gnu/crt1.o /usr/lib/gcc/x86_64-linux-gnu/9/../../../x86_64-linux-gnu/crti.o /usr/lib/gcc/x86_64-linux-gnu/9/crtbegin.o hello.o -lc /usr/lib/gcc/x86_64-linux-gnu/9/crtend.o /usr/lib/gcc/x86_64-linux-gnu/9/../../../x86_64-linux-gnu/crtn.o
[root:~/work/v1/hello]#
• 运行:
[root:~/work/v1/hello]# ./hello
hello, world
[root:~/work/v1/hello]#
我们可以对生成的hello可执行文件进行反汇编,看看hello里面有哪些内容:
[root:~/work/v1/hello]# objdump -d hello
hello: file format elf64-x86-64
Disassembly of section .init:
0000000000401000 <_init>:
401000: f3 0f 1e fa endbr64
401004: 48 83 ec 08 sub $0x8,%rsp
401008: 48 8b 05 e9 2f 00 00 mov 0x2fe9(%rip),%rax # 403ff8 <__gmon_start__>
40100f: 48 85 c0 test %rax,%rax
401012: 74 02 je 401016 <_init+0x16>
401014: ff d0 callq *%rax
401016: 48 83 c4 08 add $0x8,%rsp
40101a: c3 retq
Disassembly of section .plt:
0000000000401020 <.plt>:
401020: ff 35 e2 2f 00 00 pushq 0x2fe2(%rip) # 404008 <_GLOBAL_OFFSET_TABLE_+0x8>
401026: f2 ff 25 e3 2f 00 00 bnd jmpq *0x2fe3(%rip) # 404010 <_GLOBAL_OFFSET_TABLE_+0x10>
40102d: 0f 1f 00 nopl (%rax)
401030: f3 0f 1e fa endbr64
401034: 68 00 00 00 00 pushq $0x0
401039: f2 e9 e1 ff ff ff bnd jmpq 401020 <.plt>
40103f: 90 nop
Disassembly of section .plt.sec:
0000000000401040 <puts@plt>:
401040: f3 0f 1e fa endbr64
401044: f2 ff 25 cd 2f 00 00 bnd jmpq *0x2fcd(%rip) # 404018 <puts@GLIBC_2.2.5>
40104b: 0f 1f 44 00 00 nopl 0x0(%rax,%rax,1)
Disassembly of section .text:
0000000000401050 <_start>:
401050: f3 0f 1e fa endbr64
401054: 31 ed xor %ebp,%ebp
401056: 49 89 d1 mov %rdx,%r9
401059: 5e pop %rsi
40105a: 48 89 e2 mov %rsp,%rdx
40105d: 48 83 e4 f0 and $0xfffffffffffffff0,%rsp
401061: 50 push %rax
401062: 54 push %rsp
401063: 49 c7 c0 d0 11 40 00 mov $0x4011d0,%r8
40106a: 48 c7 c1 60 11 40 00 mov $0x401160,%rcx
401071: 48 c7 c7 36 11 40 00 mov $0x401136,%rdi
401078: ff 15 72 2f 00 00 callq *0x2f72(%rip) # 403ff0 <__libc_start_main@GLIBC_2.2.5>
40107e: f4 hlt
40107f: 90 nop
……<由于篇幅原因,此处省略了一些汇编代码>
0000000000401136 <main>:
401136: f3 0f 1e fa endbr64
40113a: 55 push %rbp
40113b: 48 89 e5 mov %rsp,%rbp
40113e: 48 8d 3d bf 0e 00 00 lea 0xebf(%rip),%rdi # 402004 <_IO_stdin_used+0x4>
401145: e8 f6 fe ff ff callq 401040 <puts@plt>
40114a: b8 00 00 00 00 mov $0x0,%eax
40114f: 5d pop %rbp
401150: c3 retq
401151: 66 2e 0f 1f 84 00 00 nopw %cs:0x0(%rax,%rax,1)
401158: 00 00 00
40115b: 0f 1f 44 00 00 nopl 0x0(%rax,%rax,1)
0000000000401160 <__libc_csu_init>:
401160: f3 0f 1e fa endbr64
401164: 41 57 push %r15
401166: 4c 8d 3d 93 2c 00 00 lea 0x2c93(%rip),%r15 # 403e00 <__frame_dummy_init_array_entry>
40116d: 41 56 push %r14
40116f: 49 89 d6 mov %rdx,%r14
401172: 41 55 push %r13
401174: 49 89 f5 mov %rsi,%r13
401177: 41 54 push %r12
401179: 41 89 fc mov %edi,%r12d
40117c: 55 push %rbp
40117d: 48 8d 2d 84 2c 00 00 lea 0x2c84(%rip),%rbp # 403e08
……<由于篇幅原因,此处省略了一些汇编代码>
Disassembly of section .fini:
00000000004011d8 <_fini>:
4011d8: f3 0f 1e fa endbr64
4011dc: 48 83 ec 08 sub $0x8,%rsp
4011e0: 48 83 c4 08 add $0x8,%rsp
4011e4: c3 retq
[root:~/work/v1/hello]#
通过反汇编可以看到,hello可执行文件中除了hello.c源码中的main函数以外,还有很多的其他函数,而这些其他的函数正是来自于上面的C运行时库的.o文件。例如,我们可以反汇编Scrt1.o来进行检查:
[root:~/work/v1/hello]# objdump -d /usr/lib/x86_64-linux-gnu/Scrt1.o
/usr/lib/x86_64-linux-gnu/Scrt1.o: file format elf64-x86-64
Disassembly of section .text:
0000000000000000 <_start>:
0: f3 0f 1e fa endbr64
4: 31 ed xor %ebp,%ebp
6: 49 89 d1 mov %rdx,%r9
9: 5e pop %rsi
a: 48 89 e2 mov %rsp,%rdx
d: 48 83 e4 f0 and $0xfffffffffffffff0,%rsp
11: 50 push %rax
12: 54 push %rsp
13: 4c 8b 05 00 00 00 00 mov 0x0(%rip),%r8 # 1a <_start+0x1a>
1a: 48 8b 0d 00 00 00 00 mov 0x0(%rip),%rcx # 21 <_start+0x21>
21: 48 8b 3d 00 00 00 00 mov 0x0(%rip),%rdi # 28 <_start+0x28>
28: ff 15 00 00 00 00 callq *0x0(%rip) # 2e <_start+0x2e>
2e: f4 hlt
[root:~/work/v1/hello]#
可以看到Scrt1.o中的_start符号,hello可执行文件中的_start符号正是来自于Scrt1.o。
正如前面所说,这些.o文件意在为可执行程序提供程序启动前后的初始化和清理框架,从而简化程序的开发工作,使程序员能够集中在程序本身的开发上。