2007年4月23日月曜日

printf("Hello, World.")を追いかける(1)

先週のHelloWorldプログラムを中間ファイル付きでコンパイルする.

$ gcc -save-temps ./hello.c
$ ls
a.out hello.c hello.i hello.o hello.s
アセンブラのソースファイルは以下の通り

.cstring
LC0:
.ascii "Hello, world.\0"
.text
.globl _main
_main:
pushl %ebp
movl %esp, %ebp
pushl %ebx
subl $20, %esp
call ___i686.get_pc_thunk.bx
"L00000000001$pb":
leal LC0-"L00000000001$pb"(%ebx), %eax
movl %eax, (%esp)
call L_printf$stub # i)
movl $0, %eax
addl $20, %esp
popl %ebx
popl %ebp
ret
.section __IMPORT,__jump_table,symbol_stubs,self_modifying_code+pure_instructions,5
L_printf$stub:
.indirect_symbol _printf
hlt ; hlt ; hlt ; hlt ; hlt
.subsections_via_symbols
.section __TEXT,__textcoal_nt,coalesced,pure_instructions
.weak_definition ___i686.get_pc_thunk.bx
.private_extern ___i686.get_pc_thunk.bx
___i686.get_pc_thunk.bx:
movl (%esp), %ebx
ret

LC0でラベルされた領域に"Hello, world."の文字列本体が入っていることが分かった.
i)で,L_printf$stubをcallし,L_printf$stubの中の.indirect_symbol _printfでライブラリのprintf関数を呼んでいることが分かる.
引数は,call直前に(%esp)へ積まれているLC0(文字列)のアドレス一つだけだと思われる.

次に,printf()の実体を探してみる.ひとまずotool -lでMach-Oバイナリを調べる.

$ otool -l ./a.out
./a.out:
Load command 0
cmd LC_SEGMENT
cmdsize 56
segname __PAGEZERO
vmaddr 0x00000000
vmsize 0x00001000
fileoff 0
filesize 0
maxprot 0x00000000
initprot 0x00000000
nsects 0
flags 0x4
Load command 1
cmd LC_SEGMENT
cmdsize 260
segname __TEXT
vmaddr 0x00001000
vmsize 0x00001000
fileoff 0
filesize 4096
maxprot 0x00000007
initprot 0x00000005
nsects 3
flags 0x0
Section
sectname __text
segname __TEXT
addr 0x00001e5c
size 0x00000143
offset 3676
align 2^2 (4)
reloff 0
nreloc 0
flags 0x80000400
reserved1 0
reserved2 0
Section
sectname __cstring
segname __TEXT
addr 0x00001fa0
size 0x0000005c
offset 4000
align 2^2 (4)
reloff 0
nreloc 0
flags 0x00000002
reserved1 0
reserved2 0
Section
sectname __textcoal_nt
segname __TEXT
addr 0x00001ffc
size 0x00000004
offset 4092
align 2^0 (1)
reloff 0
nreloc 0
flags 0x8000040b
reserved1 0
reserved2 0
Load command 2
cmd LC_SEGMENT
cmdsize 260
segname __DATA
vmaddr 0x00002000
vmsize 0x00001000
fileoff 4096
filesize 4096
maxprot 0x00000003
initprot 0x00000003
nsects 3
flags 0x0
Section
sectname __data
segname __DATA
addr 0x00002000
size 0x00000010
offset 4096
align 2^2 (4)
reloff 0
nreloc 0
flags 0x00000000
reserved1 0
reserved2 0
Section
sectname __dyld
segname __DATA
addr 0x00002010
size 0x00000008
offset 4112
align 2^2 (4)
reloff 0
nreloc 0
flags 0x00000000
reserved1 0
reserved2 0
Section
sectname __common
segname __DATA
addr 0x00002020
size 0x00000034
offset 0
align 2^4 (16)
reloff 0
nreloc 0
flags 0x00000001
reserved1 0
reserved2 0
Load command 3
cmd LC_SEGMENT
cmdsize 192
segname __IMPORT
vmaddr 0x00003000
vmsize 0x00001000
fileoff 8192
filesize 4096
maxprot 0x00000007
initprot 0x00000007
nsects 2
flags 0x0
Section
sectname __pointers
segname __IMPORT
addr 0x00003000
size 0x0000000c
offset 8192
align 2^0 (1)
reloff 0
nreloc 0
flags 0x00000006
reserved1 0 (index into indirect symbol table)
reserved2 0
Section
sectname __jump_table
segname __IMPORT
addr 0x0000300c
size 0x00000014
offset 8204
align 2^0 (1)
reloff 0
nreloc 0
flags 0x84000408
reserved1 3 (index into indirect symbol table)
reserved2 5 (size of stubs)
Load command 4
cmd LC_SEGMENT
cmdsize 56
segname __LINKEDIT
vmaddr 0x00004000
vmsize 0x00001000
fileoff 12288
filesize 1056
maxprot 0x00000003
initprot 0x00000001
nsects 0
flags 0x4
Load command 5
cmd LC_LOAD_DYLINKER
cmdsize 28
name /usr/lib/dyld (offset 12)
Load command 6
cmd LC_LOAD_DYLIB
cmdsize 52
name /usr/lib/libSystem.B.dylib (offset 24)
time stamp 1174039075 Fri Mar 16 18:57:55 2007
current version 88.3.6
compatibility version 1.0.0
Load command 7
cmd LC_SYMTAB
cmdsize 24
symoff 12288
nsyms 31
stroff 12716
strsize 628
Load command 8
cmd LC_DYSYMTAB
cmdsize 80
ilocalsym 0
nlocalsym 4
iextdefsym 4
nextdefsym 20
iundefsym 24
nundefsym 7
tocoff 0
ntoc 0
modtaboff 0
nmodtab 0
extrefsymoff 0
nextrefsyms 0
indirectsymoff 12688
nindirectsyms 7
extreloff 0
nextrel 0
locreloff 0
nlocrel 0
Load command 9
cmd LC_TWOLEVEL_HINTS
cmdsize 16
offset 12660
nhints 7
Load command 10
cmd LC_UNIXTHREAD
cmdsize 80
flavor i386_THREAD_STATE
count i386_THREAD_STATE_COUNT
eax 0x00000000 ebx 0x00000000 ecx 0x00000000 edx 0x00000000
edi 0x00000000 esi 0x00000000 ebp 0x00000000 esp 0x00000000
ss 0x0000001f eflags 0x00000000 eip 0x00001e5c cs 0x00000017
ds 0x0000001f es 0x0000001f fs 0x00000000 gs 0x00000000

Load command 6にて,/usr/lib/libSystem.B.dylibというDynamic Libraryが呼ばれているらしいことが分かる.恐らくこの中にprintf()があるのではないかと推測する.
さらに/usr/lib/libSystem.B.dylibに対してotool -lする.

$ otool -l /usr/lib/libSystem.B.dylib
/usr/lib/libSystem.B.dylib:
Load command 0
cmd LC_SEGMENT
cmdsize 804
segname __TEXT
vmaddr 0x90000000
vmsize 0x00173000
fileoff 0
filesize 1519616
maxprot 0x00000007
initprot 0x00000005
nsects 11
flags 0x0
Section
sectname __text
segname __TEXT
addr 0x90000cd0
size 0x0015e5ae
offset 3280
align 2^4 (16)
reloff 0
nreloc 0
flags 0x80000400
reserved1 0
reserved2 0
Section
sectname __picsymbol_stub
segname __TEXT
addr 0x9015f27e
size 0x0000004e
offset 1438334
align 2^0 (1)
reloff 0
nreloc 0
flags 0x80000408
reserved1 0 (index into indirect symbol table)
reserved2 26 (size of stubs)
Section
sectname __cstring
segname __TEXT
addr 0x9015f2cc
size 0x0000c6a4
offset 1438412
align 2^2 (4)
reloff 0
nreloc 0
flags 0x00000002
reserved1 0
reserved2 0
Section
sectname __textcoal_nt
segname __TEXT
addr 0x9016b970
size 0x00000008
offset 1489264
align 2^0 (1)
reloff 0
nreloc 0
flags 0x8000040b
reserved1 0
reserved2 0
Section
sectname __const
segname __TEXT
addr 0x9016b980
size 0x00006640
offset 1489280
align 2^5 (32)
reloff 0
nreloc 0
flags 0x00000000
reserved1 0
reserved2 0
Section
sectname __literal8
segname __TEXT
addr 0x90171fc0
size 0x00000938
offset 1515456
align 2^3 (8)
reloff 0
nreloc 0
flags 0x00000004
reserved1 0
reserved2 0
Section
sectname __literal4
segname __TEXT
addr 0x901728f8
size 0x00000100
offset 1517816
align 2^2 (4)
reloff 0
nreloc 0
flags 0x00000003
reserved1 0
reserved2 0
Section
sectname __StaticInit
segname __TEXT
addr 0x901729f8
size 0x0000007f
offset 1518072
align 2^1 (2)
reloff 0
nreloc 0
flags 0x80000400
reserved1 0
reserved2 0
Section
sectname __constructor
segname __TEXT
addr 0x90172a77
size 0x00000000
offset 1518199
align 2^0 (1)
reloff 0
nreloc 0
flags 0x00000000
reserved1 0
reserved2 0
Section
sectname __destructor
segname __TEXT
addr 0x90172a78
size 0x00000000
offset 1518200
align 2^1 (2)
reloff 0
nreloc 0
flags 0x00000000
reserved1 0
reserved2 0
Section
sectname __eh_frame
segname __TEXT
addr 0x90172a78
size 0x00000564
offset 1518200
align 2^2 (4)
reloff 0
nreloc 0
flags 0x6800000b
reserved1 0
reserved2 0
Load command 1
cmd LC_SEGMENT
cmdsize 668
segname __DATA
vmaddr 0xa0000000
vmsize 0x00011000
fileoff 1519616
filesize 40960
maxprot 0x00000003
initprot 0x00000003
nsects 9
flags 0x0
Section
sectname __data
segname __DATA
addr 0xa0000000
size 0x00005ba8
offset 1519616
align 2^5 (32)
reloff 0
nreloc 0
flags 0x00000300
reserved1 0
reserved2 0
Section
sectname __dyld
segname __DATA
addr 0xa0005ba8
size 0x00000008
offset 1543080
align 2^2 (4)
reloff 0
nreloc 0
flags 0x00000000
reserved1 0
reserved2 0
Section
sectname __la_symbol_ptr
segname __DATA
addr 0xa0005bb0
size 0x0000000c
offset 1543088
align 2^2 (4)
reloff 0
nreloc 0
flags 0x00000107
reserved1 3 (index into indirect symbol table)
reserved2 0
Section
sectname __const
segname __DATA
addr 0xa0005bc0
size 0x00001288
offset 1543104
align 2^5 (32)
reloff 0
nreloc 0
flags 0x00000300
reserved1 0
reserved2 0
Section
sectname __nl_symbol_ptr
segname __DATA
addr 0xa0006e48
size 0x00000030
offset 1547848
align 2^2 (4)
reloff 0
nreloc 0
flags 0x00000006
reserved1 6 (index into indirect symbol table)
reserved2 0
Section
sectname __mod_init_func
segname __DATA
addr 0xa0006e78
size 0x0000000c
offset 1547896
align 2^2 (4)
reloff 0
nreloc 0
flags 0x00000109
reserved1 0
reserved2 0
Section
sectname __commpage
segname __DATA
addr 0xa0006e90
size 0x00002380
offset 1547920
align 2^4 (16)
reloff 0
nreloc 0
flags 0x10000000
reserved1 0
reserved2 0
Section
sectname __bss
segname __DATA
addr 0xa0009220
size 0x00006dd0
offset 0
align 2^5 (32)
reloff 0
nreloc 0
flags 0x00000001
reserved1 0
reserved2 0
Section
sectname __common
segname __DATA
addr 0xa0010000
size 0x0000017c
offset 0
align 2^5 (32)
reloff 0
nreloc 0
flags 0x00000001
reserved1 0
reserved2 0
Load command 2
cmd LC_SEGMENT
cmdsize 192
segname __IMPORT
vmaddr 0xa0011000
vmsize 0x00002000
fileoff 1560576
filesize 8192
maxprot 0x00000007
initprot 0x00000007
nsects 2
flags 0x0
Section
sectname __jump_table
segname __IMPORT
addr 0xa0011000
size 0x000016da
offset 1560576
align 2^0 (1)
reloff 0
nreloc 0
flags 0x84000408
reserved1 18 (index into indirect symbol table)
reserved2 5 (size of stubs)
Section
sectname __pointers
segname __IMPORT
addr 0xa00126da
size 0x000002ec
offset 1566426
align 2^0 (1)
reloff 0
nreloc 0
flags 0x00000006
reserved1 1188 (index into indirect symbol table)
reserved2 0
Load command 3
cmd LC_SEGMENT
cmdsize 56
segname __LINKEDIT
vmaddr 0x90173000
vmsize 0x0004e978
fileoff 1568768
filesize 321912
maxprot 0x00000003
initprot 0x00000001
nsects 0
flags 0x4
Load command 4
cmd LC_ID_DYLIB
cmdsize 52
name /usr/lib/libSystem.B.dylib (offset 24)
time stamp 1174039075 Fri Mar 16 18:57:55 2007
current version 88.3.6
compatibility version 1.0.0
Load command 5
cmd LC_LOAD_DYLIB
cmdsize 64
name /usr/lib/system/libmathCommon.A.dylib (offset 24)
time stamp 1174039075 Fri Mar 16 18:57:55 2007
current version 220.0.0
compatibility version 1.0.0
Load command 6
cmd LC_SUB_LIBRARY
cmdsize 28
sub_library libmathCommon (offset 12)
Load command 7
cmd LC_SYMTAB
cmdsize 24
symoff 1582248
nsyms 5795
stroff 1789032
strsize 101648
Load command 8
cmd LC_DYSYMTAB
cmdsize 80
ilocalsym 0
nlocalsym 2375
iextdefsym 2375
nextdefsym 3413
iundefsym 5788
nundefsym 7
tocoff 1657476
ntoc 3413
modtaboff 1684780
nmodtab 1217
extrefsymoff 1748064
nextrefsyms 10242
indirectsymoff 1651976
nindirectsyms 1375
extreloff 1651816
nextrel 20
locreloff 1568768
nlocrel 1685
Load command 9
cmd LC_TWOLEVEL_HINTS
cmdsize 16
offset 1651788
nhints 7
Load command 10
cmd LC_PREBIND_CKSUM
cmdsize 12
cksum 0x00000000

どうやらさらにライブラリを呼んでいるらしいことが分かる.ちなみにlibSystem.B.dylibに対してstringsコマンドをかけてみたが,printfという文字列は見つからなかった.
ということは,Load command 5でロードされている/usr/lib/system/libmathCommon.A.dylib辺りが次に怪しいので,そちらを調べる.

・・・と続けたが,どうもうまくprintfの実体を見つけることが出来なかった.探し方が悪いのかも知れない.

結局,知りたいのはprintfの内部動作なので,Static Linkでコンパイルし直して眺めてみることにする.
-staticオプション付きでhello.cをコンパイルし直してみる.

$ gcc -static hello.c
/usr/bin/ld: can't locate file for: -lcrt0.o
collect2: ld returned 1 exit status

上記の通り,collect2に失敗してしまった.リンクに失敗しているようだ...
とりあえず,今回はprintfの中がどうなってるかさえ分かれば良いので,また-save-tempsを付けてコンパイルし,アセンブラソースだけを覗くことにする.

$ gcc -save-temps -static hello.c
/usr/bin/ld: can't locate file for: -lcrt0.o
collect2: ld returned 1 exit status
$ ls -l
total 48
-rw-r--r-- 1 mmori mmori 72 Apr 23 05:31 hello.c
-rw-r--r-- 1 mmori mmori 8445 Apr 23 06:31 hello.i
-rw-r--r-- 1 mmori mmori 340 Apr 23 06:31 hello.o
-rw-r--r-- 1 mmori mmori 199 Apr 23 06:31 hello.s
$ cat hello.s
.cstring
LC0:
.ascii "Hello, world.\0"
.text
.globl _main
_main:
pushl %ebp
movl %esp, %ebp
subl $24, %esp
movl $LC0, (%esp)
call _printf
movl $0, %eax
leave
ret
.subsections_via_symbols

なんと!さっきよりも短くなってしまった.それどころか,結局printf()をCallするところは変わっていない.
とりあえず今日はここで力尽きたので終わりにする.

0 件のコメント: