Why does clang behave weirdly with register variables compared to gcc?
TL;DR:
Clang does not support explicit register variable as for now.
Details:
See clang documentation
clang only supports global register variables when the register specified is non-allocatable (e.g. the stack pointer). Support for general global register variables is unlikely to be implemented soon because it requires additional LLVM backend support.
On my machine (x86_64 ubuntu 16.04), if I compile with Clang-5.0, the assembly I get is:
08048410 <main>:
8048410: 55 push %ebp
8048411: 89 e5 mov %esp,%ebp
8048413: 83 ec 18 sub $0x18,%esp
8048416: 8d 05 c0 84 04 08 lea 0x80484c0,%eax
804841c: 8b 4d fc mov -0x4(%ebp),%ecx ;this line is wrong, the behavior is meaningless
804841f: 89 0d 1c a0 04 08 mov %ecx,0x804a01c
8048425: 8b 0d 1c a0 04 08 mov 0x804a01c,%ecx
804842b: 89 04 24 mov %eax,(%esp)
804842e: 89 4c 24 04 mov %ecx,0x4(%esp)
8048432: e8 89 fe ff ff call 80482c0 <printf@plt>
8048437: 89 45 f8 mov %eax,-0x8(%ebp)
804843a: 83 c4 18 add $0x18,%esp
804843d: 5d pop %ebp
804843e: c3 ret
804843f: 90 nop
If I compile with GCC-5.5.0, this is the assembly I got:
0000051d <main>:
51d: 8d 4c 24 04 lea 0x4(%esp),%ecx
521: 83 e4 f0 and $0xfffffff0,%esp
524: ff 71 fc pushl -0x4(%ecx)
527: 55 push %ebp
528: 89 e5 mov %esp,%ebp
52a: 53 push %ebx
52b: 51 push %ecx
52c: e8 33 00 00 00 call 564 <__x86.get_pc_thunk.ax>
531: 05 a7 1a 00 00 add $0x1aa7,%eax
536: 89 ea mov %ebp,%edx ; this is the correct location to get the value of ebp
538: 89 90 30 00 00 00 mov %edx,0x30(%eax)
53e: 8b 90 30 00 00 00 mov 0x30(%eax),%edx
544: 83 ec 08 sub $0x8,%esp
547: 52 push %edx
548: 8d 90 18 e6 ff ff lea -0x19e8(%eax),%edx
54e: 52 push %edx
54f: 89 c3 mov %eax,%ebx
551: e8 5a fe ff ff call 3b0 <printf@plt>
556: 83 c4 10 add $0x10,%esp
559: 90 nop
55a: 8d 65 f8 lea -0x8(%ebp),%esp
55d: 59 pop %ecx
55e: 5b pop %ebx
55f: 5d pop %ebp
560: 8d 61 fc lea -0x4(%ecx),%esp
563: c3 ret
We can see that GCC generally supports explicit register value access while Clang does not.
Solution:
If you wish to use Clang to access ebp value, you can use inline assembly, like this: asm("\t movl %%ebp,%0" : "=r"(vfp));
In Gcc the register keyword does the following (as explained here: Using Gcc - Local Register Variables):
If you use the variable in Inline Assembly, gcc will try to put it into the register you specified. In any other context, the register keyword does not have an effect, and as noted at the bottom of the first link, it is no alternative for specifying the variable as in input to the inline assembly.
What that keyword does if used with clang I do not know, most problably it is just ignored (see Is the register keyword still used?.
As a complementary to the answers by @ThePatrickStar and @Boden_Units: the explicit register initialization is erased by Clang Driver during the LLVM IR generation.
Here is the content of inline_asm.ll
when running clang -emit-llvm -S inline_asm.c -o inline_asm.ll
(clang-7).
; ModuleID = 'inline_asm.c'
source_filename = "inline_asm.c"
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-pc-linux-gnu"
@vfp = internal global i64 563, align 8
@.str = private unnamed_addr constant [20 x i8] c"vfp value is 0x%lx\0A\00", align 1
; Function Attrs: noinline nounwind optnone uwtable
define dso_local i32 @main() #0 {
%1 = alloca i32, align 4
%2 = alloca i32, align 4
store i32 0, i32* %1, align 4
%3 = load i32, i32* %2, align 4
%4 = sext i32 %3 to i64
store i64 %4, i64* @vfp, align 8
%5 = load i64, i64* @vfp, align 8
%6 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([20 x i8], [20 x i8]* @.str, i32 0, i32 0), i64 %5)
ret i32 0
}
declare dso_local i32 @printf(i8*, ...) #1
attributes #0 = { noinline nounwind optnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
!llvm.module.flags = !{!0}
!llvm.ident = !{!1}
!0 = !{i32 1, !"wchar_size", i32 4}
!1 = !{!"clang version 7.0.1-svn348686-1~exp1~20190113235231.54 (branches/release_70)"}
In fact, the generated IR for register int ebp asm("ebp");
is no different from register int ebp;
, as if ebp
is never initialized or bound to the ebp
register.