| |
| # Stand-alone x86-64 test program in AT&T syntax.  It supplies its own |
| # _start entry point and leaves through a raw exit syscall. |
| .globl _start |
| |
| _start: |
| # This code tests for the fldcw "load floating point control word" |
| # instruction. On most x86 processors the retired_instruction |
| # performance counter counts this as one instruction. However, |
| # on Pentium 4 systems it counts as two. Therefore this can |
| # affect BBV results on such a system. |
| # fldcw is most often used to set the rounding mode when doing |
| # floating point to integer conversions. |
| |
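| # It is encoded as "d9 /5", i.e. the opcode byte d9 followed by a ModRM byte: |
| # 1101 1001 xx10 1yyy |
| # Where xx is the "mod" field, which will be 00, 01, or 10 indicating |
| # no offset, an 8-bit offset, or a 32-bit offset; 101 is the /5 opcode |
| # extension; and yyy is the r/m field that selects the base register |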
| |
| # these are instructions with similar encodings to fldcw |
| # that can cause false positives if the test isn't explicit enough |
| similar: |
| # Seven x87 constant loads.  Each is two bytes long and starts with the |
| # same d9 opcode byte as fldcw; only the second byte (e8 through ee) |
| # tells them apart from it. |
| # Every one pushes a value onto the x87 register stack, and no later |
| # instruction visible in this file removes those seven values again. |
| fld1 # d9 e8 (+1.0) |
| fldl2t # d9 e9 (log2 of 10) |
| fldl2e # d9 ea (log2 of e) |
| fldpi # d9 eb (pi) |
| fldlg2 # d9 ec (log10 of 2) |
| fldln2 # d9 ed (ln of 2) |
| fldz # d9 ee (+0.0) |
| |
| # check some varied ways of calling fldcw |
| |
| # fldcw through a stack-relative address: %rsp as base plus a small offset |
| stack: |
| subq $8, %rsp # reserve 8 bytes of scratch space |
| fnstcw 2(%rsp) # park the current control word in the scratch area |
| fldcw 2(%rsp) # and load that same value straight back |
| addq $8, %rsp # release the scratch space again |
| |
| # fldcw through a plain 64-bit base register, once per register a-d |
| sixtyfour_reg: |
| fnstcw cw # snapshot the current control word into cw |
| movq $cw, %rax # rax = address of cw |
| fldcw 0(%rax) # reload it via rax |
| movq $cw, %rbx # rbx = address of cw |
| fldcw 0(%rbx) # reload it via rbx |
| movq $cw, %rcx # rcx = address of cw |
| fldcw 0(%rcx) # reload it via rcx |
| movq $cw, %rdx # rdx = address of cw |
| fldcw 0(%rdx) # reload it via rdx |
| |
| # 32-bit register |
| |
| # Note! The assembler that comes with SuSE 9.1 |
| # cannot assemble 32-bit fldcw on 64-bit systems |
| # Hence the need to hand-code them |
| |
| # Each hand-coded instruction below is three bytes: |
| #   0x67      address-size prefix, so the base register is 32 bits wide |
| #   0xd9      the fldcw opcode byte |
| #   0x28 + n  ModRM byte: mod=00 (no offset), /5, and n picking the base |
| #             register (0 = eax, 1 = ecx, 2 = edx, 3 = ebx) |
| # NOTE(review): the address of cw is loaded into 32-bit registers, so |
| # this assumes cw is placed below 4 GiB - confirm against the link setup. |
| |
| |
| thirtytwo_reg: |
| # refresh cw with the current control word so each load below is a no-op |
| fnstcw cw |
| mov $cw,%eax |
| |
| # fldcw 0(%eax) # eax |
| .byte 0x67,0xd9,0x28 |
| |
| mov $cw,%ebx |
| |
| # fldcw 0(%ebx) # ebx |
| .byte 0x67,0xd9,0x2b |
| |
| mov $cw,%ecx |
| |
| # fldcw 0(%ecx) # ecx |
| .byte 0x67,0xd9,0x29 |
| |
| mov $cw,%edx |
| |
| # fldcw 0(%edx) # edx |
| .byte 0x67,0xd9,0x2a |
| |
| # register + 8-bit offset |
| # Hand-coded as four bytes each: 0x67 prefix, 0xd9 opcode, a ModRM byte |
| # of 0x68 + n (mod=01, /5, base register n) and the one-byte offset |
| # 0x20 (= 32). |
| eight_bit: |
| # Point eax 32 bytes below cw so that "32(%eax)" lands exactly on cw; |
| # the other three registers are then copied from eax. |
| mov $cw,%eax |
| sub $32,%eax |
| |
| # fldcw 32(%eax) # eax + 8 bit offset |
| .byte 0x67,0xd9,0x68,0x20 |
| |
| mov %eax,%ebx |
| # fldcw 32(%ebx) # ebx + 8 bit offset |
| .byte 0x67,0xd9,0x6b,0x20 |
| |
| mov %eax,%ecx |
| |
| # fldcw 32(%ecx) # ecx + 8 bit offset |
| .byte 0x67,0xd9,0x69,0x20 |
| |
| mov %eax,%edx |
| |
| # fldcw 32(%edx) # edx + 8 bit offset |
| .byte 0x67,0xd9,0x6a,0x20 |
| |
| |
| # register + 32-bit offset |
| # Hand-coded as seven bytes each: 0x67 prefix, 0xd9 opcode, a ModRM byte |
| # of 0xa8 + n (mod=10, /5, base register n) and a four-byte little-endian |
| # offset 0x30,0x75,0x00,0x00 (= 0x00007530 = 30000). |
| thirtytwo_bit: |
| # Point eax 30000 bytes below cw so that "30000(%eax)" lands exactly on |
| # cw; the other three registers are then copied from eax. |
| mov $cw,%eax |
| sub $30000,%eax |
| |
| # fldcw 30000(%eax) # eax + 32 bit offset |
| .byte 0x67,0xd9,0xa8,0x30,0x75,0x00,0x00 |
| |
| mov %eax,%ebx |
| |
| # fldcw 30000(%ebx) # ebx + 32 bit offset |
| .byte 0x67,0xd9,0xab,0x30,0x75,0x00,0x00 |
| |
| mov %eax,%ecx |
| |
| # fldcw 30000(%ecx) # ecx + 32 bit offset |
| .byte 0x67,0xd9,0xa9,0x30,0x75,0x00,0x00 |
| |
| mov %eax,%edx |
| |
| # fldcw 30000(%edx) # edx + 32 bit offset |
| .byte 0x67,0xd9,0xaa,0x30,0x75,0x00,0x00 |
| |
| # check an fp/integer conversion |
| # in a loop to give a bigger count |
| # Every pass executes fldcw twice: once to switch the rounding mode and |
| # once to put the saved control word back. |
| |
| # rcx is the pass counter that the loop instruction below counts down |
| mov $1024,%rcx |
| big_loop: |
| |
| fldl three # load value onto fp stack |
| fnstcw saved_cw # store control word to mem |
| movzwl saved_cw, %eax # load cw from mem, zero extending |
| # ah is bits 8-15 of the control word.  The byte move replaces all eight |
| # of them with 12 (binary 1100), i.e. it sets bits 10 and 11 (the |
| # rounding-control field) and clears the other six. |
| movb $12, %ah # set cw for "round to zero" |
| movw %ax, cw # store back to memory |
| fldcw cw # load the new rounding mode |
| fistpl result # store stack value as integer to mem and pop it |
| fldcw saved_cw # restore old cw |
| |
| loop big_loop # loop to make the count more obvious |
| |
| # result holds the conversion from the final pass; anything other than 3 |
| # falls through into the code that follows instead of jumping to exit |
| movl result, %ebx # sanity check to see if the |
| cmp $3,%rbx # result is the expected one |
| je exit |
| |
| # Reached only when the sanity check fails: print the error text and |
| # leave with exit status 1 instead of falling through to the success exit. |
| print_error: |
| mov $1,%rax # write syscall |
| mov $1,%rdi # stdout |
| mov $error,%rsi # string |
| mov $error_len,%rdx # length of string, computed beside the string below |
| syscall |
| |
| mov $1,%rdi # return 1 so the failure shows in the exit status |
| jmp exit_with_status |
| |
| # Normal way out: the conversion produced the expected value. |
| exit: |
| xor %rdi, %rdi # return 0 |
| |
| # Shared tail of both exits; expects the exit status in %rdi. |
| exit_with_status: |
| mov $60, %rax # SYSCALL_EXIT |
| syscall |
| |
| |
| |
| .data |
| # saved_cw: control word as it was on entry to each loop pass |
| # cw: scratch control word that every fldcw test form loads from |
| # Both are stored and loaded as 16-bit values; only the low half of each |
| # .long is used. |
| saved_cw: .long 0 |
| cw: .long 0 |
| # integer written by fistpl in the conversion loop |
| result: .long 0 |
| # 64-bit double 3.0, low dword first: 0x00000000 then 0x40080000 |
| three: .long 0 # a floating point 3.0 |
| .long 1074266112 |
| error: .asciz "Error! Wrong result!\n" |
| # Number of bytes to write: the text itself without the terminating NUL |
| # that .asciz appends.  Must stay directly after the string. |
| error_len = . - error - 1 |