mirror of git://gcc.gnu.org/git/gcc.git
ffi64.c (ffi_prep_cif_machdep): Save sse-used flag in bit 11 of flags.
* src/x86/ffi64.c (ffi_prep_cif_machdep): Save sse-used flag in bit 11 of flags.
(ffi_call): Mask return type field. Pass ssecount to ffi_call_unix64.
(ffi_prep_closure): Set carry bit if sse-used flag set.
* src/x86/unix64.S (ffi_call_unix64): Add ssecount argument. Only load sse registers if ssecount non-zero.
(ffi_closure_unix64): Only save sse registers if carry set on entry.

From-SVN: r99257
This commit is contained in:
parent
08cce8fe0c
commit
d56ea8d9a9
|
@ -1,4 +1,15 @@
|
||||||
2005-05-29 Ralf Corsepius <ralf.corsepius@rtems.org>
|
2005-05-04 Andreas Degert <ad@papyrus-gmbh.de>
|
||||||
|
Richard Henderson <rth@redhat.com>
|
||||||
|
|
||||||
|
* src/x86/ffi64.c (ffi_prep_cif_machdep): Save sse-used flag in
|
||||||
|
bit 11 of flags.
|
||||||
|
(ffi_call): Mask return type field. Pass ssecount to ffi_call_unix64.
|
||||||
|
(ffi_prep_closure): Set carry bit if sse-used flag set.
|
||||||
|
* src/x86/unix64.S (ffi_call_unix64): Add ssecount argument.
|
||||||
|
Only load sse registers if ssecount non-zero.
|
||||||
|
(ffi_closure_unix64): Only save sse registers if carry set on entry.
|
||||||
|
|
||||||
|
2005-04-29 Ralf Corsepius <ralf.corsepius@rtems.org>
|
||||||
|
|
||||||
* configure.ac: Add i*86-*-rtems*, sparc*-*-rtems*,
|
* configure.ac: Add i*86-*-rtems*, sparc*-*-rtems*,
|
||||||
powerpc-*rtems*, arm*-*-rtems*, sh-*-rtems*.
|
powerpc-*rtems*, arm*-*-rtems*, sh-*-rtems*.
|
||||||
|
|
|
@ -42,7 +42,7 @@ struct register_args
|
||||||
};
|
};
|
||||||
|
|
||||||
extern void ffi_call_unix64 (void *args, unsigned long bytes, unsigned flags,
|
extern void ffi_call_unix64 (void *args, unsigned long bytes, unsigned flags,
|
||||||
void *raddr, void (*fnaddr)());
|
void *raddr, void (*fnaddr)(), unsigned ssecount);
|
||||||
|
|
||||||
/* All reference to register classes here is identical to the code in
|
/* All reference to register classes here is identical to the code in
|
||||||
gcc/config/i386/i386.c. Do *not* change one without the other. */
|
gcc/config/i386/i386.c. Do *not* change one without the other. */
|
||||||
|
@ -303,10 +303,9 @@ ffi_prep_cif_machdep (ffi_cif *cif)
|
||||||
else if (sse0 && sse1)
|
else if (sse0 && sse1)
|
||||||
flags |= 1 << 10;
|
flags |= 1 << 10;
|
||||||
/* Mark the true size of the structure. */
|
/* Mark the true size of the structure. */
|
||||||
flags |= cif->rtype->size << 11;
|
flags |= cif->rtype->size << 12;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
cif->flags = flags;
|
|
||||||
|
|
||||||
/* Go over all arguments and determine the way they should be passed.
|
/* Go over all arguments and determine the way they should be passed.
|
||||||
If it's in a register and there is space for it, let that be so. If
|
If it's in a register and there is space for it, let that be so. If
|
||||||
|
@ -331,6 +330,9 @@ ffi_prep_cif_machdep (ffi_cif *cif)
|
||||||
ssecount += nsse;
|
ssecount += nsse;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
if (ssecount)
|
||||||
|
flags |= 1 << 11;
|
||||||
|
cif->flags = flags;
|
||||||
cif->bytes = bytes;
|
cif->bytes = bytes;
|
||||||
|
|
||||||
return FFI_OK;
|
return FFI_OK;
|
||||||
|
@ -353,7 +355,7 @@ ffi_call (ffi_cif *cif, void (*fn)(), void *rvalue, void **avalue)
|
||||||
address then we need to make one. Note the setting of flags to
|
address then we need to make one. Note the setting of flags to
|
||||||
VOID above in ffi_prep_cif_machdep. */
|
VOID above in ffi_prep_cif_machdep. */
|
||||||
ret_in_memory = (cif->rtype->type == FFI_TYPE_STRUCT
|
ret_in_memory = (cif->rtype->type == FFI_TYPE_STRUCT
|
||||||
&& cif->flags == FFI_TYPE_VOID);
|
&& (cif->flags & 0xff) == FFI_TYPE_VOID);
|
||||||
if (rvalue == NULL && ret_in_memory)
|
if (rvalue == NULL && ret_in_memory)
|
||||||
rvalue = alloca (cif->rtype->size);
|
rvalue = alloca (cif->rtype->size);
|
||||||
|
|
||||||
|
@ -424,7 +426,7 @@ ffi_call (ffi_cif *cif, void (*fn)(), void *rvalue, void **avalue)
|
||||||
}
|
}
|
||||||
|
|
||||||
ffi_call_unix64 (stack, cif->bytes + sizeof (struct register_args),
|
ffi_call_unix64 (stack, cif->bytes + sizeof (struct register_args),
|
||||||
cif->flags, rvalue, fn);
|
cif->flags, rvalue, fn, ssecount);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -439,13 +441,18 @@ ffi_prep_closure (ffi_closure* closure,
|
||||||
volatile unsigned short *tramp;
|
volatile unsigned short *tramp;
|
||||||
|
|
||||||
tramp = (volatile unsigned short *) &closure->tramp[0];
|
tramp = (volatile unsigned short *) &closure->tramp[0];
|
||||||
|
|
||||||
tramp[0] = 0xbb49; /* mov <code>, %r11 */
|
tramp[0] = 0xbb49; /* mov <code>, %r11 */
|
||||||
tramp[5] = 0xba49; /* mov <data>, %r10 */
|
|
||||||
tramp[10] = 0xff49; /* jmp *%r11 */
|
|
||||||
tramp[11] = 0x00e3;
|
|
||||||
*(void * volatile *) &tramp[1] = ffi_closure_unix64;
|
*(void * volatile *) &tramp[1] = ffi_closure_unix64;
|
||||||
|
tramp[5] = 0xba49; /* mov <data>, %r10 */
|
||||||
*(void * volatile *) &tramp[6] = closure;
|
*(void * volatile *) &tramp[6] = closure;
|
||||||
|
|
||||||
|
/* Set the carry bit iff the function uses any sse registers.
|
||||||
|
This is clc or stc, together with the first byte of the jmp. */
|
||||||
|
tramp[10] = cif->flags & (1 << 11) ? 0x49f9 : 0x49f8;
|
||||||
|
|
||||||
|
tramp[11] = 0xe3ff; /* jmp *%r11 */
|
||||||
|
|
||||||
closure->cif = cif;
|
closure->cif = cif;
|
||||||
closure->fun = fun;
|
closure->fun = fun;
|
||||||
closure->user_data = user_data;
|
closure->user_data = user_data;
|
||||||
|
|
|
@ -31,7 +31,7 @@
|
||||||
.text
|
.text
|
||||||
|
|
||||||
/* ffi_call_unix64 (void *args, unsigned long bytes, unsigned flags,
|
/* ffi_call_unix64 (void *args, unsigned long bytes, unsigned flags,
|
||||||
void *raddr, void (*fnaddr)());
|
void *raddr, void (*fnaddr)());
|
||||||
|
|
||||||
Bit o trickiness here -- ARGS+BYTES is the base of the stack frame
|
Bit o trickiness here -- ARGS+BYTES is the base of the stack frame
|
||||||
for this function. This has been allocated by ffi_call. We also
|
for this function. This has been allocated by ffi_call. We also
|
||||||
|
@ -39,7 +39,7 @@
|
||||||
|
|
||||||
.align 2
|
.align 2
|
||||||
.globl ffi_call_unix64
|
.globl ffi_call_unix64
|
||||||
.type ffi_call_unix64,@function
|
.type ffi_call_unix64,@function
|
||||||
|
|
||||||
ffi_call_unix64:
|
ffi_call_unix64:
|
||||||
.LUW0:
|
.LUW0:
|
||||||
|
@ -53,6 +53,7 @@ ffi_call_unix64:
|
||||||
.LUW1:
|
.LUW1:
|
||||||
movq %rdi, %r10 /* Save a copy of the register area. */
|
movq %rdi, %r10 /* Save a copy of the register area. */
|
||||||
movq %r8, %r11 /* Save a copy of the target fn. */
|
movq %r8, %r11 /* Save a copy of the target fn. */
|
||||||
|
movl %r9d, %eax /* Set number of SSE registers. */
|
||||||
|
|
||||||
/* Load up all argument registers. */
|
/* Load up all argument registers. */
|
||||||
movq (%r10), %rdi
|
movq (%r10), %rdi
|
||||||
|
@ -61,14 +62,9 @@ ffi_call_unix64:
|
||||||
movq 24(%r10), %rcx
|
movq 24(%r10), %rcx
|
||||||
movq 32(%r10), %r8
|
movq 32(%r10), %r8
|
||||||
movq 40(%r10), %r9
|
movq 40(%r10), %r9
|
||||||
movdqa 48(%r10), %xmm0
|
testl %eax, %eax
|
||||||
movdqa 64(%r10), %xmm1
|
jnz .Lload_sse
|
||||||
movdqa 80(%r10), %xmm2
|
.Lret_from_load_sse:
|
||||||
movdqa 96(%r10), %xmm3
|
|
||||||
movdqa 112(%r10), %xmm4
|
|
||||||
movdqa 128(%r10), %xmm5
|
|
||||||
movdqa 144(%r10), %xmm6
|
|
||||||
movdqa 160(%r10), %xmm7
|
|
||||||
|
|
||||||
/* Deallocate the reg arg area. */
|
/* Deallocate the reg arg area. */
|
||||||
leaq 176(%r10), %rsp
|
leaq 176(%r10), %rsp
|
||||||
|
@ -181,37 +177,49 @@ ffi_call_unix64:
|
||||||
movq %rax, (%rsi)
|
movq %rax, (%rsi)
|
||||||
movq %rdx, 8(%rsi)
|
movq %rdx, 8(%rsi)
|
||||||
|
|
||||||
/* Bits 11-31 contain the true size of the structure. Copy from
|
/* Bits 12-31 contain the true size of the structure. Copy from
|
||||||
the scratch area to the true destination. */
|
the scratch area to the true destination. */
|
||||||
shrl $11, %ecx
|
shrl $12, %ecx
|
||||||
rep movsb
|
rep movsb
|
||||||
ret
|
ret
|
||||||
|
|
||||||
|
/* Many times we can avoid loading any SSE registers at all.
|
||||||
|
It's not worth an indirect jump to load the exact set of
|
||||||
|
SSE registers needed; zero or all is a good compromise. */
|
||||||
|
.align 2
|
||||||
.LUW3:
|
.LUW3:
|
||||||
|
.Lload_sse:
|
||||||
|
movdqa 48(%r10), %xmm0
|
||||||
|
movdqa 64(%r10), %xmm1
|
||||||
|
movdqa 80(%r10), %xmm2
|
||||||
|
movdqa 96(%r10), %xmm3
|
||||||
|
movdqa 112(%r10), %xmm4
|
||||||
|
movdqa 128(%r10), %xmm5
|
||||||
|
movdqa 144(%r10), %xmm6
|
||||||
|
movdqa 160(%r10), %xmm7
|
||||||
|
jmp .Lret_from_load_sse
|
||||||
|
|
||||||
|
.LUW4:
|
||||||
.size ffi_call_unix64,.-ffi_call_unix64
|
.size ffi_call_unix64,.-ffi_call_unix64
|
||||||
|
|
||||||
.align 2
|
.align 2
|
||||||
.globl ffi_closure_unix64
|
.globl ffi_closure_unix64
|
||||||
.type ffi_closure_unix64,@function
|
.type ffi_closure_unix64,@function
|
||||||
|
|
||||||
ffi_closure_unix64:
|
ffi_closure_unix64:
|
||||||
.LUW4:
|
|
||||||
subq $200, %rsp
|
|
||||||
.LUW5:
|
.LUW5:
|
||||||
|
/* The carry flag is set by the trampoline iff SSE registers
|
||||||
|
are used. Don't clobber it before the branch instruction. */
|
||||||
|
leaq -200(%rsp), %rsp
|
||||||
|
.LUW6:
|
||||||
movq %rdi, (%rsp)
|
movq %rdi, (%rsp)
|
||||||
movq %rsi, 8(%rsp)
|
movq %rsi, 8(%rsp)
|
||||||
movq %rdx, 16(%rsp)
|
movq %rdx, 16(%rsp)
|
||||||
movq %rcx, 24(%rsp)
|
movq %rcx, 24(%rsp)
|
||||||
movq %r8, 32(%rsp)
|
movq %r8, 32(%rsp)
|
||||||
movq %r9, 40(%rsp)
|
movq %r9, 40(%rsp)
|
||||||
movdqa %xmm0, 48(%rsp)
|
jc .Lsave_sse
|
||||||
movdqa %xmm1, 64(%rsp)
|
.Lret_from_save_sse:
|
||||||
movdqa %xmm2, 80(%rsp)
|
|
||||||
movdqa %xmm3, 96(%rsp)
|
|
||||||
movdqa %xmm4, 112(%rsp)
|
|
||||||
movdqa %xmm5, 128(%rsp)
|
|
||||||
movdqa %xmm6, 144(%rsp)
|
|
||||||
movdqa %xmm7, 160(%rsp)
|
|
||||||
|
|
||||||
movq %r10, %rdi
|
movq %r10, %rdi
|
||||||
leaq 176(%rsp), %rsi
|
leaq 176(%rsp), %rsi
|
||||||
|
@ -221,7 +229,7 @@ ffi_closure_unix64:
|
||||||
|
|
||||||
/* Deallocate stack frame early; return value is now in redzone. */
|
/* Deallocate stack frame early; return value is now in redzone. */
|
||||||
addq $200, %rsp
|
addq $200, %rsp
|
||||||
.LUW6:
|
.LUW7:
|
||||||
|
|
||||||
/* The first byte of the return value contains the FFI_TYPE. */
|
/* The first byte of the return value contains the FFI_TYPE. */
|
||||||
movzbl %al, %r10d
|
movzbl %al, %r10d
|
||||||
|
@ -300,7 +308,22 @@ ffi_closure_unix64:
|
||||||
movq -24(%rsp), %rax
|
movq -24(%rsp), %rax
|
||||||
cmovnz %rdx, %rax
|
cmovnz %rdx, %rax
|
||||||
ret
|
ret
|
||||||
.LUW7:
|
|
||||||
|
/* See the comment above .Lload_sse; the same logic applies here. */
|
||||||
|
.align 2
|
||||||
|
.LUW8:
|
||||||
|
.Lsave_sse:
|
||||||
|
movdqa %xmm0, 48(%rsp)
|
||||||
|
movdqa %xmm1, 64(%rsp)
|
||||||
|
movdqa %xmm2, 80(%rsp)
|
||||||
|
movdqa %xmm3, 96(%rsp)
|
||||||
|
movdqa %xmm4, 112(%rsp)
|
||||||
|
movdqa %xmm5, 128(%rsp)
|
||||||
|
movdqa %xmm6, 144(%rsp)
|
||||||
|
movdqa %xmm7, 160(%rsp)
|
||||||
|
jmp .Lret_from_save_sse
|
||||||
|
|
||||||
|
.LUW9:
|
||||||
.size ffi_closure_unix64,.-ffi_closure_unix64
|
.size ffi_closure_unix64,.-ffi_closure_unix64
|
||||||
|
|
||||||
.section .eh_frame,"a",@progbits
|
.section .eh_frame,"a",@progbits
|
||||||
|
@ -327,24 +350,25 @@ ffi_closure_unix64:
|
||||||
.LASFDE1:
|
.LASFDE1:
|
||||||
.long .LASFDE1-.Lframe1 /* FDE CIE offset */
|
.long .LASFDE1-.Lframe1 /* FDE CIE offset */
|
||||||
.long .LUW0-. /* FDE initial location */
|
.long .LUW0-. /* FDE initial location */
|
||||||
.long .LUW3-.LUW0 /* FDE address range */
|
.long .LUW4-.LUW0 /* FDE address range */
|
||||||
.uleb128 0x0 /* Augmentation size */
|
.uleb128 0x0 /* Augmentation size */
|
||||||
|
|
||||||
.byte 0x4 /* DW_CFA_advance_loc4 */
|
.byte 0x4 /* DW_CFA_advance_loc4 */
|
||||||
.long .LUW1-.LUW0
|
.long .LUW1-.LUW0
|
||||||
|
|
||||||
/* New stack frame based off rbp. This is a itty bit of unwind
|
/* New stack frame based off rbp. This is a itty bit of unwind
|
||||||
trickery in that the CFA *has* changed. There is no easy way
|
trickery in that the CFA *has* changed. There is no easy way
|
||||||
to describe it correctly on entry to the function. Fortunately,
|
to describe it correctly on entry to the function. Fortunately,
|
||||||
it doesn't matter too much since at all points we can correctly
|
it doesn't matter too much since at all points we can correctly
|
||||||
unwind back to ffi_call. Note that the location to which we
|
unwind back to ffi_call. Note that the location to which we
|
||||||
moved the return address is (the new) CFA-8, so from the
|
moved the return address is (the new) CFA-8, so from the
|
||||||
perspective of the unwind info, it hasn't moved. */
|
perspective of the unwind info, it hasn't moved. */
|
||||||
.byte 0xc /* DW_CFA_def_cfa, %rbp offset 32 */
|
.byte 0xc /* DW_CFA_def_cfa, %rbp offset 32 */
|
||||||
.uleb128 6
|
.uleb128 6
|
||||||
.uleb128 32
|
.uleb128 32
|
||||||
.byte 0x80+6 /* DW_CFA_offset, %rbp offset 2*-8 */
|
.byte 0x80+6 /* DW_CFA_offset, %rbp offset 2*-8 */
|
||||||
.uleb128 2
|
.uleb128 2
|
||||||
|
.byte 0xa /* DW_CFA_remember_state */
|
||||||
|
|
||||||
.byte 0x4 /* DW_CFA_advance_loc4 */
|
.byte 0x4 /* DW_CFA_advance_loc4 */
|
||||||
.long .LUW2-.LUW1
|
.long .LUW2-.LUW1
|
||||||
|
@ -352,23 +376,36 @@ ffi_closure_unix64:
|
||||||
.uleb128 7
|
.uleb128 7
|
||||||
.uleb128 8
|
.uleb128 8
|
||||||
.byte 0xc0+6 /* DW_CFA_restore, %rbp */
|
.byte 0xc0+6 /* DW_CFA_restore, %rbp */
|
||||||
|
|
||||||
|
.byte 0x4 /* DW_CFA_advance_loc4 */
|
||||||
|
.long .LUW3-.LUW2
|
||||||
|
.byte 0xb /* DW_CFA_restore_state */
|
||||||
|
|
||||||
.align 8
|
.align 8
|
||||||
.LEFDE1:
|
.LEFDE1:
|
||||||
.LSFDE3:
|
.LSFDE3:
|
||||||
.long .LEFDE3-.LASFDE3 /* FDE Length */
|
.long .LEFDE3-.LASFDE3 /* FDE Length */
|
||||||
.LASFDE3:
|
.LASFDE3:
|
||||||
.long .LASFDE3-.Lframe1 /* FDE CIE offset */
|
.long .LASFDE3-.Lframe1 /* FDE CIE offset */
|
||||||
.long .LUW4-. /* FDE initial location */
|
.long .LUW5-. /* FDE initial location */
|
||||||
.long .LUW7-.LUW4 /* FDE address range */
|
.long .LUW9-.LUW5 /* FDE address range */
|
||||||
.uleb128 0x0 /* Augmentation size */
|
.uleb128 0x0 /* Augmentation size */
|
||||||
.byte 0x4 /* DW_CFA_advance_loc4 */
|
|
||||||
.long .LUW5-.LUW4
|
|
||||||
.byte 0xe /* DW_CFA_def_cfa_offset */
|
|
||||||
.uleb128 208
|
|
||||||
.byte 0x4 /* DW_CFA_advance_loc4 */
|
.byte 0x4 /* DW_CFA_advance_loc4 */
|
||||||
.long .LUW6-.LUW5
|
.long .LUW6-.LUW5
|
||||||
.byte 0xe /* DW_CFA_def_cfa_offset */
|
.byte 0xe /* DW_CFA_def_cfa_offset */
|
||||||
|
.uleb128 208
|
||||||
|
.byte 0xa /* DW_CFA_remember_state */
|
||||||
|
|
||||||
|
.byte 0x4 /* DW_CFA_advance_loc4 */
|
||||||
|
.long .LUW7-.LUW6
|
||||||
|
.byte 0xe /* DW_CFA_def_cfa_offset */
|
||||||
.uleb128 8
|
.uleb128 8
|
||||||
|
|
||||||
|
.byte 0x4 /* DW_CFA_advance_loc4 */
|
||||||
|
.long .LUW8-.LUW7
|
||||||
|
.byte 0xb /* DW_CFA_restore_state */
|
||||||
|
|
||||||
.align 8
|
.align 8
|
||||||
.LEFDE3:
|
.LEFDE3:
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue