394 lines
15 KiB
Diff
394 lines
15 KiB
Diff
Upstream-Status: Backport
|
||
2011-09-19 chengbin <bin.cheng@arm.com>
|
||
|
||
Backport r174035 from mainline
|
||
2011-05-22 Tom de Vries <tom@codesourcery.com>
|
||
|
||
PR middle-end/48689
|
||
* fold-const.c (fold_checksum_tree): Guard TREE_CHAIN use with
|
||
CODE_CONTAINS_STRUCT (TS_COMMON).
|
||
|
||
Backport r172297 from mainline
|
||
2011-04-11 Chung-Lin Tang <cltang@codesourcery.com>
|
||
Richard Earnshaw <rearnsha@arm.com>
|
||
|
||
PR target/48250
|
||
* config/arm/arm.c (arm_legitimize_reload_address): Update cases
|
||
to use sign-magnitude offsets. Reject unsupported unaligned
|
||
cases. Add detailed description in comments.
|
||
* config/arm/arm.md (reload_outdf): Disable for ARM mode; change
|
||
condition from TARGET_32BIT to TARGET_THUMB2.
|
||
|
||
Backport r171978 from mainline
|
||
2011-04-05 Tom de Vries <tom@codesourcery.com>
|
||
|
||
PR target/43920
|
||
* config/arm/arm.h (BRANCH_COST): Set to 1 for Thumb-2 when optimizing
|
||
for size.
|
||
|
||
Backport r171632 from mainline
|
||
2011-03-28 Richard Sandiford <richard.sandiford@linaro.org>
|
||
|
||
* builtins.c (expand_builtin_memset_args): Use gen_int_mode
|
||
instead of GEN_INT.
|
||
|
||
Backport r171379 from mainline
|
||
2011-03-23 Chung-Lin Tang <cltang@codesourcery.com>
|
||
|
||
PR target/46934
|
||
* config/arm/arm.md (casesi): Use the gen_int_mode() function
|
||
to subtract lower bound instead of GEN_INT().
|
||
|
||
Backport r171251 from mainline
|
||
2011-03-21 Daniel Jacobowitz <dan@codesourcery.com>
|
||
|
||
* config/arm/unwind-arm.c (__gnu_unwind_pr_common): Correct test
|
||
for barrier handlers.
|
||
|
||
Backport r171096 from mainline
|
||
2011-03-17 Chung-Lin Tang <cltang@codesourcery.com>
|
||
|
||
PR target/43872
|
||
* config/arm/arm.c (arm_get_frame_offsets): Adjust early
|
||
return condition with !cfun->calls_alloca.
|
||
|
||
Index: gcc-4_6-branch/gcc/builtins.c
|
||
===================================================================
|
||
--- gcc-4_6-branch.orig/gcc/builtins.c 2011-10-17 17:45:32.050502963 -0700
|
||
+++ gcc-4_6-branch/gcc/builtins.c 2011-10-17 17:46:11.154696878 -0700
|
||
@@ -3972,6 +3972,7 @@
|
||
{
|
||
tree fndecl, fn;
|
||
enum built_in_function fcode;
|
||
+ enum machine_mode val_mode;
|
||
char c;
|
||
unsigned int dest_align;
|
||
rtx dest_mem, dest_addr, len_rtx;
|
||
@@ -4006,14 +4007,14 @@
|
||
|
||
len_rtx = expand_normal (len);
|
||
dest_mem = get_memory_rtx (dest, len);
|
||
+ val_mode = TYPE_MODE (unsigned_char_type_node);
|
||
|
||
if (TREE_CODE (val) != INTEGER_CST)
|
||
{
|
||
rtx val_rtx;
|
||
|
||
val_rtx = expand_normal (val);
|
||
- val_rtx = convert_to_mode (TYPE_MODE (unsigned_char_type_node),
|
||
- val_rtx, 0);
|
||
+ val_rtx = convert_to_mode (val_mode, val_rtx, 0);
|
||
|
||
/* Assume that we can memset by pieces if we can store
|
||
* the coefficients by pieces (in the required modes).
|
||
@@ -4024,8 +4025,7 @@
|
||
builtin_memset_read_str, &c, dest_align,
|
||
true))
|
||
{
|
||
- val_rtx = force_reg (TYPE_MODE (unsigned_char_type_node),
|
||
- val_rtx);
|
||
+ val_rtx = force_reg (val_mode, val_rtx);
|
||
store_by_pieces (dest_mem, tree_low_cst (len, 1),
|
||
builtin_memset_gen_str, val_rtx, dest_align,
|
||
true, 0);
|
||
@@ -4051,7 +4051,8 @@
|
||
true))
|
||
store_by_pieces (dest_mem, tree_low_cst (len, 1),
|
||
builtin_memset_read_str, &c, dest_align, true, 0);
|
||
- else if (!set_storage_via_setmem (dest_mem, len_rtx, GEN_INT (c),
|
||
+ else if (!set_storage_via_setmem (dest_mem, len_rtx,
|
||
+ gen_int_mode (c, val_mode),
|
||
dest_align, expected_align,
|
||
expected_size))
|
||
goto do_libcall;
|
||
Index: gcc-4_6-branch/gcc/config/arm/arm.c
|
||
===================================================================
|
||
--- gcc-4_6-branch.orig/gcc/config/arm/arm.c 2011-10-17 17:45:41.914551883 -0700
|
||
+++ gcc-4_6-branch/gcc/config/arm/arm.c 2011-10-17 17:48:35.447412371 -0700
|
||
@@ -6406,23 +6406,126 @@
|
||
HOST_WIDE_INT val = INTVAL (XEXP (*p, 1));
|
||
HOST_WIDE_INT low, high;
|
||
|
||
- if (mode == DImode || (mode == DFmode && TARGET_SOFT_FLOAT))
|
||
- low = ((val & 0xf) ^ 0x8) - 0x8;
|
||
- else if (TARGET_MAVERICK && TARGET_HARD_FLOAT)
|
||
- /* Need to be careful, -256 is not a valid offset. */
|
||
- low = val >= 0 ? (val & 0xff) : -((-val) & 0xff);
|
||
- else if (mode == SImode
|
||
- || (mode == SFmode && TARGET_SOFT_FLOAT)
|
||
- || ((mode == HImode || mode == QImode) && ! arm_arch4))
|
||
- /* Need to be careful, -4096 is not a valid offset. */
|
||
- low = val >= 0 ? (val & 0xfff) : -((-val) & 0xfff);
|
||
- else if ((mode == HImode || mode == QImode) && arm_arch4)
|
||
- /* Need to be careful, -256 is not a valid offset. */
|
||
- low = val >= 0 ? (val & 0xff) : -((-val) & 0xff);
|
||
- else if (GET_MODE_CLASS (mode) == MODE_FLOAT
|
||
- && TARGET_HARD_FLOAT && TARGET_FPA)
|
||
- /* Need to be careful, -1024 is not a valid offset. */
|
||
- low = val >= 0 ? (val & 0x3ff) : -((-val) & 0x3ff);
|
||
+ /* Detect coprocessor load/stores. */
|
||
+ bool coproc_p = ((TARGET_HARD_FLOAT
|
||
+ && (TARGET_VFP || TARGET_FPA || TARGET_MAVERICK)
|
||
+ && (mode == SFmode || mode == DFmode
|
||
+ || (mode == DImode && TARGET_MAVERICK)))
|
||
+ || (TARGET_REALLY_IWMMXT
|
||
+ && VALID_IWMMXT_REG_MODE (mode))
|
||
+ || (TARGET_NEON
|
||
+ && (VALID_NEON_DREG_MODE (mode)
|
||
+ || VALID_NEON_QREG_MODE (mode))));
|
||
+
|
||
+ /* For some conditions, bail out when lower two bits are unaligned. */
|
||
+ if ((val & 0x3) != 0
|
||
+ /* Coprocessor load/store indexes are 8-bits + '00' appended. */
|
||
+ && (coproc_p
|
||
+ /* For DI, and DF under soft-float: */
|
||
+ || ((mode == DImode || mode == DFmode)
|
||
+ /* Without ldrd, we use stm/ldm, which does not
|
||
+ fare well with unaligned bits. */
|
||
+ && (! TARGET_LDRD
|
||
+ /* Thumb-2 ldrd/strd is [-1020,+1020] in steps of 4. */
|
||
+ || TARGET_THUMB2))))
|
||
+ return false;
|
||
+
|
||
+ /* When breaking down a [reg+index] reload address into [(reg+high)+low],
|
||
+ of which the (reg+high) gets turned into a reload add insn,
|
||
+ we try to decompose the index into high/low values that can often
|
||
+ also lead to better reload CSE.
|
||
+ For example:
|
||
+ ldr r0, [r2, #4100] // Offset too large
|
||
+ ldr r1, [r2, #4104] // Offset too large
|
||
+
|
||
+ is best reloaded as:
|
||
+ add t1, r2, #4096
|
||
+ ldr r0, [t1, #4]
|
||
+ add t2, r2, #4096
|
||
+ ldr r1, [t2, #8]
|
||
+
|
||
+ which post-reload CSE can simplify in most cases to eliminate the
|
||
+ second add instruction:
|
||
+ add t1, r2, #4096
|
||
+ ldr r0, [t1, #4]
|
||
+ ldr r1, [t1, #8]
|
||
+
|
||
+ The idea here is that we want to split out the bits of the constant
|
||
+ as a mask, rather than as subtracting the maximum offset that the
|
||
+ respective type of load/store used can handle.
|
||
+
|
||
+ When encountering negative offsets, we can still utilize it even if
|
||
+ the overall offset is positive; sometimes this may lead to an immediate
|
||
+ that can be constructed with fewer instructions.
|
||
+ For example:
|
||
+ ldr r0, [r2, #0x3FFFFC]
|
||
+
|
||
+ This is best reloaded as:
|
||
+ add t1, r2, #0x400000
|
||
+ ldr r0, [t1, #-4]
|
||
+
|
||
+ The trick for spotting this for a load insn with N bits of offset
|
||
+ (i.e. bits N-1:0) is to look at bit N; if it is set, then choose a
|
||
+ negative offset that is going to make bit N and all the bits below
|
||
+ it become zero in the remainder part.
|
||
+
|
||
+ The SIGN_MAG_LOW_ADDR_BITS macro below implements this, with respect
|
||
+ to sign-magnitude addressing (i.e. separate +- bit, or 1's complement),
|
||
+ used in most cases of ARM load/store instructions. */
|
||
+
|
||
+#define SIGN_MAG_LOW_ADDR_BITS(VAL, N) \
|
||
+ (((VAL) & ((1 << (N)) - 1)) \
|
||
+ ? (((VAL) & ((1 << ((N) + 1)) - 1)) ^ (1 << (N))) - (1 << (N)) \
|
||
+ : 0)
|
||
+
|
||
+ if (coproc_p)
|
||
+ low = SIGN_MAG_LOW_ADDR_BITS (val, 10);
|
||
+ else if (GET_MODE_SIZE (mode) == 8)
|
||
+ {
|
||
+ if (TARGET_LDRD)
|
||
+ low = (TARGET_THUMB2
|
||
+ ? SIGN_MAG_LOW_ADDR_BITS (val, 10)
|
||
+ : SIGN_MAG_LOW_ADDR_BITS (val, 8));
|
||
+ else
|
||
+ /* For pre-ARMv5TE (without ldrd), we use ldm/stm(db/da/ib)
|
||
+ to access doublewords. The supported load/store offsets are
|
||
+ -8, -4, and 4, which we try to produce here. */
|
||
+ low = ((val & 0xf) ^ 0x8) - 0x8;
|
||
+ }
|
||
+ else if (GET_MODE_SIZE (mode) < 8)
|
||
+ {
|
||
+ /* NEON element load/stores do not have an offset. */
|
||
+ if (TARGET_NEON_FP16 && mode == HFmode)
|
||
+ return false;
|
||
+
|
||
+ if (TARGET_THUMB2)
|
||
+ {
|
||
+ /* Thumb-2 has an asymmetrical index range of (-256,4096).
|
||
+ Try the wider 12-bit range first, and re-try if the result
|
||
+ is out of range. */
|
||
+ low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
|
||
+ if (low < -255)
|
||
+ low = SIGN_MAG_LOW_ADDR_BITS (val, 8);
|
||
+ }
|
||
+ else
|
||
+ {
|
||
+ if (mode == HImode || mode == HFmode)
|
||
+ {
|
||
+ if (arm_arch4)
|
||
+ low = SIGN_MAG_LOW_ADDR_BITS (val, 8);
|
||
+ else
|
||
+ {
|
||
+ /* The storehi/movhi_bytes fallbacks can use only
|
||
+ [-4094,+4094] of the full ldrb/strb index range. */
|
||
+ low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
|
||
+ if (low == 4095 || low == -4095)
|
||
+ return false;
|
||
+ }
|
||
+ }
|
||
+ else
|
||
+ low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
|
||
+ }
|
||
+ }
|
||
else
|
||
return false;
|
||
|
||
@@ -15415,7 +15518,10 @@
|
||
offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
|
||
/* A leaf function does not need any stack alignment if it has nothing
|
||
on the stack. */
|
||
- if (leaf && frame_size == 0)
|
||
+ if (leaf && frame_size == 0
|
||
+ /* However if it calls alloca(), we have a dynamically allocated
|
||
+ block of BIGGEST_ALIGNMENT on stack, so still do stack alignment. */
|
||
+ && ! cfun->calls_alloca)
|
||
{
|
||
offsets->outgoing_args = offsets->soft_frame;
|
||
offsets->locals_base = offsets->soft_frame;
|
||
Index: gcc-4_6-branch/gcc/config/arm/arm.h
|
||
===================================================================
|
||
--- gcc-4_6-branch.orig/gcc/config/arm/arm.h 2011-10-17 17:45:41.910551858 -0700
|
||
+++ gcc-4_6-branch/gcc/config/arm/arm.h 2011-10-17 17:48:35.447412371 -0700
|
||
@@ -2041,7 +2041,8 @@
|
||
/* Try to generate sequences that don't involve branches, we can then use
|
||
conditional instructions */
|
||
#define BRANCH_COST(speed_p, predictable_p) \
|
||
- (TARGET_32BIT ? 4 : (optimize > 0 ? 2 : 0))
|
||
+ (TARGET_32BIT ? (TARGET_THUMB2 && !speed_p ? 1 : 4) \
|
||
+ : (optimize > 0 ? 2 : 0))
|
||
|
||
/* Position Independent Code. */
|
||
/* We decide which register to use based on the compilation options and
|
||
Index: gcc-4_6-branch/gcc/config/arm/arm.md
|
||
===================================================================
|
||
--- gcc-4_6-branch.orig/gcc/config/arm/arm.md 2011-10-17 17:46:11.002696119 -0700
|
||
+++ gcc-4_6-branch/gcc/config/arm/arm.md 2011-10-17 17:46:11.202697111 -0700
|
||
@@ -6187,7 +6187,7 @@
|
||
[(match_operand:DF 0 "arm_reload_memory_operand" "=o")
|
||
(match_operand:DF 1 "s_register_operand" "r")
|
||
(match_operand:SI 2 "s_register_operand" "=&r")]
|
||
- "TARGET_32BIT"
|
||
+ "TARGET_THUMB2"
|
||
"
|
||
{
|
||
enum rtx_code code = GET_CODE (XEXP (operands[0], 0));
|
||
@@ -8359,7 +8359,8 @@
|
||
rtx reg = gen_reg_rtx (SImode);
|
||
|
||
emit_insn (gen_addsi3 (reg, operands[0],
|
||
- GEN_INT (-INTVAL (operands[1]))));
|
||
+ gen_int_mode (-INTVAL (operands[1]),
|
||
+ SImode)));
|
||
operands[0] = reg;
|
||
}
|
||
|
||
Index: gcc-4_6-branch/gcc/config/arm/unwind-arm.c
|
||
===================================================================
|
||
--- gcc-4_6-branch.orig/gcc/config/arm/unwind-arm.c 2011-10-17 17:45:41.390549278 -0700
|
||
+++ gcc-4_6-branch/gcc/config/arm/unwind-arm.c 2011-10-17 17:46:11.000000000 -0700
|
||
@@ -1196,8 +1196,6 @@
|
||
ucbp->barrier_cache.bitpattern[4] = (_uw) &data[1];
|
||
|
||
if (data[0] & uint32_highbit)
|
||
- phase2_call_unexpected_after_unwind = 1;
|
||
- else
|
||
{
|
||
data += rtti_count + 1;
|
||
/* Setup for entry to the handler. */
|
||
@@ -1207,6 +1205,8 @@
|
||
_Unwind_SetGR (context, 0, (_uw) ucbp);
|
||
return _URC_INSTALL_CONTEXT;
|
||
}
|
||
+ else
|
||
+ phase2_call_unexpected_after_unwind = 1;
|
||
}
|
||
if (data[0] & uint32_highbit)
|
||
data++;
|
||
Index: gcc-4_6-branch/gcc/fold-const.c
|
||
===================================================================
|
||
--- gcc-4_6-branch.orig/gcc/fold-const.c 2011-10-17 17:45:32.050502963 -0700
|
||
+++ gcc-4_6-branch/gcc/fold-const.c 2011-10-17 17:46:11.178696990 -0700
|
||
@@ -13788,7 +13788,8 @@
|
||
if (TREE_CODE_CLASS (code) != tcc_type
|
||
&& TREE_CODE_CLASS (code) != tcc_declaration
|
||
&& code != TREE_LIST
|
||
- && code != SSA_NAME)
|
||
+ && code != SSA_NAME
|
||
+ && CODE_CONTAINS_STRUCT (code, TS_COMMON))
|
||
fold_checksum_tree (TREE_CHAIN (expr), ctx, ht);
|
||
switch (TREE_CODE_CLASS (code))
|
||
{
|
||
Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/pr40887.c
|
||
===================================================================
|
||
--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/pr40887.c 2011-06-24 08:13:47.000000000 -0700
|
||
+++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/pr40887.c 2011-10-17 17:46:11.182697014 -0700
|
||
@@ -1,5 +1,6 @@
|
||
/* { dg-options "-O2 -march=armv5te" } */
|
||
/* { dg-final { scan-assembler "blx" } } */
|
||
+/* { dg-prune-output "switch .* conflicts with" } */
|
||
|
||
int (*indirect_func)();
|
||
|
||
Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/pr42575.c
|
||
===================================================================
|
||
--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/pr42575.c 2011-06-24 08:13:47.000000000 -0700
|
||
+++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/pr42575.c 2011-10-17 17:46:11.182697014 -0700
|
||
@@ -1,4 +1,4 @@
|
||
-/* { dg-options "-O2 -march=armv7-a" } */
|
||
+/* { dg-options "-O2" } */
|
||
/* Make sure RA does good job allocating registers and avoids
|
||
unnecessary moves. */
|
||
/* { dg-final { scan-assembler-not "mov" } } */
|
||
Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/pr43698.c
|
||
===================================================================
|
||
--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/pr43698.c 2011-06-24 08:13:47.000000000 -0700
|
||
+++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/pr43698.c 2011-10-17 17:46:11.182697014 -0700
|
||
@@ -1,5 +1,5 @@
|
||
/* { dg-do run } */
|
||
-/* { dg-options "-Os -march=armv7-a" } */
|
||
+/* { dg-options "-Os" } */
|
||
#include <stdint.h>
|
||
#include <stdlib.h>
|
||
|
||
Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/pr44788.c
|
||
===================================================================
|
||
--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/pr44788.c 2011-06-24 08:13:47.000000000 -0700
|
||
+++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/pr44788.c 2011-10-17 17:46:11.182697014 -0700
|
||
@@ -1,6 +1,6 @@
|
||
/* { dg-do compile } */
|
||
/* { dg-require-effective-target arm_thumb2_ok } */
|
||
-/* { dg-options "-Os -fno-strict-aliasing -fPIC -mthumb -march=armv7-a -mfpu=vfp3 -mfloat-abi=softfp" } */
|
||
+/* { dg-options "-Os -fno-strict-aliasing -fPIC -mthumb -mfpu=vfp3 -mfloat-abi=softfp" } */
|
||
|
||
void joint_decode(float* mlt_buffer1, int t) {
|
||
int i;
|
||
Index: gcc-4_6-branch/gcc/testsuite/gcc.target/arm/sync-1.c
|
||
===================================================================
|
||
--- gcc-4_6-branch.orig/gcc/testsuite/gcc.target/arm/sync-1.c 2011-06-24 08:13:47.000000000 -0700
|
||
+++ gcc-4_6-branch/gcc/testsuite/gcc.target/arm/sync-1.c 2011-10-17 17:46:11.182697014 -0700
|
||
@@ -1,5 +1,6 @@
|
||
-/* { dg-do run } */
|
||
-/* { dg-options "-O2 -march=armv7-a" } */
|
||
+
|
||
+/* { dg-do run { target sync_int_long } } */
|
||
+/* { dg-options "-O2" } */
|
||
|
||
volatile int mem;
|
||
|