Submitted By: Jim Gifford (jim at cross-lfs dot org) Date: 10-31-2015 Initial Package Version: 5.2.0 Origin: Upstream Upstream Status: Applied Description: This is a branch update for gcc-5.2.0, and should be rechecked periodically. This patch was made from Revision #229636. diff -Naur gcc-5.2.0.orig/boehm-gc/os_dep.c gcc-5.2.0/boehm-gc/os_dep.c --- gcc-5.2.0.orig/boehm-gc/os_dep.c 2013-03-06 09:08:58.000000000 -0600 +++ gcc-5.2.0/boehm-gc/os_dep.c 2015-09-10 09:11:16.970564000 -0500 @@ -3184,13 +3184,11 @@ (GC_words_allocd + GC_words_allocd_before_gc)); # endif } - sprintf(buf, "/proc/%d", getpid()); - fd = open(buf, O_RDONLY); - if (fd < 0) { + sprintf(buf, "/proc/%d/pagedata", getpid()); + GC_proc_fd = open(buf, O_RDONLY); + if (GC_proc_fd < 0) { ABORT("/proc open failed"); } - GC_proc_fd = syscall(SYS_ioctl, fd, PIOCOPENPD, 0); - close(fd); syscall(SYS_fcntl, GC_proc_fd, F_SETFD, FD_CLOEXEC); if (GC_proc_fd < 0) { ABORT("/proc ioctl failed"); diff -Naur gcc-5.2.0.orig/contrib/download_prerequisites gcc-5.2.0/contrib/download_prerequisites --- gcc-5.2.0.orig/contrib/download_prerequisites 2014-12-11 21:23:16.000000000 -0600 +++ gcc-5.2.0/contrib/download_prerequisites 2015-09-11 01:12:40.145086000 -0500 @@ -24,6 +24,11 @@ # be downloaded. GRAPHITE_LOOP_OPT=yes +if [ ! -e gcc/BASE-VER ] ; then + echo "You must run this script in the top level GCC source directory." + exit 1 +fi + # Necessary to build GCC. 
MPFR=mpfr-2.4.2 GMP=gmp-4.3.2 diff -Naur gcc-5.2.0.orig/fixincludes/fixincl.x gcc-5.2.0/fixincludes/fixincl.x --- gcc-5.2.0.orig/fixincludes/fixincl.x 2015-05-22 08:13:11.000000000 -0500 +++ gcc-5.2.0/fixincludes/fixincl.x 2015-08-14 06:34:26.037693000 -0500 @@ -1,12 +1,12 @@ /* -*- buffer-read-only: t -*- vi: set ro: - * - * DO NOT EDIT THIS FILE (fixincl.x) - * - * It has been AutoGen-ed May 21, 2015 at 02:48:12 AM by AutoGen 5.18.3 - * From the definitions inclhack.def - * and the template file fixincl + * + *DO NOT EDIT THIS FILE (fixincl.x) + * + *It has been AutoGen-ed August 13, 2015 at 05:25:50 PM by AutoGen 5.18.3 + *From the definitions inclhack.def + *and the template file fixincl */ -/* DO NOT SVN-MERGE THIS FILE, EITHER Thu May 21 02:48:12 UTC 2015 +/* DO NOT SVN-MERGE THIS FILE, EITHER Thu Aug 13 17:25:50 UTC 2015 * * You must regenerate it. Use the ./genfixes script. * @@ -15,7 +15,7 @@ * certain ANSI-incompatible system header files which are fixed to work * correctly with ANSI C and placed in a directory that GNU C will search. * - * This file contains 229 fixup descriptions. + * This file contains 230 fixup descriptions. * * See README for more information. 
* @@ -1503,6 +1503,50 @@ /* * * * * * * * * * * * * * * * * * * * * * * * * * * + * Description of Aix_Stdio_Inline fix + */ +tSCC zAix_Stdio_InlineName[] = + "aix_stdio_inline"; + +/* + * File name selection pattern + */ +tSCC zAix_Stdio_InlineList[] = + "stdio.h\0"; +/* + * Machine/OS name selection pattern + */ +tSCC* apzAix_Stdio_InlineMachs[] = { + "*-*-aix*", + (const char*)NULL }; + +/* + * content selection pattern - do fix if pattern found + */ +tSCC zAix_Stdio_InlineSelect0[] = + "#ifdef __cplusplus\\\n\ +}\\\n\ +\\\n\ +#ifdef ferror\\\n"; + +#define AIX_STDIO_INLINE_TEST_CT 1 +static tTestDesc aAix_Stdio_InlineTests[] = { + { TT_EGREP, zAix_Stdio_InlineSelect0, (regex_t*)NULL }, }; + +/* + * Fix Command Arguments for Aix_Stdio_Inline + */ +static const char* apzAix_Stdio_InlinePatch[] = { + "format", + "#ifdef __cplusplus\n\ +}\n\ +#endif\n\n\ +#if (defined(__cplusplus) && defined(__IBMCPP__))\n\ +#ifdef ferror\n", + (char*)NULL }; + +/* * * * * * * * * * * * * * * * * * * * * * * * * * + * * Description of Aix_Strtof_Const fix */ tSCC zAix_Strtof_ConstName[] = @@ -1516,7 +1560,9 @@ /* * Machine/OS name selection pattern */ -#define apzAix_Strtof_ConstMachs (const char**)NULL +tSCC* apzAix_Strtof_ConstMachs[] = { + "*-*-aix*", + (const char*)NULL }; /* * content selection pattern - do fix if pattern found @@ -1551,7 +1597,9 @@ /* * Machine/OS name selection pattern */ -#define apzAix_SysmachineMachs (const char**)NULL +tSCC* apzAix_SysmachineMachs[] = { + "*-*-aix*", + (const char*)NULL }; /* * content selection pattern - do fix if pattern found @@ -1586,7 +1634,9 @@ /* * Machine/OS name selection pattern */ -#define apzAix_Syswait_2Machs (const char**)NULL +tSCC* apzAix_Syswait_2Machs[] = { + "*-*-aix*", + (const char*)NULL }; /* * content selection pattern - do fix if pattern found @@ -1621,7 +1671,9 @@ /* * Machine/OS name selection pattern */ -#define apzAix_VolatileMachs (const char**)NULL +tSCC* apzAix_VolatileMachs[] = { + "*-*-aix*", + (const 
char*)NULL }; /* * content selection pattern - do fix if pattern found @@ -9396,9 +9448,9 @@ * * List of all fixes */ -#define REGEX_COUNT 266 +#define REGEX_COUNT 267 #define MACH_LIST_SIZE_LIMIT 187 -#define FIX_COUNT 229 +#define FIX_COUNT 230 /* * Enumerate the fixes @@ -9435,6 +9487,7 @@ AIX_STDINT_3_FIXIDX, AIX_STDINT_4_FIXIDX, AIX_STDINT_5_FIXIDX, + AIX_STDIO_INLINE_FIXIDX, AIX_STRTOF_CONST_FIXIDX, AIX_SYSMACHINE_FIXIDX, AIX_SYSWAIT_2_FIXIDX, @@ -9791,6 +9844,11 @@ AIX_STDINT_5_TEST_CT, FD_MACH_ONLY | FD_SUBROUTINE, aAix_Stdint_5Tests, apzAix_Stdint_5Patch, 0 }, + { zAix_Stdio_InlineName, zAix_Stdio_InlineList, + apzAix_Stdio_InlineMachs, + AIX_STDIO_INLINE_TEST_CT, FD_MACH_ONLY | FD_SUBROUTINE, + aAix_Stdio_InlineTests, apzAix_Stdio_InlinePatch, 0 }, + { zAix_Strtof_ConstName, zAix_Strtof_ConstList, apzAix_Strtof_ConstMachs, AIX_STRTOF_CONST_TEST_CT, FD_MACH_ONLY | FD_SUBROUTINE, diff -Naur gcc-5.2.0.orig/fixincludes/inclhack.def gcc-5.2.0/fixincludes/inclhack.def --- gcc-5.2.0.orig/fixincludes/inclhack.def 2015-05-22 08:13:11.000000000 -0500 +++ gcc-5.2.0/fixincludes/inclhack.def 2015-08-14 06:34:26.037693000 -0500 @@ -892,10 +892,32 @@ }; /* + * stdio.h on AIX defines ferror, clearerr and feof as C++ inline, which + produces wrong code with G++. + */ +fix = { + hackname = aix_stdio_inline; + mach = "*-*-aix*"; + files = stdio.h; + select = "#ifdef __cplusplus\\\n" + "}\\\n\\\n" + "#ifdef ferror\\\n"; + c_fix = format; + c_fix_arg = "#ifdef __cplusplus\n" + "}\n" + "#endif\n\n" + "#if (defined(__cplusplus) && defined(__IBMCPP__))\n" + "#ifdef ferror\n"; + test_text = "#ifdef __cplusplus\n}\n\n#ifdef ferror"; +}; + + +/* * stdlib.h on AIX 4.3 declares strtof() with a non-const first argument. 
*/ fix = { hackname = aix_strtof_const; + mach = "*-*-aix*"; files = stdlib.h; select = "((extern[ \t]+)?float[ \t]+strtof)\\(char \\*, char \\*\\*\\);"; c_fix = format; @@ -909,6 +931,7 @@ */ fix = { hackname = aix_sysmachine; + mach = "*-*-aix*"; files = sys/machine.h; select = "\\\\ +\n"; c_fix = format; @@ -923,6 +946,7 @@ */ fix = { hackname = aix_syswait_2; + mach = "*-*-aix*"; files = sys/wait.h; select = '\? (\(\(\(\(unsigned[^)]*\)[^)]*\) >> [^)]*\) \& 0xff\) : -1)'; c_fix = format; @@ -939,6 +963,7 @@ */ fix = { hackname = aix_volatile; + mach = "*-*-aix*"; files = sys/signal.h; select = "typedef volatile int sig_atomic_t"; c_fix = format; diff -Naur gcc-5.2.0.orig/fixincludes/tests/base/stdio.h gcc-5.2.0/fixincludes/tests/base/stdio.h --- gcc-5.2.0.orig/fixincludes/tests/base/stdio.h 2011-08-10 03:43:38.000000000 -0500 +++ gcc-5.2.0/fixincludes/tests/base/stdio.h 2015-08-14 06:34:26.037693000 -0500 @@ -19,6 +19,16 @@ #endif /* AAB_AIX_STDIO_CHECK */ +#if defined( AIX_STDIO_INLINE_CHECK ) +#ifdef __cplusplus +} +#endif + +#if (defined(__cplusplus) && defined(__IBMCPP__)) +#ifdef ferror +#endif /* AIX_STDIO_INLINE_CHECK */ + + #if defined( ALPHA_GETOPT_CHECK ) extern int getopt(int, char *const[], const char *); #endif /* ALPHA_GETOPT_CHECK */ diff -Naur gcc-5.2.0.orig/gcc/ada/gcc-interface/Make-lang.in gcc-5.2.0/gcc/ada/gcc-interface/Make-lang.in --- gcc-5.2.0.orig/gcc/ada/gcc-interface/Make-lang.in 2015-03-02 05:03:29.000000000 -0600 +++ gcc-5.2.0/gcc/ada/gcc-interface/Make-lang.in 2015-10-09 03:14:21.872164000 -0500 @@ -1025,7 +1025,7 @@ # When building from scratch we don't have dependency files, the only thing # we need to ensure is that the generated files are created first. -$(GNAT1_ADA_OBJS) $(GNATBIND_OBJS): | $(ada_generated_files) +$(GNAT1_OBJS) $(GNATBIND_OBJS): | $(ada_generated_files) # Manually include the auto-generated dependencies for the Ada host objects. 
ADA_DEPFILES = $(foreach obj,$(GNAT1_ADA_OBJS) $(GNATBIND_OBJS),\ diff -Naur gcc-5.2.0.orig/gcc/builtins.c gcc-5.2.0/gcc/builtins.c --- gcc-5.2.0.orig/gcc/builtins.c 2015-07-15 06:55:00.000000000 -0500 +++ gcc-5.2.0/gcc/builtins.c 2015-10-08 11:54:23.551701000 -0500 @@ -5271,7 +5271,7 @@ mem = get_builtin_sync_mem (CALL_EXPR_ARG (exp, 0), mode); val = expand_expr_force_mode (CALL_EXPR_ARG (exp, 1), mode); - return expand_atomic_fetch_op (target, mem, val, code, MEMMODEL_SEQ_CST, + return expand_atomic_fetch_op (target, mem, val, code, MEMMODEL_SYNC_SEQ_CST, after); } @@ -5301,8 +5301,8 @@ poval = ⌖ } if (!expand_atomic_compare_and_swap (pbool, poval, mem, old_val, new_val, - false, MEMMODEL_SEQ_CST, - MEMMODEL_SEQ_CST)) + false, MEMMODEL_SYNC_SEQ_CST, + MEMMODEL_SYNC_SEQ_CST)) return NULL_RTX; return target; @@ -5337,7 +5337,7 @@ /* Expand the operands. */ mem = get_builtin_sync_mem (CALL_EXPR_ARG (exp, 0), mode); - expand_atomic_store (mem, const0_rtx, MEMMODEL_RELEASE, true); + expand_atomic_store (mem, const0_rtx, MEMMODEL_SYNC_RELEASE, true); } /* Given an integer representing an ``enum memmodel'', verify its @@ -5366,7 +5366,8 @@ return MEMMODEL_SEQ_CST; } - if ((INTVAL (op) & MEMMODEL_MASK) >= MEMMODEL_LAST) + /* Should never see a user explicit SYNC memodel model, so >= LAST works. 
*/ + if (memmodel_base (val) >= MEMMODEL_LAST) { warning (OPT_Winvalid_memory_model, "invalid memory model argument to builtin"); @@ -5433,8 +5434,7 @@ success = MEMMODEL_SEQ_CST; } - if ((failure & MEMMODEL_MASK) == MEMMODEL_RELEASE - || (failure & MEMMODEL_MASK) == MEMMODEL_ACQ_REL) + if (is_mm_release (failure) || is_mm_acq_rel (failure)) { warning (OPT_Winvalid_memory_model, "invalid failure memory model for " @@ -5496,8 +5496,7 @@ enum memmodel model; model = get_memmodel (CALL_EXPR_ARG (exp, 1)); - if ((model & MEMMODEL_MASK) == MEMMODEL_RELEASE - || (model & MEMMODEL_MASK) == MEMMODEL_ACQ_REL) + if (is_mm_release (model) || is_mm_acq_rel (model)) { warning (OPT_Winvalid_memory_model, "invalid memory model for %<__atomic_load%>"); @@ -5526,9 +5525,8 @@ enum memmodel model; model = get_memmodel (CALL_EXPR_ARG (exp, 2)); - if ((model & MEMMODEL_MASK) != MEMMODEL_RELAXED - && (model & MEMMODEL_MASK) != MEMMODEL_SEQ_CST - && (model & MEMMODEL_MASK) != MEMMODEL_RELEASE) + if (!(is_mm_relaxed (model) || is_mm_seq_cst (model) + || is_mm_release (model))) { warning (OPT_Winvalid_memory_model, "invalid memory model for %<__atomic_store%>"); @@ -5635,9 +5633,7 @@ mem = get_builtin_sync_mem (CALL_EXPR_ARG (exp, 0), mode); model = get_memmodel (CALL_EXPR_ARG (exp, 1)); - if ((model & MEMMODEL_MASK) == MEMMODEL_CONSUME - || (model & MEMMODEL_MASK) == MEMMODEL_ACQUIRE - || (model & MEMMODEL_MASK) == MEMMODEL_ACQ_REL) + if (is_mm_consume (model) || is_mm_acquire (model) || is_mm_acq_rel (model)) { warning (OPT_Winvalid_memory_model, "invalid memory model for %<__atomic_store%>"); @@ -5697,8 +5693,20 @@ mode = mode_for_size (size, MODE_INT, 0); mode_align = GET_MODE_ALIGNMENT (mode); - if (TREE_CODE (arg1) == INTEGER_CST && INTVAL (expand_normal (arg1)) == 0) - type_align = mode_align; + if (TREE_CODE (arg1) == INTEGER_CST) + { + unsigned HOST_WIDE_INT val = UINTVAL (expand_normal (arg1)); + + /* Either this argument is null, or it's a fake pointer encoding + the alignment 
of the object. */ + val = val & -val; + val *= BITS_PER_UNIT; + + if (val == 0 || mode_align < val) + type_align = mode_align; + else + type_align = val; + } else { tree ttype = TREE_TYPE (arg1); @@ -5833,7 +5841,7 @@ static void expand_builtin_sync_synchronize (void) { - expand_mem_thread_fence (MEMMODEL_SEQ_CST); + expand_mem_thread_fence (MEMMODEL_SYNC_SEQ_CST); } static rtx diff -Naur gcc-5.2.0.orig/gcc/c/c-decl.c gcc-5.2.0/gcc/c/c-decl.c --- gcc-5.2.0.orig/gcc/c/c-decl.c 2015-06-11 07:51:04.000000000 -0500 +++ gcc-5.2.0/gcc/c/c-decl.c 2015-09-09 02:30:42.229662000 -0500 @@ -8883,7 +8883,8 @@ current_scope->bindings = NULL; for (; b; b = free_binding_and_advance (b)) { - gcc_assert (TREE_CODE (b->decl) == PARM_DECL); + gcc_assert (TREE_CODE (b->decl) == PARM_DECL + || b->decl == error_mark_node); gcc_assert (I_SYMBOL_BINDING (b->id) == b); I_SYMBOL_BINDING (b->id) = b->shadowed; if (b->shadowed && b->shadowed->u.type) diff -Naur gcc-5.2.0.orig/gcc/c/c-parser.c gcc-5.2.0/gcc/c/c-parser.c --- gcc-5.2.0.orig/gcc/c/c-parser.c 2015-05-22 03:10:32.000000000 -0500 +++ gcc-5.2.0/gcc/c/c-parser.c 2015-09-10 02:37:00.176754000 -0500 @@ -11185,9 +11185,9 @@ tree alignment = c_parser_expr_no_commas (parser, NULL).value; mark_exp_read (alignment); alignment = c_fully_fold (alignment, false, NULL); - if (!INTEGRAL_TYPE_P (TREE_TYPE (alignment)) - && TREE_CODE (alignment) != INTEGER_CST - && tree_int_cst_sgn (alignment) != 1) + if (TREE_CODE (alignment) != INTEGER_CST + || !INTEGRAL_TYPE_P (TREE_TYPE (alignment)) + || tree_int_cst_sgn (alignment) != 1) { error_at (clause_loc, "% clause alignment expression must " "be positive constant integer expression"); @@ -11264,9 +11264,9 @@ t = c_parser_expr_no_commas (parser, NULL).value; mark_exp_read (t); t = c_fully_fold (t, false, NULL); - if (!INTEGRAL_TYPE_P (TREE_TYPE (t)) - && TREE_CODE (t) != INTEGER_CST - && tree_int_cst_sgn (t) != 1) + if (TREE_CODE (t) != INTEGER_CST + || !INTEGRAL_TYPE_P (TREE_TYPE (t)) + || 
tree_int_cst_sgn (t) != 1) { error_at (clause_loc, "% clause expression must " "be positive constant integer expression"); @@ -11300,9 +11300,9 @@ t = c_parser_expr_no_commas (parser, NULL).value; mark_exp_read (t); t = c_fully_fold (t, false, NULL); - if (!INTEGRAL_TYPE_P (TREE_TYPE (t)) - && TREE_CODE (t) != INTEGER_CST - && tree_int_cst_sgn (t) != 1) + if (TREE_CODE (t) != INTEGER_CST + || !INTEGRAL_TYPE_P (TREE_TYPE (t)) + || tree_int_cst_sgn (t) != 1) { error_at (clause_loc, "% clause expression must " "be positive constant integer expression"); @@ -11706,7 +11706,7 @@ first = false; - if (((mask >> c_kind) & 1) == 0 && !parser->error) + if (((mask >> c_kind) & 1) == 0) { /* Remove the invalid clause(s) from the list to avoid confusing the rest of the compiler. */ @@ -11935,7 +11935,7 @@ first = false; - if (((mask >> c_kind) & 1) == 0 && !parser->error) + if (((mask >> c_kind) & 1) == 0) { /* Remove the invalid clause(s) from the list to avoid confusing the rest of the compiler. */ @@ -12379,6 +12379,7 @@ bool structured_block = false; bool swapped = false; bool seq_cst = false; + bool non_lvalue_p; if (c_parser_next_token_is (parser, CPP_NAME)) { @@ -12432,20 +12433,33 @@ { case OMP_ATOMIC_READ: case NOP_EXPR: /* atomic write */ - v = c_parser_unary_expression (parser).value; + v = c_parser_cast_expression (parser, NULL).value; + non_lvalue_p = !lvalue_p (v); v = c_fully_fold (v, false, NULL); if (v == error_mark_node) goto saw_error; + if (non_lvalue_p) + v = non_lvalue (v); loc = c_parser_peek_token (parser)->location; if (!c_parser_require (parser, CPP_EQ, "expected %<=%>")) goto saw_error; if (code == NOP_EXPR) - lhs = c_parser_expression (parser).value; + { + lhs = c_parser_expression (parser).value; + lhs = c_fully_fold (lhs, false, NULL); + if (lhs == error_mark_node) + goto saw_error; + } else - lhs = c_parser_unary_expression (parser).value; - lhs = c_fully_fold (lhs, false, NULL); - if (lhs == error_mark_node) - goto saw_error; + { + lhs = 
c_parser_cast_expression (parser, NULL).value; + non_lvalue_p = !lvalue_p (lhs); + lhs = c_fully_fold (lhs, false, NULL); + if (lhs == error_mark_node) + goto saw_error; + if (non_lvalue_p) + lhs = non_lvalue (lhs); + } if (code == NOP_EXPR) { /* atomic write is represented by OMP_ATOMIC with NOP_EXPR @@ -12464,10 +12478,13 @@ } else { - v = c_parser_unary_expression (parser).value; + v = c_parser_cast_expression (parser, NULL).value; + non_lvalue_p = !lvalue_p (v); v = c_fully_fold (v, false, NULL); if (v == error_mark_node) goto saw_error; + if (non_lvalue_p) + v = non_lvalue (v); if (!c_parser_require (parser, CPP_EQ, "expected %<=%>")) goto saw_error; } @@ -12480,7 +12497,7 @@ old or new x should be captured. */ restart: eloc = c_parser_peek_token (parser)->location; - expr = c_parser_unary_expression (parser); + expr = c_parser_cast_expression (parser, NULL); lhs = expr.value; expr = default_function_array_conversion (eloc, expr); unfolded_lhs = expr.value; @@ -12573,6 +12590,8 @@ } /* FALLTHRU */ default: + if (!lvalue_p (unfolded_lhs)) + lhs = non_lvalue (lhs); switch (c_parser_peek_token (parser)->type) { case CPP_MULT_EQ: @@ -12687,20 +12706,25 @@ { if (!c_parser_require (parser, CPP_SEMICOLON, "expected %<;%>")) goto saw_error; - v = c_parser_unary_expression (parser).value; + v = c_parser_cast_expression (parser, NULL).value; + non_lvalue_p = !lvalue_p (v); v = c_fully_fold (v, false, NULL); if (v == error_mark_node) goto saw_error; + if (non_lvalue_p) + v = non_lvalue (v); if (!c_parser_require (parser, CPP_EQ, "expected %<=%>")) goto saw_error; eloc = c_parser_peek_token (parser)->location; - expr = c_parser_unary_expression (parser); + expr = c_parser_cast_expression (parser, NULL); lhs1 = expr.value; expr = default_function_array_read_conversion (eloc, expr); unfolded_lhs1 = expr.value; lhs1 = c_fully_fold (lhs1, false, NULL); if (lhs1 == error_mark_node) goto saw_error; + if (!lvalue_p (unfolded_lhs1)) + lhs1 = non_lvalue (lhs1); } if 
(structured_block) { @@ -12802,7 +12826,8 @@ tree clauses, tree *cclauses) { tree decl, cond, incr, save_break, save_cont, body, init, stmt, cl; - tree declv, condv, incrv, initv, ret = NULL; + tree declv, condv, incrv, initv, ret = NULL_TREE; + tree pre_body = NULL_TREE, this_pre_body; bool fail = false, open_brace_parsed = false; int i, collapse = 1, nbraces = 0; location_t for_loc; @@ -12846,8 +12871,23 @@ { if (i > 0) vec_safe_push (for_block, c_begin_compound_stmt (true)); + this_pre_body = push_stmt_list (); c_parser_declaration_or_fndef (parser, true, true, true, true, true, NULL, vNULL); + if (this_pre_body) + { + this_pre_body = pop_stmt_list (this_pre_body); + if (pre_body) + { + tree t = pre_body; + pre_body = push_stmt_list (); + add_stmt (t); + add_stmt (this_pre_body); + pre_body = pop_stmt_list (pre_body); + } + else + pre_body = this_pre_body; + } decl = check_for_loop_decls (for_loc, flag_isoc99); if (decl == NULL) goto error_init; @@ -13042,7 +13082,7 @@ if (!fail) { stmt = c_finish_omp_for (loc, code, declv, initv, condv, - incrv, body, NULL); + incrv, body, pre_body); if (stmt) { if (cclauses != NULL diff -Naur gcc-5.2.0.orig/gcc/c/c-typeck.c gcc-5.2.0/gcc/c/c-typeck.c --- gcc-5.2.0.orig/gcc/c/c-typeck.c 2015-03-10 01:38:57.000000000 -0500 +++ gcc-5.2.0/gcc/c/c-typeck.c 2015-10-05 07:35:20.216307000 -0500 @@ -5707,6 +5707,10 @@ tree rname = NULL_TREE; bool objc_ok = false; + /* Use the expansion point location to handle cases such as user's + function returning a wrong-type macro defined in a system header. */ + location = expansion_point_location_if_in_system_header (location); + if (errtype == ic_argpass) { tree selector; @@ -9368,8 +9372,12 @@ bool npc = false; size_t rank = 0; + /* Use the expansion point to handle cases such as returning NULL + in a function returning void. 
*/ + source_location xloc = expansion_point_location_if_in_system_header (loc); + if (TREE_THIS_VOLATILE (current_function_decl)) - warning_at (loc, 0, + warning_at (xloc, 0, "function declared % has a % statement"); if (flag_cilkplus && contains_array_notation_expr (retval)) @@ -9424,10 +9432,10 @@ { current_function_returns_null = 1; if (TREE_CODE (TREE_TYPE (retval)) != VOID_TYPE) - pedwarn (loc, 0, + pedwarn (xloc, 0, "% with a value, in function returning void"); else - pedwarn (loc, OPT_Wpedantic, "ISO C forbids " + pedwarn (xloc, OPT_Wpedantic, "ISO C forbids " "% with expression, in function returning void"); } else diff -Naur gcc-5.2.0.orig/gcc/calls.c gcc-5.2.0/gcc/calls.c --- gcc-5.2.0.orig/gcc/calls.c 2015-04-30 06:11:34.000000000 -0500 +++ gcc-5.2.0/gcc/calls.c 2015-07-23 13:51:56.363755000 -0500 @@ -3115,6 +3115,19 @@ compute_argument_addresses (args, argblock, num_actuals); + /* Stack is properly aligned, pops can't safely be deferred during + the evaluation of the arguments. */ + NO_DEFER_POP; + + /* Precompute all register parameters. It isn't safe to compute + anything once we have started filling any specific hard regs. + TLS symbols sometimes need a call to resolve. Precompute + register parameters before any stack pointer manipulation + to avoid unaligned stack in the called function. */ + precompute_register_parameters (num_actuals, args, ®_parm_seen); + + OK_DEFER_POP; + /* Perform stack alignment before the first push (the last arg). */ if (argblock == 0 && adjusted_args_size.constant > reg_parm_stack_space @@ -3155,10 +3168,6 @@ funexp = rtx_for_function_call (fndecl, addr); - /* Precompute all register parameters. It isn't safe to compute anything - once we have started filling any specific hard regs. 
*/ - precompute_register_parameters (num_actuals, args, ®_parm_seen); - if (CALL_EXPR_STATIC_CHAIN (exp)) static_chain_value = expand_normal (CALL_EXPR_STATIC_CHAIN (exp)); else diff -Naur gcc-5.2.0.orig/gcc/c-family/c-common.c gcc-5.2.0/gcc/c-family/c-common.c --- gcc-5.2.0.orig/gcc/c-family/c-common.c 2015-04-10 02:54:46.000000000 -0500 +++ gcc-5.2.0/gcc/c-family/c-common.c 2015-08-05 06:20:59.983324000 -0500 @@ -10741,7 +10741,7 @@ if (TREE_CODE (p) == INTEGER_CST) { int i = tree_to_uhwi (p); - if (i < 0 || (i & MEMMODEL_MASK) >= MEMMODEL_LAST) + if (i < 0 || (memmodel_base (i) >= MEMMODEL_LAST)) { warning_at (loc, OPT_Winvalid_memory_model, "invalid memory model argument %d of %qE", x + 1, diff -Naur gcc-5.2.0.orig/gcc/c-family/c.opt gcc-5.2.0/gcc/c-family/c.opt --- gcc-5.2.0.orig/gcc/c-family/c.opt 2015-03-27 05:29:04.000000000 -0500 +++ gcc-5.2.0/gcc/c-family/c.opt 2015-10-02 10:19:52.819852000 -0500 @@ -1592,7 +1592,7 @@ std=c11 C ObjC -Conform to the ISO 2011 C standard (experimental and incomplete support) +Conform to the ISO 2011 C standard std=c1x C ObjC Alias(std=c11) @@ -1648,7 +1648,7 @@ std=gnu11 C ObjC -Conform to the ISO 2011 C standard with GNU extensions (experimental and incomplete support) +Conform to the ISO 2011 C standard with GNU extensions std=gnu1x C ObjC Alias(std=gnu11) @@ -1688,7 +1688,7 @@ std=iso9899:2011 C ObjC Alias(std=c11) -Conform to the ISO 2011 C standard (experimental and incomplete support) +Conform to the ISO 2011 C standard traditional Driver diff -Naur gcc-5.2.0.orig/gcc/c-family/c-ubsan.c gcc-5.2.0/gcc/c-family/c-ubsan.c --- gcc-5.2.0.orig/gcc/c-family/c-ubsan.c 2015-03-06 17:44:56.000000000 -0600 +++ gcc-5.2.0/gcc/c-family/c-ubsan.c 2015-09-25 05:46:03.665534000 -0500 @@ -55,6 +55,7 @@ #include "internal-fn.h" #include "stor-layout.h" #include "builtins.h" +#include "gimplify.h" /* Instrument division by zero and INT_MIN / -1. If not instrumenting, return NULL_TREE. 
*/ @@ -71,6 +72,9 @@ gcc_assert (TYPE_MAIN_VARIANT (TREE_TYPE (op0)) == TYPE_MAIN_VARIANT (TREE_TYPE (op1))); + op0 = unshare_expr (op0); + op1 = unshare_expr (op1); + if (TREE_CODE (type) == INTEGER_TYPE && (flag_sanitize & SANITIZE_DIVIDE)) t = fold_build2 (EQ_EXPR, boolean_type_node, @@ -117,6 +121,7 @@ } } t = fold_build2 (COMPOUND_EXPR, TREE_TYPE (t), op0, t); + t = fold_build2 (COMPOUND_EXPR, TREE_TYPE (t), op1, t); if (flag_sanitize_undefined_trap_on_error) tt = build_call_expr_loc (loc, builtin_decl_explicit (BUILT_IN_TRAP), 0); else @@ -151,6 +156,9 @@ HOST_WIDE_INT op0_prec = TYPE_PRECISION (type0); tree uprecm1 = build_int_cst (op1_utype, op0_prec - 1); + op0 = unshare_expr (op0); + op1 = unshare_expr (op1); + t = fold_convert_loc (loc, op1_utype, op1); t = fold_build2 (GT_EXPR, boolean_type_node, t, uprecm1); diff -Naur gcc-5.2.0.orig/gcc/cfgexpand.c gcc-5.2.0/gcc/cfgexpand.c --- gcc-5.2.0.orig/gcc/cfgexpand.c 2015-03-16 06:17:32.000000000 -0500 +++ gcc-5.2.0/gcc/cfgexpand.c 2015-07-23 05:39:26.086189000 -0500 @@ -3219,18 +3219,25 @@ bounds_rtl = DECL_BOUNDS_RTL (DECL_RESULT (current_function_decl)); if (bounds_rtl) { - rtx addr, bnd; + rtx addr = NULL; + rtx bnd = NULL; - if (bounds) + if (bounds && bounds != error_mark_node) { bnd = expand_normal (bounds); targetm.calls.store_returned_bounds (bounds_rtl, bnd); } else if (REG_P (bounds_rtl)) { - addr = expand_normal (build_fold_addr_expr (retval_rhs)); - addr = gen_rtx_MEM (Pmode, addr); - bnd = targetm.calls.load_bounds_for_arg (addr, NULL, NULL); + if (bounds) + bnd = chkp_expand_zero_bounds (); + else + { + addr = expand_normal (build_fold_addr_expr (retval_rhs)); + addr = gen_rtx_MEM (Pmode, addr); + bnd = targetm.calls.load_bounds_for_arg (addr, NULL, NULL); + } + targetm.calls.store_returned_bounds (bounds_rtl, bnd); } else @@ -3239,15 +3246,23 @@ gcc_assert (GET_CODE (bounds_rtl) == PARALLEL); - addr = expand_normal (build_fold_addr_expr (retval_rhs)); - addr = gen_rtx_MEM (Pmode, addr); + if 
(bounds) + bnd = chkp_expand_zero_bounds (); + else + { + addr = expand_normal (build_fold_addr_expr (retval_rhs)); + addr = gen_rtx_MEM (Pmode, addr); + } for (n = 0; n < XVECLEN (bounds_rtl, 0); n++) { - rtx offs = XEXP (XVECEXP (bounds_rtl, 0, n), 1); rtx slot = XEXP (XVECEXP (bounds_rtl, 0, n), 0); - rtx from = adjust_address (addr, Pmode, INTVAL (offs)); - rtx bnd = targetm.calls.load_bounds_for_arg (from, NULL, NULL); + if (!bounds) + { + rtx offs = XEXP (XVECEXP (bounds_rtl, 0, n), 1); + rtx from = adjust_address (addr, Pmode, INTVAL (offs)); + bnd = targetm.calls.load_bounds_for_arg (from, NULL, NULL); + } targetm.calls.store_returned_bounds (slot, bnd); } } @@ -3344,33 +3359,40 @@ break; case GIMPLE_RETURN: - op0 = gimple_return_retval (as_a (stmt)); - - if (op0 && op0 != error_mark_node) - { - tree result = DECL_RESULT (current_function_decl); - - /* If we are not returning the current function's RESULT_DECL, - build an assignment to it. */ - if (op0 != result) - { - /* I believe that a function's RESULT_DECL is unique. */ - gcc_assert (TREE_CODE (op0) != RESULT_DECL); - - /* ??? We'd like to use simply expand_assignment here, - but this fails if the value is of BLKmode but the return - decl is a register. expand_return has special handling - for this combination, which eventually should move - to common code. See comments there. Until then, let's - build a modify expression :-/ */ - op0 = build2 (MODIFY_EXPR, TREE_TYPE (result), - result, op0); - } - } - if (!op0) - expand_null_return (); - else - expand_return (op0, gimple_return_retbnd (stmt)); + { + tree bnd = gimple_return_retbnd (as_a (stmt)); + op0 = gimple_return_retval (as_a (stmt)); + + if (op0 && op0 != error_mark_node) + { + tree result = DECL_RESULT (current_function_decl); + + /* If we are not returning the current function's RESULT_DECL, + build an assignment to it. */ + if (op0 != result) + { + /* I believe that a function's RESULT_DECL is unique. 
*/ + gcc_assert (TREE_CODE (op0) != RESULT_DECL); + + /* ??? We'd like to use simply expand_assignment here, + but this fails if the value is of BLKmode but the return + decl is a register. expand_return has special handling + for this combination, which eventually should move + to common code. See comments there. Until then, let's + build a modify expression :-/ */ + op0 = build2 (MODIFY_EXPR, TREE_TYPE (result), + result, op0); + } + /* Mark we have return statement with missing bounds. */ + if (!bnd && chkp_function_instrumented_p (cfun->decl)) + bnd = error_mark_node; + } + + if (!op0) + expand_null_return (); + else + expand_return (op0, bnd); + } break; case GIMPLE_ASSIGN: diff -Naur gcc-5.2.0.orig/gcc/combine.c gcc-5.2.0/gcc/combine.c --- gcc-5.2.0.orig/gcc/combine.c 2015-04-09 09:37:14.000000000 -0500 +++ gcc-5.2.0/gcc/combine.c 2015-08-18 07:21:41.551020000 -0500 @@ -11960,14 +11960,15 @@ continue; } - /* If this is (and:M1 (subreg:M2 X 0) (const_int C1)) where C1 + /* If this is (and:M1 (subreg:M1 X:M2 0) (const_int C1)) where C1 fits in both M1 and M2 and the SUBREG is either paradoxical or represents the low part, permute the SUBREG and the AND and try again. */ - if (GET_CODE (XEXP (op0, 0)) == SUBREG) + if (GET_CODE (XEXP (op0, 0)) == SUBREG + && CONST_INT_P (XEXP (op0, 1))) { - unsigned HOST_WIDE_INT c1; tmode = GET_MODE (SUBREG_REG (XEXP (op0, 0))); + unsigned HOST_WIDE_INT c1 = INTVAL (XEXP (op0, 1)); /* Require an integral mode, to avoid creating something like (AND:SF ...). */ if (SCALAR_INT_MODE_P (tmode) @@ -11977,18 +11978,22 @@ have a defined value due to the AND operation. However, if we commute the AND inside the SUBREG then they no longer have defined values and the meaning of - the code has been changed. */ + the code has been changed. + Also C1 should not change value in the smaller mode, + see PR67028 (a positive C1 can become negative in the + smaller mode, so that the AND does no longer mask the + upper bits). 
*/ && (0 #ifdef WORD_REGISTER_OPERATIONS || (mode_width > GET_MODE_PRECISION (tmode) - && mode_width <= BITS_PER_WORD) + && mode_width <= BITS_PER_WORD + && trunc_int_for_mode (c1, tmode) == (HOST_WIDE_INT) c1) #endif || (mode_width <= GET_MODE_PRECISION (tmode) && subreg_lowpart_p (XEXP (op0, 0)))) - && CONST_INT_P (XEXP (op0, 1)) && mode_width <= HOST_BITS_PER_WIDE_INT && HWI_COMPUTABLE_MODE_P (tmode) - && ((c1 = INTVAL (XEXP (op0, 1))) & ~mask) == 0 + && (c1 & ~mask) == 0 && (c1 & ~GET_MODE_MASK (tmode)) == 0 && c1 != mask && c1 != GET_MODE_MASK (tmode)) diff -Naur gcc-5.2.0.orig/gcc/config/aarch64/aarch64.c gcc-5.2.0/gcc/config/aarch64/aarch64.c --- gcc-5.2.0.orig/gcc/config/aarch64/aarch64.c 2015-06-24 04:59:28.000000000 -0500 +++ gcc-5.2.0/gcc/config/aarch64/aarch64.c 2015-08-05 06:40:25.689425000 -0500 @@ -5246,11 +5246,17 @@ if (speed) { /* Floating-point FMA/FMUL can also support negations of the - operands. */ - if (GET_CODE (op0) == NEG) - op0 = XEXP (op0, 0); - if (GET_CODE (op1) == NEG) - op1 = XEXP (op1, 0); + operands, unless the rounding mode is upward or downward in + which case FNMUL is different than FMUL with operand negation. */ + bool neg0 = GET_CODE (op0) == NEG; + bool neg1 = GET_CODE (op1) == NEG; + if (maybe_fma || !flag_rounding_math || (neg0 && neg1)) + { + if (neg0) + op0 = XEXP (op0, 0); + if (neg1) + op1 = XEXP (op1, 0); + } if (maybe_fma) /* FMADD/FNMADD/FNMSUB/FMSUB. */ @@ -5694,6 +5700,12 @@ *cost = rtx_cost (op0, NEG, 0, speed); return true; } + if (GET_CODE (op0) == MULT) + { + /* FNMUL. */ + *cost = rtx_cost (op0, NEG, 0, speed); + return true; + } if (speed) /* FNEG. */ *cost += extra_cost->fp[mode == DFmode].neg; @@ -9020,8 +9032,8 @@ unlikely event of fail being ACQUIRE and succ being RELEASE we need to promote succ to ACQ_REL so that we don't lose the acquire semantics. 
*/ - if (INTVAL (mod_f) == MEMMODEL_ACQUIRE - && INTVAL (mod_s) == MEMMODEL_RELEASE) + if (is_mm_acquire (memmodel_from_int (INTVAL (mod_f))) + && is_mm_release (memmodel_from_int (INTVAL (mod_s)))) mod_s = GEN_INT (MEMMODEL_ACQ_REL); switch (mode) @@ -9066,6 +9078,23 @@ emit_insn (gen_rtx_SET (VOIDmode, bval, x)); } +/* Emit a barrier, that is appropriate for memory model MODEL, at the end of a + sequence implementing an atomic operation. */ + +static void +aarch64_emit_post_barrier (enum memmodel model) +{ + const enum memmodel base_model = memmodel_base (model); + + if (is_mm_sync (model) + && (base_model == MEMMODEL_ACQUIRE + || base_model == MEMMODEL_ACQ_REL + || base_model == MEMMODEL_SEQ_CST)) + { + emit_insn (gen_mem_thread_fence (GEN_INT (MEMMODEL_SEQ_CST))); + } +} + /* Split a compare and swap pattern. */ void @@ -9076,14 +9105,18 @@ bool is_weak; rtx_code_label *label1, *label2; rtx x, cond; + enum memmodel model; + rtx model_rtx; rval = operands[0]; mem = operands[1]; oldval = operands[2]; newval = operands[3]; is_weak = (operands[4] != const0_rtx); + model_rtx = operands[5]; scratch = operands[7]; mode = GET_MODE (mem); + model = memmodel_from_int (INTVAL (model_rtx)); label1 = NULL; if (!is_weak) @@ -9093,7 +9126,13 @@ } label2 = gen_label_rtx (); - aarch64_emit_load_exclusive (mode, rval, mem, operands[5]); + /* The initial load can be relaxed for a __sync operation since a final + barrier will be emitted to stop code hoisting. 
*/ + if (is_mm_sync (model)) + aarch64_emit_load_exclusive (mode, rval, mem, + GEN_INT (MEMMODEL_RELAXED)); + else + aarch64_emit_load_exclusive (mode, rval, mem, model_rtx); cond = aarch64_gen_compare_reg (NE, rval, oldval); x = gen_rtx_NE (VOIDmode, cond, const0_rtx); @@ -9101,7 +9140,7 @@ gen_rtx_LABEL_REF (Pmode, label2), pc_rtx); aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x)); - aarch64_emit_store_exclusive (mode, scratch, mem, newval, operands[5]); + aarch64_emit_store_exclusive (mode, scratch, mem, newval, model_rtx); if (!is_weak) { @@ -9118,6 +9157,10 @@ } emit_label (label2); + + /* Emit any final barrier needed for a __sync operation. */ + if (is_mm_sync (model)) + aarch64_emit_post_barrier (model); } /* Split an atomic operation. */ @@ -9128,6 +9171,8 @@ { machine_mode mode = GET_MODE (mem); machine_mode wmode = (mode == DImode ? DImode : SImode); + const enum memmodel model = memmodel_from_int (INTVAL (model_rtx)); + const bool is_sync = is_mm_sync (model); rtx_code_label *label; rtx x; @@ -9142,7 +9187,13 @@ old_out = new_out; value = simplify_gen_subreg (wmode, value, mode, 0); - aarch64_emit_load_exclusive (mode, old_out, mem, model_rtx); + /* The initial load can be relaxed for a __sync operation since a final + barrier will be emitted to stop code hoisting. */ + if (is_sync) + aarch64_emit_load_exclusive (mode, old_out, mem, + GEN_INT (MEMMODEL_RELAXED)); + else + aarch64_emit_load_exclusive (mode, old_out, mem, model_rtx); switch (code) { @@ -9178,6 +9229,10 @@ x = gen_rtx_IF_THEN_ELSE (VOIDmode, x, gen_rtx_LABEL_REF (Pmode, label), pc_rtx); aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x)); + + /* Emit any final barrier needed for a __sync operation. 
*/ + if (is_sync) + aarch64_emit_post_barrier (model); } static void diff -Naur gcc-5.2.0.orig/gcc/config/aarch64/aarch64-elf-raw.h gcc-5.2.0/gcc/config/aarch64/aarch64-elf-raw.h --- gcc-5.2.0.orig/gcc/config/aarch64/aarch64-elf-raw.h 2015-05-12 03:49:59.000000000 -0500 +++ gcc-5.2.0/gcc/config/aarch64/aarch64-elf-raw.h 2015-07-24 11:02:46.263397000 -0500 @@ -44,7 +44,12 @@ #endif #ifndef LINK_SPEC -#define LINK_SPEC "%{mbig-endian:-EB} %{mlittle-endian:-EL} -X \ +#define LINK_SPEC "%{h*} \ + %{static:-Bstatic} \ + %{shared:-shared} \ + %{symbolic:-Bsymbolic} \ + %{!static:%{rdynamic:-export-dynamic}} \ + %{mbig-endian:-EB} %{mlittle-endian:-EL} -X \ -maarch64elf%{mabi=ilp32*:32}%{mbig-endian:b}" \ CA53_ERR_835769_SPEC \ CA53_ERR_843419_SPEC diff -Naur gcc-5.2.0.orig/gcc/config/aarch64/aarch64-linux.h gcc-5.2.0/gcc/config/aarch64/aarch64-linux.h --- gcc-5.2.0.orig/gcc/config/aarch64/aarch64-linux.h 2015-05-12 03:49:59.000000000 -0500 +++ gcc-5.2.0/gcc/config/aarch64/aarch64-linux.h 2015-07-24 11:00:26.564818000 -0500 @@ -35,8 +35,9 @@ %{static:-Bstatic} \ %{shared:-shared} \ %{symbolic:-Bsymbolic} \ - %{rdynamic:-export-dynamic} \ - -dynamic-linker " GNU_USER_DYNAMIC_LINKER " \ + %{!static: \ + %{rdynamic:-export-dynamic} \ + %{!shared:-dynamic-linker " GNU_USER_DYNAMIC_LINKER "}} \ -X \ %{mbig-endian:-EB} %{mlittle-endian:-EL} \ -maarch64linux%{mabi=ilp32:32}%{mbig-endian:b}" diff -Naur gcc-5.2.0.orig/gcc/config/aarch64/aarch64.md gcc-5.2.0/gcc/config/aarch64/aarch64.md --- gcc-5.2.0.orig/gcc/config/aarch64/aarch64.md 2015-02-28 02:37:48.000000000 -0600 +++ gcc-5.2.0/gcc/config/aarch64/aarch64.md 2015-10-28 08:32:17.454275000 -0500 @@ -382,7 +382,7 @@ ) (define_insn "prefetch" - [(prefetch (match_operand:DI 0 "address_operand" "r") + [(prefetch (match_operand:DI 0 "register_operand" "r") (match_operand:QI 1 "const_int_operand" "") (match_operand:QI 2 "const_int_operand" ""))] "" @@ -872,7 +872,7 @@ fmov\\t%w0, %s1 fmov\\t%s0, %s1" "CONST_INT_P (operands[1]) && 
!aarch64_move_imm (INTVAL (operands[1]), SImode) - && GP_REGNUM_P (REGNO (operands[0]))" + && REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))" [(const_int 0)] "{ aarch64_expand_mov_immediate (operands[0], operands[1]); @@ -905,7 +905,7 @@ fmov\\t%d0, %d1 movi\\t%d0, %1" "(CONST_INT_P (operands[1]) && !aarch64_move_imm (INTVAL (operands[1]), DImode)) - && GP_REGNUM_P (REGNO (operands[0]))" + && REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))" [(const_int 0)] "{ aarch64_expand_mov_immediate (operands[0], operands[1]); @@ -3971,6 +3971,16 @@ (mult:GPF (neg:GPF (match_operand:GPF 1 "register_operand" "w")) (match_operand:GPF 2 "register_operand" "w")))] + "TARGET_FLOAT && !flag_rounding_math" + "fnmul\\t%0, %1, %2" + [(set_attr "type" "fmul")] +) + +(define_insn "*fnmul3" + [(set (match_operand:GPF 0 "register_operand" "=w") + (neg:GPF (mult:GPF + (match_operand:GPF 1 "register_operand" "w") + (match_operand:GPF 2 "register_operand" "w"))))] "TARGET_FLOAT" "fnmul\\t%0, %1, %2" [(set_attr "type" "fmul")] diff -Naur gcc-5.2.0.orig/gcc/config/aarch64/atomics.md gcc-5.2.0/gcc/config/aarch64/atomics.md --- gcc-5.2.0.orig/gcc/config/aarch64/atomics.md 2015-01-05 06:33:28.000000000 -0600 +++ gcc-5.2.0/gcc/config/aarch64/atomics.md 2015-09-23 04:48:16.591988000 -0500 @@ -119,7 +119,7 @@ [(set (match_operand:ALLI 0 "aarch64_sync_memory_operand" "+Q") (unspec_volatile:ALLI [(atomic_op:ALLI (match_dup 0) - (match_operand:ALLI 1 "" "r")) + (match_operand:ALLI 1 "" "r")) (match_operand:SI 2 "const_int_operand")] ;; model UNSPECV_ATOMIC_OP)) (clobber (reg:CC CC_REGNUM)) @@ -164,7 +164,7 @@ (set (match_dup 1) (unspec_volatile:ALLI [(atomic_op:ALLI (match_dup 1) - (match_operand:ALLI 2 "" "r")) + (match_operand:ALLI 2 "" "r")) (match_operand:SI 3 "const_int_operand")] ;; model UNSPECV_ATOMIC_OP)) (clobber (reg:CC CC_REGNUM)) @@ -209,7 +209,7 @@ [(set (match_operand:ALLI 0 "register_operand" "=&r") (atomic_op:ALLI (match_operand:ALLI 1 "aarch64_sync_memory_operand" 
"+Q") - (match_operand:ALLI 2 "" "r"))) + (match_operand:ALLI 2 "" "r"))) (set (match_dup 1) (unspec_volatile:ALLI [(match_dup 1) (match_dup 2) @@ -260,10 +260,8 @@ UNSPECV_LDA))] "" { - enum memmodel model = (enum memmodel) INTVAL (operands[2]); - if (model == MEMMODEL_RELAXED - || model == MEMMODEL_CONSUME - || model == MEMMODEL_RELEASE) + enum memmodel model = memmodel_from_int (INTVAL (operands[2])); + if (is_mm_relaxed (model) || is_mm_consume (model) || is_mm_release (model)) return "ldr\t%0, %1"; else return "ldar\t%0, %1"; @@ -278,10 +276,8 @@ UNSPECV_STL))] "" { - enum memmodel model = (enum memmodel) INTVAL (operands[2]); - if (model == MEMMODEL_RELAXED - || model == MEMMODEL_CONSUME - || model == MEMMODEL_ACQUIRE) + enum memmodel model = memmodel_from_int (INTVAL (operands[2])); + if (is_mm_relaxed (model) || is_mm_consume (model) || is_mm_acquire (model)) return "str\t%1, %0"; else return "stlr\t%1, %0"; @@ -297,10 +293,8 @@ UNSPECV_LX)))] "" { - enum memmodel model = (enum memmodel) INTVAL (operands[2]); - if (model == MEMMODEL_RELAXED - || model == MEMMODEL_CONSUME - || model == MEMMODEL_RELEASE) + enum memmodel model = memmodel_from_int (INTVAL (operands[2])); + if (is_mm_relaxed (model) || is_mm_consume (model) || is_mm_release (model)) return "ldxr\t%w0, %1"; else return "ldaxr\t%w0, %1"; @@ -315,10 +309,8 @@ UNSPECV_LX))] "" { - enum memmodel model = (enum memmodel) INTVAL (operands[2]); - if (model == MEMMODEL_RELAXED - || model == MEMMODEL_CONSUME - || model == MEMMODEL_RELEASE) + enum memmodel model = memmodel_from_int (INTVAL (operands[2])); + if (is_mm_relaxed (model) || is_mm_consume (model) || is_mm_release (model)) return "ldxr\t%0, %1"; else return "ldaxr\t%0, %1"; @@ -335,10 +327,8 @@ UNSPECV_SX))] "" { - enum memmodel model = (enum memmodel) INTVAL (operands[3]); - if (model == MEMMODEL_RELAXED - || model == MEMMODEL_CONSUME - || model == MEMMODEL_ACQUIRE) + enum memmodel model = memmodel_from_int (INTVAL (operands[3])); + if 
(is_mm_relaxed (model) || is_mm_consume (model) || is_mm_acquire (model)) return "stxr\t%w0, %2, %1"; else return "stlxr\t%w0, %2, %1"; @@ -349,8 +339,8 @@ [(match_operand:SI 0 "const_int_operand" "")] "" { - enum memmodel model = (enum memmodel) INTVAL (operands[0]); - if (model != MEMMODEL_RELAXED && model != MEMMODEL_CONSUME) + enum memmodel model = memmodel_from_int (INTVAL (operands[0])); + if (!(is_mm_relaxed (model) || is_mm_consume (model))) emit_insn (gen_dmb (operands[0])); DONE; } @@ -373,8 +363,8 @@ UNSPEC_MB))] "" { - enum memmodel model = (enum memmodel) INTVAL (operands[1]); - if (model == MEMMODEL_ACQUIRE) + enum memmodel model = memmodel_from_int (INTVAL (operands[1])); + if (is_mm_acquire (model)) return "dmb\\tishld"; else return "dmb\\tish"; diff -Naur gcc-5.2.0.orig/gcc/config/aarch64/iterators.md gcc-5.2.0/gcc/config/aarch64/iterators.md --- gcc-5.2.0.orig/gcc/config/aarch64/iterators.md 2015-06-02 06:14:16.000000000 -0500 +++ gcc-5.2.0/gcc/config/aarch64/iterators.md 2015-09-23 04:48:16.591988000 -0500 @@ -342,9 +342,6 @@ ;; Attribute to describe constants acceptable in logical operations (define_mode_attr lconst [(SI "K") (DI "L")]) -;; Attribute to describe constants acceptable in atomic logical operations -(define_mode_attr lconst_atomic [(QI "K") (HI "K") (SI "K") (DI "L")]) - ;; Map a mode to a specific constraint character. (define_mode_attr cmode [(QI "q") (HI "h") (SI "s") (DI "d")]) @@ -845,6 +842,16 @@ (plus "aarch64_plus_operand") (minus "aarch64_plus_operand")]) +;; Constants acceptable for atomic operations. +;; This definition must appear in this file before the iterators it refers to. +(define_code_attr const_atomic + [(plus "IJ") (minus "IJ") + (xor "") (ior "") + (and "")]) + +;; Attribute to describe constants acceptable in atomic logical operations +(define_mode_attr lconst_atomic [(QI "K") (HI "K") (SI "K") (DI "L")]) + ;; ------------------------------------------------------------------- ;; Int Iterators. 
;; ------------------------------------------------------------------- diff -Naur gcc-5.2.0.orig/gcc/config/alpha/alpha.c gcc-5.2.0/gcc/config/alpha/alpha.c --- gcc-5.2.0.orig/gcc/config/alpha/alpha.c 2015-05-16 02:42:20.000000000 -0500 +++ gcc-5.2.0/gcc/config/alpha/alpha.c 2015-08-05 06:20:59.983324000 -0500 @@ -4548,8 +4548,8 @@ oldval = operands[3]; newval = operands[4]; is_weak = (operands[5] != const0_rtx); - mod_s = (enum memmodel) INTVAL (operands[6]); - mod_f = (enum memmodel) INTVAL (operands[7]); + mod_s = memmodel_from_int (INTVAL (operands[6])); + mod_f = memmodel_from_int (INTVAL (operands[7])); mode = GET_MODE (mem); alpha_pre_atomic_barrier (mod_s); @@ -4587,12 +4587,12 @@ emit_unlikely_jump (x, label1); } - if (mod_f != MEMMODEL_RELAXED) + if (!is_mm_relaxed (mod_f)) emit_label (XEXP (label2, 0)); alpha_post_atomic_barrier (mod_s); - if (mod_f == MEMMODEL_RELAXED) + if (is_mm_relaxed (mod_f)) emit_label (XEXP (label2, 0)); } @@ -4653,8 +4653,8 @@ newval = operands[4]; align = operands[5]; is_weak = (operands[6] != const0_rtx); - mod_s = (enum memmodel) INTVAL (operands[7]); - mod_f = (enum memmodel) INTVAL (operands[8]); + mod_s = memmodel_from_int (INTVAL (operands[7])); + mod_f = memmodel_from_int (INTVAL (operands[8])); scratch = operands[9]; mode = GET_MODE (orig_mem); addr = XEXP (orig_mem, 0); @@ -4706,12 +4706,12 @@ emit_unlikely_jump (x, label1); } - if (mod_f != MEMMODEL_RELAXED) + if (!is_mm_relaxed (mod_f)) emit_label (XEXP (label2, 0)); alpha_post_atomic_barrier (mod_s); - if (mod_f == MEMMODEL_RELAXED) + if (is_mm_relaxed (mod_f)) emit_label (XEXP (label2, 0)); } diff -Naur gcc-5.2.0.orig/gcc/config/arm/arm.c gcc-5.2.0/gcc/config/arm/arm.c --- gcc-5.2.0.orig/gcc/config/arm/arm.c 2015-07-06 12:32:07.000000000 -0500 +++ gcc-5.2.0/gcc/config/arm/arm.c 2015-10-27 08:46:15.949493000 -0500 @@ -10269,7 +10269,7 @@ *cost = COSTS_N_INSNS (1); - if (GET_CODE (op0) == NEG) + if (GET_CODE (op0) == NEG && !flag_rounding_math) op0 = XEXP (op0, 0); 
if (speed_p) @@ -10345,6 +10345,13 @@ if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT && (mode == SFmode || !TARGET_VFP_SINGLE)) { + if (GET_CODE (XEXP (x, 0)) == MULT) + { + /* VNMUL. */ + *cost = rtx_cost (XEXP (x, 0), NEG, 0, speed_p); + return true; + } + *cost = COSTS_N_INSNS (1); if (speed_p) *cost += extra_cost->fp[mode != SFmode].neg; @@ -27537,25 +27544,36 @@ return 0; } +/* If X is a CONST_DOUBLE with a value that is a power of 2 whose + log2 is in [1, 32], return that log2. Otherwise return -1. + This is used in the patterns for vcvt.s32.f32 floating-point to + fixed-point conversions. */ + int -vfp3_const_double_for_bits (rtx operand) +vfp3_const_double_for_bits (rtx x) { - REAL_VALUE_TYPE r0; + if (!CONST_DOUBLE_P (x)) + return -1; - if (!CONST_DOUBLE_P (operand)) - return 0; + REAL_VALUE_TYPE r; - REAL_VALUE_FROM_CONST_DOUBLE (r0, operand); - if (exact_real_truncate (DFmode, &r0)) - { - HOST_WIDE_INT value = real_to_integer (&r0); - value = value & 0xffffffff; - if ((value != 0) && ( (value & (value - 1)) == 0)) - return int_log2 (value); - } + REAL_VALUE_FROM_CONST_DOUBLE (r, x); + if (REAL_VALUE_NEGATIVE (r) + || REAL_VALUE_ISNAN (r) + || REAL_VALUE_ISINF (r) + || !real_isinteger (&r, SFmode)) + return -1; - return 0; + HOST_WIDE_INT hwint = exact_log2 (real_to_integer (&r)); + + /* The exact_log2 above will have returned -1 if this is + not an exact log2. */ + if (!IN_RANGE (hwint, 1, 32)) + return -1; + + return hwint; } + /* Emit a memory barrier around an atomic sequence according to MODEL. */ @@ -27678,8 +27696,8 @@ promote succ to ACQ_REL so that we don't lose the acquire semantics. 
*/ if (TARGET_HAVE_LDACQ - && INTVAL (mod_f) == MEMMODEL_ACQUIRE - && INTVAL (mod_s) == MEMMODEL_RELEASE) + && is_mm_acquire (memmodel_from_int (INTVAL (mod_f))) + && is_mm_release (memmodel_from_int (INTVAL (mod_s)))) mod_s = GEN_INT (MEMMODEL_ACQ_REL); switch (mode) @@ -27752,20 +27770,25 @@ oldval = operands[2]; newval = operands[3]; is_weak = (operands[4] != const0_rtx); - mod_s = (enum memmodel) INTVAL (operands[5]); - mod_f = (enum memmodel) INTVAL (operands[6]); + mod_s = memmodel_from_int (INTVAL (operands[5])); + mod_f = memmodel_from_int (INTVAL (operands[6])); scratch = operands[7]; mode = GET_MODE (mem); - bool use_acquire = TARGET_HAVE_LDACQ - && !(mod_s == MEMMODEL_RELAXED - || mod_s == MEMMODEL_CONSUME - || mod_s == MEMMODEL_RELEASE); + bool is_armv8_sync = arm_arch8 && is_mm_sync (mod_s); + bool use_acquire = TARGET_HAVE_LDACQ + && !(is_mm_relaxed (mod_s) || is_mm_consume (mod_s) + || is_mm_release (mod_s)); + bool use_release = TARGET_HAVE_LDACQ - && !(mod_s == MEMMODEL_RELAXED - || mod_s == MEMMODEL_CONSUME - || mod_s == MEMMODEL_ACQUIRE); + && !(is_mm_relaxed (mod_s) || is_mm_consume (mod_s) + || is_mm_acquire (mod_s)); + + /* For ARMv8, the load-acquire is too weak for __sync memory orders. Instead, + a full barrier is emitted after the store-release. */ + if (is_armv8_sync) + use_acquire = false; /* Checks whether a barrier is needed and emits one accordingly. */ if (!(use_acquire || use_release)) @@ -27803,14 +27826,15 @@ emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x)); } - if (mod_f != MEMMODEL_RELAXED) + if (!is_mm_relaxed (mod_f)) emit_label (label2); /* Checks whether a barrier is needed and emits one accordingly. 
*/ - if (!(use_acquire || use_release)) + if (is_armv8_sync + || !(use_acquire || use_release)) arm_post_atomic_barrier (mod_s); - if (mod_f == MEMMODEL_RELAXED) + if (is_mm_relaxed (mod_f)) emit_label (label2); } @@ -27818,21 +27842,26 @@ arm_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem, rtx value, rtx model_rtx, rtx cond) { - enum memmodel model = (enum memmodel) INTVAL (model_rtx); + enum memmodel model = memmodel_from_int (INTVAL (model_rtx)); machine_mode mode = GET_MODE (mem); machine_mode wmode = (mode == DImode ? DImode : SImode); rtx_code_label *label; rtx x; + bool is_armv8_sync = arm_arch8 && is_mm_sync (model); + bool use_acquire = TARGET_HAVE_LDACQ - && !(model == MEMMODEL_RELAXED - || model == MEMMODEL_CONSUME - || model == MEMMODEL_RELEASE); + && !(is_mm_relaxed (model) || is_mm_consume (model) + || is_mm_release (model)); bool use_release = TARGET_HAVE_LDACQ - && !(model == MEMMODEL_RELAXED - || model == MEMMODEL_CONSUME - || model == MEMMODEL_ACQUIRE); + && !(is_mm_relaxed (model) || is_mm_consume (model) + || is_mm_acquire (model)); + + /* For ARMv8, a load-acquire is too weak for __sync memory orders. Instead, + a full barrier is emitted after the store-release. */ + if (is_armv8_sync) + use_acquire = false; /* Checks whether a barrier is needed and emits one accordingly. */ if (!(use_acquire || use_release)) @@ -27904,7 +27933,8 @@ emit_unlikely_jump (gen_cbranchsi4 (x, cond, const0_rtx, label)); /* Checks whether a barrier is needed and emits one accordingly. */ - if (!(use_acquire || use_release)) + if (is_armv8_sync + || !(use_acquire || use_release)) arm_post_atomic_barrier (model); } @@ -28792,6 +28822,38 @@ #undef BRANCH } +/* Returns true if the pattern is a valid symbolic address, which is either a + symbol_ref or (symbol_ref + addend). 
+ + According to the ARM ELF ABI, the initial addend of REL-type relocations + processing MOVW and MOVT instructions is formed by interpreting the 16-bit + literal field of the instruction as a 16-bit signed value in the range + -32768 <= A < 32768. */ + +bool +arm_valid_symbolic_address_p (rtx addr) +{ + rtx xop0, xop1 = NULL_RTX; + rtx tmp = addr; + + if (GET_CODE (tmp) == SYMBOL_REF || GET_CODE (tmp) == LABEL_REF) + return true; + + /* (const (plus: symbol_ref const_int)) */ + if (GET_CODE (addr) == CONST) + tmp = XEXP (addr, 0); + + if (GET_CODE (tmp) == PLUS) + { + xop0 = XEXP (tmp, 0); + xop1 = XEXP (tmp, 1); + + if (GET_CODE (xop0) == SYMBOL_REF && CONST_INT_P (xop1)) + return IN_RANGE (INTVAL (xop1), -0x8000, 0x7fff); + } + + return false; +} /* Returns true if a valid comparison operation and makes the operands in a form that is valid. */ diff -Naur gcc-5.2.0.orig/gcc/config/arm/arm.md gcc-5.2.0/gcc/config/arm/arm.md --- gcc-5.2.0.orig/gcc/config/arm/arm.md 2015-07-06 12:18:40.000000000 -0500 +++ gcc-5.2.0/gcc/config/arm/arm.md 2015-09-23 05:36:48.364214000 -0500 @@ -5415,7 +5415,7 @@ if (!REG_P (operands[0])) operands[1] = force_reg (DImode, operands[1]); } - if (REG_P (operands[0]) && REGNO (operands[0]) < FIRST_VIRTUAL_REGISTER + if (REG_P (operands[0]) && REGNO (operands[0]) <= LAST_ARM_REGNUM && !HARD_REGNO_MODE_OK (REGNO (operands[0]), DImode)) { /* Avoid LDRD's into an odd-numbered register pair in ARM state @@ -5434,7 +5434,7 @@ gen_highpart (SImode, operands[1])); DONE; } - else if (REG_P (operands[1]) && REGNO (operands[1]) < FIRST_VIRTUAL_REGISTER + else if (REG_P (operands[1]) && REGNO (operands[1]) <= LAST_ARM_REGNUM && !HARD_REGNO_MODE_OK (REGNO (operands[1]), DImode)) { /* Avoid STRD's from an odd-numbered register pair in ARM state @@ -5662,7 +5662,7 @@ [(set (match_operand:SI 0 "nonimmediate_operand" "=r") (lo_sum:SI (match_operand:SI 1 "nonimmediate_operand" "0") (match_operand:SI 2 "general_operand" "i")))] - "arm_arch_thumb2" + 
"arm_arch_thumb2 && arm_valid_symbolic_address_p (operands[2])" "movt%?\t%0, #:upper16:%c2" [(set_attr "predicable" "yes") (set_attr "predicable_short_it" "no") @@ -6508,7 +6508,7 @@ (define_insn "*arm32_movhf" [(set (match_operand:HF 0 "nonimmediate_operand" "=r,m,r,r") (match_operand:HF 1 "general_operand" " m,r,r,F"))] - "TARGET_32BIT && !(TARGET_HARD_FLOAT && TARGET_FP16) && !arm_restrict_it + "TARGET_32BIT && !(TARGET_HARD_FLOAT && TARGET_FP16) && ( s_register_operand (operands[0], HFmode) || s_register_operand (operands[1], HFmode))" "* @@ -6546,7 +6546,8 @@ [(set_attr "conds" "unconditional") (set_attr "type" "load1,store1,mov_reg,multiple") (set_attr "length" "4,4,4,8") - (set_attr "predicable" "yes")] + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")] ) (define_expand "movsf" diff -Naur gcc-5.2.0.orig/gcc/config/arm/arm-protos.h gcc-5.2.0/gcc/config/arm/arm-protos.h --- gcc-5.2.0.orig/gcc/config/arm/arm-protos.h 2015-03-25 00:53:55.000000000 -0500 +++ gcc-5.2.0/gcc/config/arm/arm-protos.h 2015-08-26 08:49:00.639074000 -0500 @@ -312,6 +312,7 @@ extern void arm_emit_coreregs_64bit_shift (enum rtx_code, rtx, rtx, rtx, rtx, rtx); +extern bool arm_valid_symbolic_address_p (rtx); extern bool arm_validize_comparison (rtx *, rtx *, rtx *); #endif /* RTX_CODE */ diff -Naur gcc-5.2.0.orig/gcc/config/arm/constraints.md gcc-5.2.0/gcc/config/arm/constraints.md --- gcc-5.2.0.orig/gcc/config/arm/constraints.md 2015-01-05 06:33:28.000000000 -0600 +++ gcc-5.2.0/gcc/config/arm/constraints.md 2015-10-27 08:46:15.949493000 -0500 @@ -67,7 +67,8 @@ (define_constraint "j" "A constant suitable for a MOVW instruction. 
(ARM/Thumb-2)" (and (match_test "TARGET_32BIT && arm_arch_thumb2") - (ior (match_code "high") + (ior (and (match_code "high") + (match_test "arm_valid_symbolic_address_p (XEXP (op, 0))")) (and (match_code "const_int") (match_test "(ival & 0xffff0000) == 0"))))) @@ -338,7 +339,8 @@ "@internal In ARM/ Thumb2 a const_double which can be used with a vcvt.s32.f32 with bits operation" (and (match_code "const_double") - (match_test "TARGET_32BIT && TARGET_VFP && vfp3_const_double_for_bits (op)"))) + (match_test "TARGET_32BIT && TARGET_VFP + && vfp3_const_double_for_bits (op) > 0"))) (define_register_constraint "Ts" "(arm_restrict_it) ? LO_REGS : GENERAL_REGS" "For arm_restrict_it the core registers @code{r0}-@code{r7}. GENERAL_REGS otherwise.") diff -Naur gcc-5.2.0.orig/gcc/config/arm/predicates.md gcc-5.2.0/gcc/config/arm/predicates.md --- gcc-5.2.0.orig/gcc/config/arm/predicates.md 2015-01-05 06:33:28.000000000 -0600 +++ gcc-5.2.0/gcc/config/arm/predicates.md 2015-10-27 08:46:15.949493000 -0500 @@ -668,7 +668,7 @@ (define_predicate "const_double_vcvt_power_of_two" (and (match_code "const_double") (match_test "TARGET_32BIT && TARGET_VFP - && vfp3_const_double_for_bits (op)"))) + && vfp3_const_double_for_bits (op) > 0"))) (define_predicate "neon_struct_operand" (and (match_code "mem") diff -Naur gcc-5.2.0.orig/gcc/config/arm/sync.md gcc-5.2.0/gcc/config/arm/sync.md --- gcc-5.2.0.orig/gcc/config/arm/sync.md 2015-01-05 06:33:28.000000000 -0600 +++ gcc-5.2.0/gcc/config/arm/sync.md 2015-10-01 04:05:45.272698000 -0500 @@ -73,15 +73,14 @@ VUNSPEC_LDA))] "TARGET_HAVE_LDACQ" { - enum memmodel model = (enum memmodel) INTVAL (operands[2]); - if (model == MEMMODEL_RELAXED - || model == MEMMODEL_CONSUME - || model == MEMMODEL_RELEASE) - return \"ldr\\t%0, %1\"; + enum memmodel model = memmodel_from_int (INTVAL (operands[2])); + if (is_mm_relaxed (model) || is_mm_consume (model) || is_mm_release (model)) + return \"ldr%(%)\\t%0, %1\"; else - return \"lda\\t%0, %1\"; + return 
\"lda%?\\t%0, %1\"; } -) + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) (define_insn "atomic_store" [(set (match_operand:QHSI 0 "memory_operand" "=Q") @@ -91,15 +90,14 @@ VUNSPEC_STL))] "TARGET_HAVE_LDACQ" { - enum memmodel model = (enum memmodel) INTVAL (operands[2]); - if (model == MEMMODEL_RELAXED - || model == MEMMODEL_CONSUME - || model == MEMMODEL_ACQUIRE) - return \"str\t%1, %0\"; + enum memmodel model = memmodel_from_int (INTVAL (operands[2])); + if (is_mm_relaxed (model) || is_mm_consume (model) || is_mm_acquire (model)) + return \"str%(%)\t%1, %0\"; else - return \"stl\t%1, %0\"; + return \"stl%?\t%1, %0\"; } -) + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) ;; Note that ldrd and vldr are *not* guaranteed to be single-copy atomic, ;; even for a 64-bit aligned address. Instead we use a ldrexd unparied @@ -110,10 +108,10 @@ (match_operand:SI 2 "const_int_operand")] ;; model "TARGET_HAVE_LDREXD && ARM_DOUBLEWORD_ALIGN" { - enum memmodel model = (enum memmodel) INTVAL (operands[2]); + enum memmodel model = memmodel_from_int (INTVAL (operands[2])); expand_mem_thread_fence (model); emit_insn (gen_atomic_loaddi_1 (operands[0], operands[1])); - if (model == MEMMODEL_SEQ_CST) + if (is_mm_seq_cst (model)) expand_mem_thread_fence (model); DONE; }) diff -Naur gcc-5.2.0.orig/gcc/config/arm/vfp.md gcc-5.2.0/gcc/config/arm/vfp.md --- gcc-5.2.0.orig/gcc/config/arm/vfp.md 2015-01-05 06:33:28.000000000 -0600 +++ gcc-5.2.0/gcc/config/arm/vfp.md 2015-08-03 09:27:43.641873000 -0500 @@ -770,6 +770,17 @@ [(set (match_operand:SF 0 "s_register_operand" "=t") (mult:SF (neg:SF (match_operand:SF 1 "s_register_operand" "t")) (match_operand:SF 2 "s_register_operand" "t")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP && !flag_rounding_math" + "vnmul%?.f32\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") + (set_attr "type" "fmuls")] +) + +(define_insn "*negmulsf3_vfp" + [(set 
(match_operand:SF 0 "s_register_operand" "=t") + (neg:SF (mult:SF (match_operand:SF 1 "s_register_operand" "t") + (match_operand:SF 2 "s_register_operand" "t"))))] "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" "vnmul%?.f32\\t%0, %1, %2" [(set_attr "predicable" "yes") @@ -781,6 +792,18 @@ [(set (match_operand:DF 0 "s_register_operand" "=w") (mult:DF (neg:DF (match_operand:DF 1 "s_register_operand" "w")) (match_operand:DF 2 "s_register_operand" "w")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE + && !flag_rounding_math" + "vnmul%?.f64\\t%P0, %P1, %P2" + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") + (set_attr "type" "fmuld")] +) + +(define_insn "*negmuldf3_vfp" + [(set (match_operand:DF 0 "s_register_operand" "=w") + (neg:DF (mult:DF (match_operand:DF 1 "s_register_operand" "w") + (match_operand:DF 2 "s_register_operand" "w"))))] "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" "vnmul%?.f64\\t%P0, %P1, %P2" [(set_attr "predicable" "yes") diff -Naur gcc-5.2.0.orig/gcc/config/avr/avr.c gcc-5.2.0/gcc/config/avr/avr.c --- gcc-5.2.0.orig/gcc/config/avr/avr.c 2015-03-10 04:50:41.000000000 -0500 +++ gcc-5.2.0/gcc/config/avr/avr.c 2015-08-20 09:15:30.212999000 -0500 @@ -9272,10 +9272,10 @@ { if (TYPE_P (node)) error ("%qT uses address space %qs beyond flash of %d KiB", - node, avr_addrspace[as].name, avr_n_flash); + node, avr_addrspace[as].name, 64 * avr_n_flash); else error ("%s %q+D uses address space %qs beyond flash of %d KiB", - reason, node, avr_addrspace[as].name, avr_n_flash); + reason, node, avr_addrspace[as].name, 64 * avr_n_flash); } else { @@ -9322,7 +9322,7 @@ if (avr_addrspace[as].segment >= avr_n_flash) { error ("variable %q+D located in address space %qs beyond flash " - "of %d KiB", node, avr_addrspace[as].name, avr_n_flash); + "of %d KiB", node, avr_addrspace[as].name, 64 * avr_n_flash); } else if (!AVR_HAVE_LPM && avr_addrspace[as].pointer_size > 2) { diff -Naur 
gcc-5.2.0.orig/gcc/config/avr/avr-dimode.md gcc-5.2.0/gcc/config/avr/avr-dimode.md --- gcc-5.2.0.orig/gcc/config/avr/avr-dimode.md 2015-01-05 06:33:28.000000000 -0600 +++ gcc-5.2.0/gcc/config/avr/avr-dimode.md 2015-07-21 12:29:47.408167000 -0500 @@ -461,7 +461,8 @@ (match_operand:SI 2 "general_operand" "") ;; Just to mention the iterator (clobber (any_extend:SI (match_dup 1)))])] - "avr_have_dimode" + "avr_have_dimode + && AVR_HAVE_MUL" { avr_fix_inputs (operands, 1 << 2, regmask (SImode, 22)); emit_move_insn (gen_rtx_REG (SImode, 22), operands[1]); @@ -480,7 +481,8 @@ (any_extend:DI (reg:SI 22)))) (clobber (reg:HI REG_X)) (clobber (reg:HI REG_Z))] - "avr_have_dimode" + "avr_have_dimode + && AVR_HAVE_MUL" "%~call __mulsidi3" [(set_attr "adjust_len" "call") (set_attr "cc" "clobber")]) diff -Naur gcc-5.2.0.orig/gcc/config/i386/i386.c gcc-5.2.0/gcc/config/i386/i386.c --- gcc-5.2.0.orig/gcc/config/i386/i386.c 2015-06-18 05:56:43.000000000 -0500 +++ gcc-5.2.0/gcc/config/i386/i386.c 2015-10-22 01:52:00.764034000 -0500 @@ -4055,11 +4055,11 @@ if (opts_set->x_ix86_incoming_stack_boundary_arg) { if (opts->x_ix86_incoming_stack_boundary_arg - < (TARGET_64BIT_P (opts->x_ix86_isa_flags) ? 4 : 2) + < (TARGET_64BIT_P (opts->x_ix86_isa_flags) ? 3 : 2) || opts->x_ix86_incoming_stack_boundary_arg > 12) error ("-mincoming-stack-boundary=%d is not between %d and 12", opts->x_ix86_incoming_stack_boundary_arg, - TARGET_64BIT_P (opts->x_ix86_isa_flags) ? 4 : 2); + TARGET_64BIT_P (opts->x_ix86_isa_flags) ? 3 : 2); else { ix86_user_incoming_stack_boundary @@ -5201,6 +5201,14 @@ TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts (); } ix86_previous_fndecl = fndecl; + + /* 64-bit MS and SYSV ABI have different set of call used registers. + Avoid expensive re-initialization of init_regs each time we switch + function context. 
*/ + if (TARGET_64BIT + && (call_used_regs[SI_REG] + == (cfun->machine->call_abi == MS_ABI))) + reinit_regs (); } @@ -6344,17 +6352,6 @@ cfun->machine->call_abi = ix86_function_type_abi (TREE_TYPE (fndecl)); } -/* 64-bit MS and SYSV ABI have different set of call used registers. Avoid - expensive re-initialization of init_regs each time we switch function context - since this is needed only during RTL expansion. */ -static void -ix86_maybe_switch_abi (void) -{ - if (TARGET_64BIT && - call_used_regs[SI_REG] == (cfun->machine->call_abi == MS_ABI)) - reinit_regs (); -} - /* Return 1 if pseudo register should be created and used to hold GOT address for PIC code. */ bool @@ -8211,7 +8208,8 @@ case SI_REG: return TARGET_64BIT && ix86_cfun_abi () != MS_ABI; - case FIRST_BND_REG: + case BND0_REG: + case BND1_REG: return chkp_function_instrumented_p (current_function_decl); /* Complex values are returned in %st(0)/%st(1) pair. */ @@ -10104,11 +10102,14 @@ frame->nregs = ix86_nsaved_regs (); frame->nsseregs = ix86_nsaved_sseregs (); - /* 64-bit MS ABI seem to require stack alignment to be always 16 except for - function prologues and leaf. */ + /* 64-bit MS ABI seem to require stack alignment to be always 16, + except for function prologues, leaf functions and when the defult + incoming stack boundary is overriden at command line or via + force_align_arg_pointer attribute. */ if ((TARGET_64BIT_MS_ABI && crtl->preferred_stack_boundary < 128) && (!crtl->is_leaf || cfun->calls_alloca != 0 - || ix86_current_function_calls_tls_descriptor)) + || ix86_current_function_calls_tls_descriptor + || ix86_incoming_stack_boundary < 128)) { crtl->preferred_stack_boundary = 128; crtl->stack_alignment_needed = 128; @@ -10207,10 +10208,14 @@ if (frame->nsseregs) { /* The only ABI that has saved SSE registers (Win64) also has a - 16-byte aligned default stack, and thus we don't need to be - within the re-aligned local stack frame to save them. 
*/ - gcc_assert (INCOMING_STACK_BOUNDARY >= 128); - offset = (offset + 16 - 1) & -16; + 16-byte aligned default stack, and thus we don't need to be + within the re-aligned local stack frame to save them. In case + incoming stack boundary is aligned to less than 16 bytes, + unaligned move of SSE register will be emitted, so there is + no point to round up the SSE register save area outside the + re-aligned local stack frame to 16 bytes. */ + if (ix86_incoming_stack_boundary >= 128) + offset = (offset + 16 - 1) & -16; offset += frame->nsseregs * 16; } frame->sse_reg_save_offset = offset; @@ -10220,7 +10225,7 @@ sure that no value happens to be the same before and after, force the alignment computation below to add a non-zero value. */ if (stack_realign_fp) - offset = (offset + stack_alignment_needed) & -stack_alignment_needed; + offset = (offset + stack_alignment_needed - 1) & -stack_alignment_needed; /* Va-arg area */ frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size; @@ -10433,15 +10438,24 @@ { struct machine_function *m = cfun->machine; rtx reg = gen_rtx_REG (mode, regno); + rtx unspec = NULL_RTX; rtx mem, addr, base, insn; + unsigned int align; addr = choose_baseaddr (cfa_offset); mem = gen_frame_mem (mode, addr); - /* For SSE saves, we need to indicate the 128-bit alignment. */ - set_mem_align (mem, GET_MODE_ALIGNMENT (mode)); + /* The location is aligned up to INCOMING_STACK_BOUNDARY. */ + align = MIN (GET_MODE_ALIGNMENT (mode), INCOMING_STACK_BOUNDARY); + set_mem_align (mem, align); + + /* SSE saves are not within re-aligned local stack frame. + In case INCOMING_STACK_BOUNDARY is misaligned, we have + to emit unaligned store. */ + if (mode == V4SFmode && align < 128) + unspec = gen_rtx_UNSPEC (mode, gen_rtvec (1, reg), UNSPEC_STOREU); - insn = emit_move_insn (mem, reg); + insn = emit_insn (gen_rtx_SET (VOIDmode, mem, unspec ? 
unspec : reg)); RTX_FRAME_RELATED_P (insn) = 1; base = addr; @@ -10489,6 +10503,9 @@ mem = gen_rtx_MEM (mode, addr); add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (VOIDmode, mem, reg)); } + else if (unspec) + add_reg_note (insn, REG_CFA_EXPRESSION, + gen_rtx_SET (VOIDmode, mem, reg)); } /* Emit code to save registers using MOV insns. @@ -10705,6 +10722,25 @@ } } +/* Handle a "force_align_arg_pointer" attribute. */ + +static tree +ix86_handle_force_align_arg_pointer_attribute (tree *node, tree name, + tree, int, bool *no_add_attrs) +{ + if (TREE_CODE (*node) != FUNCTION_TYPE + && TREE_CODE (*node) != METHOD_TYPE + && TREE_CODE (*node) != FIELD_DECL + && TREE_CODE (*node) != TYPE_DECL) + { + warning (OPT_Wattributes, "%qE attribute only applies to functions", + name); + *no_add_attrs = true; + } + + return NULL_TREE; +} + /* Return minimum incoming stack alignment. */ static unsigned int @@ -10719,7 +10755,6 @@ if -mstackrealign is used, it isn't used for sibcall check and estimated stack alignment is 128bit. */ else if (!sibcall - && !TARGET_64BIT && ix86_force_align_arg_pointer && crtl->stack_alignment_estimated == 128) incoming_stack_boundary = MIN_STACK_BOUNDARY; @@ -11578,7 +11613,7 @@ pointer is no longer valid. As for the value of sp_offset, see ix86_compute_frame_layout, which we need to match in order to pass verification of stack_pointer_offset at the end. */ - m->fs.sp_offset = (m->fs.sp_offset + align_bytes) & -align_bytes; + m->fs.sp_offset = (m->fs.sp_offset + align_bytes - 1) & -align_bytes; m->fs.sp_valid = false; } @@ -11991,11 +12026,26 @@ { rtx reg = gen_rtx_REG (V4SFmode, regno); rtx mem; + unsigned int align; mem = choose_baseaddr (cfa_offset); mem = gen_rtx_MEM (V4SFmode, mem); - set_mem_align (mem, 128); - emit_move_insn (reg, mem); + + /* The location is aligned up to INCOMING_STACK_BOUNDARY. 
*/ + align = MIN (GET_MODE_ALIGNMENT (V4SFmode), INCOMING_STACK_BOUNDARY); + set_mem_align (mem, align); + + /* SSE saves are not within re-aligned local stack frame. + In case INCOMING_STACK_BOUNDARY is misaligned, we have + to emit unaligned load. */ + if (align < 128) + { + rtx unspec = gen_rtx_UNSPEC (V4SFmode, gen_rtvec (1, mem), + UNSPEC_LOADU); + emit_insn (gen_rtx_SET (VOIDmode, reg, unspec)); + } + else + emit_insn (gen_rtx_SET (VOIDmode, reg, mem)); ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset); @@ -25140,7 +25190,8 @@ dst = change_address (dst, BLKmode, destreg); set_mem_align (dst, desired_align * BITS_PER_UNIT); epilogue_size_needed = 0; - if (need_zero_guard && !min_size) + if (need_zero_guard + && min_size < (unsigned HOST_WIDE_INT) size_needed) { /* It is possible that we copied enough so the main loop will not execute. */ @@ -25272,7 +25323,7 @@ max_size -= align_bytes; } if (need_zero_guard - && !min_size + && min_size < (unsigned HOST_WIDE_INT) size_needed && (count < (unsigned HOST_WIDE_INT) size_needed || (align_bytes == 0 && count < ((unsigned HOST_WIDE_INT) size_needed @@ -25557,7 +25608,7 @@ /* Avoid branch in fixing the byte. */ tmpreg = gen_lowpart (QImode, tmpreg); - emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg)); + emit_insn (gen_addqi3_cconly_overflow (tmpreg, tmpreg)); tmp = gen_rtx_REG (CCmode, FLAGS_REG); cmp = gen_rtx_LTU (VOIDmode, tmp, const0_rtx); emit_insn (ix86_gen_sub3_carry (out, out, GEN_INT (3), tmp, cmp)); @@ -39522,60 +39573,57 @@ return target; case IX86_BUILTIN_SBB32: - icode = CODE_FOR_subsi3_carry; + icode = CODE_FOR_subborrowsi; mode0 = SImode; - goto addcarryx; + goto handlecarry; case IX86_BUILTIN_SBB64: - icode = CODE_FOR_subdi3_carry; + icode = CODE_FOR_subborrowdi; mode0 = DImode; - goto addcarryx; + goto handlecarry; case IX86_BUILTIN_ADDCARRYX32: - icode = TARGET_ADX ? 
CODE_FOR_adcxsi3 : CODE_FOR_addsi3_carry; + icode = CODE_FOR_addcarrysi; mode0 = SImode; - goto addcarryx; + goto handlecarry; case IX86_BUILTIN_ADDCARRYX64: - icode = TARGET_ADX ? CODE_FOR_adcxdi3 : CODE_FOR_adddi3_carry; + icode = CODE_FOR_addcarrydi; mode0 = DImode; -addcarryx: + handlecarry: arg0 = CALL_EXPR_ARG (exp, 0); /* unsigned char c_in. */ arg1 = CALL_EXPR_ARG (exp, 1); /* unsigned int src1. */ arg2 = CALL_EXPR_ARG (exp, 2); /* unsigned int src2. */ arg3 = CALL_EXPR_ARG (exp, 3); /* unsigned int *sum_out. */ - op0 = gen_reg_rtx (QImode); - - /* Generate CF from input operand. */ op1 = expand_normal (arg0); op1 = copy_to_mode_reg (QImode, convert_to_mode (QImode, op1, 1)); - emit_insn (gen_addqi3_cc (op0, op1, constm1_rtx)); - /* Gen ADCX instruction to compute X+Y+CF. */ op2 = expand_normal (arg1); - op3 = expand_normal (arg2); - - if (!REG_P (op2)) + if (!register_operand (op2, mode0)) op2 = copy_to_mode_reg (mode0, op2); - if (!REG_P (op3)) - op3 = copy_to_mode_reg (mode0, op3); - - op0 = gen_reg_rtx (mode0); - op4 = gen_rtx_REG (CCCmode, FLAGS_REG); - pat = gen_rtx_LTU (VOIDmode, op4, const0_rtx); - emit_insn (GEN_FCN (icode) (op0, op2, op3, op4, pat)); + op3 = expand_normal (arg2); + if (!register_operand (op3, mode0)) + op3 = copy_to_mode_reg (mode0, op3); - /* Store the result. */ op4 = expand_normal (arg3); if (!address_operand (op4, VOIDmode)) { op4 = convert_memory_address (Pmode, op4); op4 = copy_addr_to_reg (op4); } - emit_move_insn (gen_rtx_MEM (mode0, op4), op0); + + /* Generate CF from input operand. */ + emit_insn (gen_addqi3_cconly_overflow (op1, constm1_rtx)); + + /* Generate instruction that consumes CF. */ + op0 = gen_reg_rtx (mode0); + + op1 = gen_rtx_REG (CCCmode, FLAGS_REG); + pat = gen_rtx_LTU (mode0, op1, const0_rtx); + emit_insn (GEN_FCN (icode) (op0, op2, op3, op1, pat)); /* Return current CF value. 
*/ if (target == 0) @@ -39583,6 +39631,10 @@ PUT_MODE (pat, QImode); emit_insn (gen_rtx_SET (VOIDmode, target, pat)); + + /* Store the result. */ + emit_move_insn (gen_rtx_MEM (mode0, op4), op0); + return target; case IX86_BUILTIN_READ_FLAGS: @@ -46836,7 +46888,7 @@ true }, /* force_align_arg_pointer says this function realigns the stack at entry. */ { (const char *)&ix86_force_align_arg_pointer_string, 0, 0, - false, true, true, ix86_handle_cconv_attribute, false }, + false, true, true, ix86_handle_force_align_arg_pointer_attribute, false }, #if TARGET_DLLIMPORT_DECL_ATTRIBUTES { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false }, { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false }, @@ -48957,6 +49009,62 @@ return true; } +/* A subroutine of expand_vec_perm_even_odd_1. Implement extract-even + and extract-odd permutations of two V64QI operands + with two "shifts", two "truncs" and one "concat" insns for "odd" + and two "truncs" and one concat insn for "even." + Have already failed all two instruction sequences. */ + +static bool +expand_vec_perm_even_odd_trunc (struct expand_vec_perm_d *d) +{ + rtx t1, t2, t3, t4; + unsigned i, odd, nelt = d->nelt; + + if (!TARGET_AVX512BW + || d->one_operand_p + || d->vmode != V64QImode) + return false; + + /* Check that permutation is even or odd. 
*/ + odd = d->perm[0]; + if (odd > 1) + return false; + + for (i = 1; i < nelt; ++i) + if (d->perm[i] != 2 * i + odd) + return false; + + if (d->testing_p) + return true; + + + if (odd) + { + t1 = gen_reg_rtx (V32HImode); + t2 = gen_reg_rtx (V32HImode); + emit_insn (gen_lshrv32hi3 (t1, + gen_lowpart (V32HImode, d->op0), + GEN_INT (8))); + emit_insn (gen_lshrv32hi3 (t2, + gen_lowpart (V32HImode, d->op1), + GEN_INT (8))); + } + else + { + t1 = gen_lowpart (V32HImode, d->op0); + t2 = gen_lowpart (V32HImode, d->op1); + } + + t3 = gen_reg_rtx (V32QImode); + t4 = gen_reg_rtx (V32QImode); + emit_insn (gen_avx512bw_truncatev32hiv32qi2 (t3, t1)); + emit_insn (gen_avx512bw_truncatev32hiv32qi2 (t4, t2)); + emit_insn (gen_avx_vec_concatv64qi (d->target, t3, t4)); + + return true; +} + /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement extract-even and extract-odd permutations. */ @@ -49059,6 +49167,9 @@ case V32QImode: return expand_vec_perm_even_odd_pack (d); + case V64QImode: + return expand_vec_perm_even_odd_trunc (d); + case V4DImode: if (!TARGET_AVX2) { @@ -49520,6 +49631,8 @@ /* Try sequences of four instructions. */ + if (expand_vec_perm_even_odd_trunc (d)) + return true; if (expand_vec_perm_vpshufb2_vpermq (d)) return true; @@ -50335,15 +50448,20 @@ unsigned int size = INTVAL (operands[1]); unsigned int pos = INTVAL (operands[2]); + if (GET_CODE (src) == SUBREG) + { + /* Reject non-lowpart subregs. */ + if (SUBREG_BYTE (src) != 0) + return false; + src = SUBREG_REG (src); + } + if (GET_CODE (dst) == SUBREG) { pos += SUBREG_BYTE (dst) * BITS_PER_UNIT; dst = SUBREG_REG (dst); } - if (GET_CODE (src) == SUBREG) - src = SUBREG_REG (src); - switch (GET_MODE (dst)) { case V16QImode: @@ -50391,6 +50509,10 @@ return false; } + /* Reject insertions to misaligned positions. 
*/ + if (pos & (size-1)) + return false; + rtx d = dst; if (GET_MODE (dst) != dstmode) d = gen_reg_rtx (dstmode); @@ -51516,7 +51638,7 @@ static unsigned HOST_WIDE_INT ix86_memmodel_check (unsigned HOST_WIDE_INT val) { - unsigned HOST_WIDE_INT model = val & MEMMODEL_MASK; + enum memmodel model = memmodel_from_int (val); bool strong; if (val & ~(unsigned HOST_WIDE_INT)(IX86_HLE_ACQUIRE|IX86_HLE_RELEASE @@ -51527,14 +51649,14 @@ "Unknown architecture specific memory model"); return MEMMODEL_SEQ_CST; } - strong = (model == MEMMODEL_ACQ_REL || model == MEMMODEL_SEQ_CST); - if (val & IX86_HLE_ACQUIRE && !(model == MEMMODEL_ACQUIRE || strong)) + strong = (is_mm_acq_rel (model) || is_mm_seq_cst (model)); + if (val & IX86_HLE_ACQUIRE && !(is_mm_acquire (model) || strong)) { warning (OPT_Winvalid_memory_model, "HLE_ACQUIRE not used with ACQUIRE or stronger memory model"); return MEMMODEL_SEQ_CST | IX86_HLE_ACQUIRE; } - if (val & IX86_HLE_RELEASE && !(model == MEMMODEL_RELEASE || strong)) + if (val & IX86_HLE_RELEASE && !(is_mm_release (model) || strong)) { warning (OPT_Winvalid_memory_model, "HLE_RELEASE not used with RELEASE or stronger memory model"); @@ -52307,9 +52429,6 @@ #undef TARGET_CAN_INLINE_P #define TARGET_CAN_INLINE_P ix86_can_inline_p -#undef TARGET_EXPAND_TO_RTL_HOOK -#define TARGET_EXPAND_TO_RTL_HOOK ix86_maybe_switch_abi - #undef TARGET_LEGITIMATE_ADDRESS_P #define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p diff -Naur gcc-5.2.0.orig/gcc/config/i386/i386.h gcc-5.2.0/gcc/config/i386/i386.h --- gcc-5.2.0.orig/gcc/config/i386/i386.h 2015-06-18 05:56:43.000000000 -0500 +++ gcc-5.2.0/gcc/config/i386/i386.h 2015-10-12 11:29:37.933151000 -0500 @@ -747,7 +747,7 @@ #define MAIN_STACK_BOUNDARY (TARGET_64BIT ? 128 : 32) /* Minimum stack boundary. */ -#define MIN_STACK_BOUNDARY (TARGET_64BIT ? (TARGET_SSE ? 
128 : 64) : 32) +#define MIN_STACK_BOUNDARY BITS_PER_WORD /* Boundary (in *bits*) on which the stack pointer prefers to be aligned; the compiler cannot rely on having this alignment. */ diff -Naur gcc-5.2.0.orig/gcc/config/i386/i386.md gcc-5.2.0/gcc/config/i386/i386.md --- gcc-5.2.0.orig/gcc/config/i386/i386.md 2015-06-26 05:30:37.000000000 -0500 +++ gcc-5.2.0/gcc/config/i386/i386.md 2015-09-22 04:49:20.487735000 -0500 @@ -102,7 +102,6 @@ UNSPEC_SAHF UNSPEC_PARITY UNSPEC_FSTCW - UNSPEC_ADD_CARRY UNSPEC_FLDCW UNSPEC_REP UNSPEC_LD_MPIC ; load_macho_picbase @@ -783,7 +782,8 @@ (define_attr "isa" "base,x64,x64_sse4,x64_sse4_noavx,x64_avx,nox64, sse2,sse2_noavx,sse3,sse4,sse4_noavx,avx,noavx, avx2,noavx2,bmi,bmi2,fma4,fma,avx512f,noavx512f, - fma_avx512f,avx512bw,noavx512bw,avx512dq,noavx512dq" + fma_avx512f,avx512bw,noavx512bw,avx512dq,noavx512dq, + avx512vl,noavx512vl" (const_string "base")) (define_attr "enabled" "" @@ -818,6 +818,8 @@ (eq_attr "isa" "noavx512bw") (symbol_ref "!TARGET_AVX512BW") (eq_attr "isa" "avx512dq") (symbol_ref "TARGET_AVX512DQ") (eq_attr "isa" "noavx512dq") (symbol_ref "!TARGET_AVX512DQ") + (eq_attr "isa" "avx512vl") (symbol_ref "TARGET_AVX512VL") + (eq_attr "isa" "noavx512vl") (symbol_ref "!TARGET_AVX512VL") ] (const_int 1))) @@ -844,8 +846,6 @@ (define_code_attr plusminus_mnemonic [(plus "add") (ss_plus "adds") (us_plus "addus") (minus "sub") (ss_minus "subs") (us_minus "subus")]) -(define_code_attr plusminus_carry_mnemonic - [(plus "adc") (minus "sbb")]) (define_code_attr multdiv_mnemonic [(mult "mul") (div "div")]) @@ -5051,11 +5051,11 @@ /* The DImode arrived in a pair of integral registers (e.g. %edx:%eax). Assemble the 64-bit DImode value in an xmm register. 
*/ emit_insn (gen_sse2_loadld (operands[3], CONST0_RTX (V4SImode), - gen_rtx_SUBREG (SImode, operands[1], 0))); + gen_lowpart (SImode, operands[1]))); emit_insn (gen_sse2_loadld (operands[4], CONST0_RTX (V4SImode), - gen_rtx_SUBREG (SImode, operands[1], 4))); + gen_highpart (SImode, operands[1]))); emit_insn (gen_vec_interleave_lowv4si (operands[3], operands[3], - operands[4])); + operands[4])); operands[3] = gen_rtx_REG (DImode, REGNO (operands[3])); }) @@ -5213,46 +5213,21 @@ "ix86_binary_operator_ok (PLUS, mode, operands)" "#" "reload_completed" - [(parallel [(set (reg:CC FLAGS_REG) - (unspec:CC [(match_dup 1) (match_dup 2)] - UNSPEC_ADD_CARRY)) + [(parallel [(set (reg:CCC FLAGS_REG) + (compare:CCC + (plus:DWIH (match_dup 1) (match_dup 2)) + (match_dup 1))) (set (match_dup 0) (plus:DWIH (match_dup 1) (match_dup 2)))]) (parallel [(set (match_dup 3) (plus:DWIH - (match_dup 4) (plus:DWIH (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0)) - (match_dup 5)))) + (match_dup 4)) + (match_dup 5))) (clobber (reg:CC FLAGS_REG))])] "split_double_mode (mode, &operands[0], 3, &operands[0], &operands[3]);") -(define_insn "*add3_cc" - [(set (reg:CC FLAGS_REG) - (unspec:CC - [(match_operand:SWI48 1 "nonimmediate_operand" "%0,0") - (match_operand:SWI48 2 "" "r,rm")] - UNSPEC_ADD_CARRY)) - (set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r") - (plus:SWI48 (match_dup 1) (match_dup 2)))] - "ix86_binary_operator_ok (PLUS, mode, operands)" - "add{}\t{%2, %0|%0, %2}" - [(set_attr "type" "alu") - (set_attr "mode" "")]) - -(define_insn "addqi3_cc" - [(set (reg:CC FLAGS_REG) - (unspec:CC - [(match_operand:QI 1 "nonimmediate_operand" "%0,0") - (match_operand:QI 2 "general_operand" "qn,qm")] - UNSPEC_ADD_CARRY)) - (set (match_operand:QI 0 "nonimmediate_operand" "=qm,q") - (plus:QI (match_dup 1) (match_dup 2)))] - "ix86_binary_operator_ok (PLUS, QImode, operands)" - "add{b}\t{%2, %0|%0, %2}" - [(set_attr "type" "alu") - (set_attr "mode" "QI")]) - (define_insn "*add_1" [(set 
(match_operand:SWI48 0 "nonimmediate_operand" "=r,rm,r,r") (plus:SWI48 @@ -6160,10 +6135,10 @@ (minus:DWIH (match_dup 1) (match_dup 2)))]) (parallel [(set (match_dup 3) (minus:DWIH - (match_dup 4) - (plus:DWIH - (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0)) - (match_dup 5)))) + (minus:DWIH + (match_dup 4) + (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0))) + (match_dup 5))) (clobber (reg:CC FLAGS_REG))])] "split_double_mode (mode, &operands[0], 3, &operands[0], &operands[3]);") @@ -6327,29 +6302,17 @@ ;; Add with carry and subtract with borrow -(define_expand "3_carry" - [(parallel - [(set (match_operand:SWI 0 "nonimmediate_operand") - (plusminus:SWI - (match_operand:SWI 1 "nonimmediate_operand") - (plus:SWI (match_operator:SWI 4 "ix86_carry_flag_operator" - [(match_operand 3 "flags_reg_operand") - (const_int 0)]) - (match_operand:SWI 2 "")))) - (clobber (reg:CC FLAGS_REG))])] - "ix86_binary_operator_ok (, mode, operands)") - -(define_insn "*3_carry" +(define_insn "add3_carry" [(set (match_operand:SWI 0 "nonimmediate_operand" "=m,") - (plusminus:SWI - (match_operand:SWI 1 "nonimmediate_operand" "0,0") + (plus:SWI (plus:SWI - (match_operator 3 "ix86_carry_flag_operator" - [(reg FLAGS_REG) (const_int 0)]) - (match_operand:SWI 2 "" ",m")))) + (match_operator:SWI 4 "ix86_carry_flag_operator" + [(match_operand 3 "flags_reg_operand") (const_int 0)]) + (match_operand:SWI 1 "nonimmediate_operand" "%0,0")) + (match_operand:SWI 2 "" ",m"))) (clobber (reg:CC FLAGS_REG))] "ix86_binary_operator_ok (PLUS, mode, operands)" - "{}\t{%2, %0|%0, %2}" + "adc{}\t{%2, %0|%0, %2}" [(set_attr "type" "alu") (set_attr "use_carry" "1") (set_attr "pent_pair" "pu") @@ -6358,10 +6321,11 @@ (define_insn "*addsi3_carry_zext" [(set (match_operand:DI 0 "register_operand" "=r") (zero_extend:DI - (plus:SI (match_operand:SI 1 "nonimmediate_operand" "%0") - (plus:SI (match_operator 3 "ix86_carry_flag_operator" - [(reg FLAGS_REG) (const_int 0)]) - (match_operand:SI 2 "x86_64_general_operand" "rme"))))) + 
(plus:SI + (plus:SI (match_operator:SI 3 "ix86_carry_flag_operator" + [(reg FLAGS_REG) (const_int 0)]) + (match_operand:SI 1 "register_operand" "%0")) + (match_operand:SI 2 "x86_64_general_operand" "rme")))) (clobber (reg:CC FLAGS_REG))] "TARGET_64BIT && ix86_binary_operator_ok (PLUS, SImode, operands)" "adc{l}\t{%2, %k0|%k0, %2}" @@ -6370,45 +6334,96 @@ (set_attr "pent_pair" "pu") (set_attr "mode" "SI")]) +;; There is no point to generate ADCX instruction. ADC is shorter and faster. + +(define_insn "addcarry" + [(set (reg:CCC FLAGS_REG) + (compare:CCC + (plus:SWI48 + (plus:SWI48 + (match_operator:SWI48 4 "ix86_carry_flag_operator" + [(match_operand 3 "flags_reg_operand") (const_int 0)]) + (match_operand:SWI48 1 "nonimmediate_operand" "%0")) + (match_operand:SWI48 2 "nonimmediate_operand" "rm")) + (match_dup 1))) + (set (match_operand:SWI48 0 "register_operand" "=r") + (plus:SWI48 (plus:SWI48 (match_op_dup 4 + [(match_dup 3) (const_int 0)]) + (match_dup 1)) + (match_dup 2)))] + "ix86_binary_operator_ok (PLUS, mode, operands)" + "adc{}\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "use_carry" "1") + (set_attr "pent_pair" "pu") + (set_attr "mode" "")]) + +(define_insn "sub3_carry" + [(set (match_operand:SWI 0 "nonimmediate_operand" "=m,") + (minus:SWI + (minus:SWI + (match_operand:SWI 1 "nonimmediate_operand" "0,0") + (match_operator:SWI 4 "ix86_carry_flag_operator" + [(match_operand 3 "flags_reg_operand") (const_int 0)])) + (match_operand:SWI 2 "" ",m"))) + (clobber (reg:CC FLAGS_REG))] + "ix86_binary_operator_ok (MINUS, mode, operands)" + "sbb{}\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "use_carry" "1") + (set_attr "pent_pair" "pu") + (set_attr "mode" "")]) + (define_insn "*subsi3_carry_zext" [(set (match_operand:DI 0 "register_operand" "=r") (zero_extend:DI - (minus:SI (match_operand:SI 1 "register_operand" "0") - (plus:SI (match_operator 3 "ix86_carry_flag_operator" - [(reg FLAGS_REG) (const_int 0)]) - (match_operand:SI 2 
"x86_64_general_operand" "rme"))))) + (minus:SI + (minus:SI + (match_operand:SI 1 "register_operand" "0") + (match_operator:SI 3 "ix86_carry_flag_operator" + [(reg FLAGS_REG) (const_int 0)])) + (match_operand:SI 2 "x86_64_general_operand" "rme")))) (clobber (reg:CC FLAGS_REG))] "TARGET_64BIT && ix86_binary_operator_ok (MINUS, SImode, operands)" "sbb{l}\t{%2, %k0|%k0, %2}" [(set_attr "type" "alu") + (set_attr "use_carry" "1") (set_attr "pent_pair" "pu") (set_attr "mode" "SI")]) - -;; ADCX instruction -(define_insn "adcx3" +(define_insn "subborrow" [(set (reg:CCC FLAGS_REG) (compare:CCC + (match_operand:SWI48 1 "nonimmediate_operand" "0") (plus:SWI48 - (match_operand:SWI48 1 "nonimmediate_operand" "%0") - (plus:SWI48 - (match_operator 4 "ix86_carry_flag_operator" - [(match_operand 3 "flags_reg_operand") (const_int 0)]) - (match_operand:SWI48 2 "nonimmediate_operand" "rm"))) - (const_int 0))) + (match_operator:SWI48 4 "ix86_carry_flag_operator" + [(match_operand 3 "flags_reg_operand") (const_int 0)]) + (match_operand:SWI48 2 "nonimmediate_operand" "rm")))) (set (match_operand:SWI48 0 "register_operand" "=r") - (plus:SWI48 (match_dup 1) - (plus:SWI48 (match_op_dup 4 - [(match_dup 3) (const_int 0)]) - (match_dup 2))))] - "TARGET_ADX && ix86_binary_operator_ok (PLUS, mode, operands)" - "adcx\t{%2, %0|%0, %2}" + (minus:SWI48 (minus:SWI48 (match_dup 1) + (match_op_dup 4 + [(match_dup 3) (const_int 0)])) + (match_dup 2)))] + "ix86_binary_operator_ok (MINUS, mode, operands)" + "sbb{}\t{%2, %0|%0, %2}" [(set_attr "type" "alu") (set_attr "use_carry" "1") + (set_attr "pent_pair" "pu") (set_attr "mode" "")]) ;; Overflow setting add instructions +(define_expand "addqi3_cconly_overflow" + [(parallel + [(set (reg:CCC FLAGS_REG) + (compare:CCC + (plus:QI + (match_operand:QI 0 "nonimmediate_operand") + (match_operand:QI 1 "general_operand")) + (match_dup 0))) + (clobber (match_scratch:QI 2))])] + "!(MEM_P (operands[0]) && MEM_P (operands[1]))") + (define_insn "*add3_cconly_overflow" 
[(set (reg:CCC FLAGS_REG) (compare:CCC @@ -8462,11 +8477,11 @@ (clobber (reg:CC FLAGS_REG))] "TARGET_AVX512F && reload_completed" [(parallel [(set (match_dup 0) - (xor:HI (match_dup 0) - (match_dup 1))) + (xor:SWI1248x (match_dup 0) + (match_dup 1))) (clobber (reg:CC FLAGS_REG))]) (set (match_dup 0) - (not:HI (match_dup 0)))]) + (not:SWI1248x (match_dup 0)))]) ;;There are kortrest[bdq] but no intrinsics for them. ;;We probably don't need to implement them. @@ -8500,7 +8515,7 @@ [(set (match_operand:HI 0 "register_operand" "=k") (ior:HI (ashift:HI - (match_operand:HI 1 "register_operand" "k") + (zero_extend:HI (match_operand:QI 1 "register_operand" "k")) (const_int 8)) (zero_extend:HI (match_operand:QI 2 "register_operand" "k"))))] "TARGET_AVX512F" @@ -8513,9 +8528,9 @@ [(set (match_operand:SI 0 "register_operand" "=k") (ior:SI (ashift:SI - (match_operand:SI 1 "register_operand" "k") + (zero_extend:SI (match_operand:HI 1 "register_operand" "k")) (const_int 16)) - (zero_extend:SI (subreg:HI (match_operand:SI 2 "register_operand" "k") 0))))] + (zero_extend:SI (match_operand:HI 2 "register_operand" "k"))))] "TARGET_AVX512BW" "kunpckwd\t{%2, %1, %0|%0, %1, %2}" [(set_attr "mode" "SI")]) @@ -8524,9 +8539,9 @@ [(set (match_operand:DI 0 "register_operand" "=k") (ior:DI (ashift:DI - (match_operand:DI 1 "register_operand" "k") + (zero_extend:DI (match_operand:SI 1 "register_operand" "k")) (const_int 32)) - (zero_extend:DI (subreg:SI (match_operand:DI 2 "register_operand" "k") 0))))] + (zero_extend:DI (match_operand:SI 2 "register_operand" "k"))))] "TARGET_AVX512BW" "kunpckdq\t{%2, %1, %0|%0, %1, %2}" [(set_attr "mode" "DI")]) @@ -8754,9 +8769,9 @@ (set (match_dup 0) (neg:DWIH (match_dup 1)))]) (parallel [(set (match_dup 2) - (plus:DWIH (match_dup 3) - (plus:DWIH (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0)) - (const_int 0)))) + (plus:DWIH (plus:DWIH (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0)) + (match_dup 3)) + (const_int 0))) (clobber (reg:CC FLAGS_REG))]) (parallel [(set 
(match_dup 2) @@ -13237,7 +13252,8 @@ (call:P (mem:QI (match_operand 2 "constant_call_address_operand" "Bz")) (match_operand 3))) - (unspec:P [(match_operand 1 "tls_symbolic_operand")] + (unspec:P [(match_operand 1 "tls_symbolic_operand") + (reg:P SP_REG)] UNSPEC_TLS_GD)] "TARGET_64BIT" { @@ -13261,8 +13277,9 @@ (mem:QI (plus:DI (match_operand:DI 2 "register_operand" "b") (match_operand:DI 3 "immediate_operand" "i"))) (match_operand 4))) - (unspec:DI [(match_operand 1 "tls_symbolic_operand")] - UNSPEC_TLS_GD)] + (unspec:DI [(match_operand 1 "tls_symbolic_operand") + (reg:DI SP_REG)] + UNSPEC_TLS_GD)] "TARGET_64BIT && ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF && GET_CODE (operands[3]) == CONST && GET_CODE (XEXP (operands[3], 0)) == UNSPEC @@ -13283,7 +13300,8 @@ (call:P (mem:QI (match_operand 2)) (const_int 0))) - (unspec:P [(match_operand 1 "tls_symbolic_operand")] + (unspec:P [(match_operand 1 "tls_symbolic_operand") + (reg:P SP_REG)] UNSPEC_TLS_GD)])] "TARGET_64BIT" "ix86_tls_descriptor_calls_expanded_in_cfun = true;") @@ -13333,7 +13351,7 @@ (call:P (mem:QI (match_operand 1 "constant_call_address_operand" "Bz")) (match_operand 2))) - (unspec:P [(const_int 0)] UNSPEC_TLS_LD_BASE)] + (unspec:P [(reg:P SP_REG)] UNSPEC_TLS_LD_BASE)] "TARGET_64BIT" { output_asm_insn @@ -13351,7 +13369,7 @@ (mem:QI (plus:DI (match_operand:DI 1 "register_operand" "b") (match_operand:DI 2 "immediate_operand" "i"))) (match_operand 3))) - (unspec:DI [(const_int 0)] UNSPEC_TLS_LD_BASE)] + (unspec:DI [(reg:DI SP_REG)] UNSPEC_TLS_LD_BASE)] "TARGET_64BIT && ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF && GET_CODE (operands[2]) == CONST && GET_CODE (XEXP (operands[2], 0)) == UNSPEC @@ -13372,7 +13390,7 @@ (call:P (mem:QI (match_operand 1)) (const_int 0))) - (unspec:P [(const_int 0)] UNSPEC_TLS_LD_BASE)])] + (unspec:P [(reg:P SP_REG)] UNSPEC_TLS_LD_BASE)])] "TARGET_64BIT" "ix86_tls_descriptor_calls_expanded_in_cfun = true;") @@ -17431,8 +17449,8 @@ ;; lifetime information then. 
(define_peephole2 - [(set (match_operand:SWI124 0 "nonimmediate_operand") - (not:SWI124 (match_operand:SWI124 1 "nonimmediate_operand")))] + [(set (match_operand:SWI124 0 "nonimmediate_gr_operand") + (not:SWI124 (match_operand:SWI124 1 "nonimmediate_gr_operand")))] "optimize_insn_for_speed_p () && ((TARGET_NOT_UNPAIRABLE && (!MEM_P (operands[0]) @@ -17576,8 +17594,10 @@ [(match_dup 0) (match_operand 2 "memory_operand")]))] "REGNO (operands[0]) != REGNO (operands[1]) - && ((MMX_REG_P (operands[0]) && MMX_REG_P (operands[1])) - || (SSE_REG_P (operands[0]) && SSE_REG_P (operands[1])))" + && ((MMX_REGNO_P (REGNO (operands[0])) + && MMX_REGNO_P (REGNO (operands[1]))) + || (SSE_REGNO_P (REGNO (operands[0])) + && SSE_REGNO_P (REGNO (operands[1]))))" [(set (match_dup 0) (match_dup 2)) (set (match_dup 0) (match_op_dup 3 [(match_dup 0) (match_dup 1)]))]) @@ -17725,7 +17745,7 @@ (match_operand 1 "const0_operand"))] "GET_MODE_SIZE (GET_MODE (operands[0])) <= UNITS_PER_WORD && (! TARGET_USE_MOV0 || optimize_insn_for_size_p ()) - && GENERAL_REG_P (operands[0]) + && GENERAL_REGNO_P (REGNO (operands[0])) && peep2_regno_dead_p (0, FLAGS_REG)" [(parallel [(set (match_dup 0) (const_int 0)) (clobber (reg:CC FLAGS_REG))])] @@ -17746,6 +17766,7 @@ [(set (match_operand:SWI248 0 "register_operand") (const_int -1))] "(optimize_insn_for_size_p () || TARGET_MOVE_M1_VIA_OR) + && GENERAL_REGNO_P (REGNO (operands[0])) && peep2_regno_dead_p (0, FLAGS_REG)" [(parallel [(set (match_dup 0) (const_int -1)) (clobber (reg:CC FLAGS_REG))])] @@ -18113,11 +18134,13 @@ operands[1] = gen_rtx_PLUS (word_mode, base, gen_rtx_MULT (word_mode, index, GEN_INT (scale))); - operands[5] = base; if (mode != word_mode) operands[1] = gen_rtx_SUBREG (mode, operands[1], 0); + + operands[5] = base; if (op1mode != word_mode) - operands[5] = gen_rtx_SUBREG (op1mode, operands[5], 0); + operands[5] = gen_lowpart (op1mode, operands[5]); + operands[0] = dest; }) diff -Naur gcc-5.2.0.orig/gcc/config/i386/intelmic-mkoffload.c 
gcc-5.2.0/gcc/config/i386/intelmic-mkoffload.c --- gcc-5.2.0.orig/gcc/config/i386/intelmic-mkoffload.c 2015-04-06 07:40:28.000000000 -0500 +++ gcc-5.2.0/gcc/config/i386/intelmic-mkoffload.c 2015-09-07 10:32:23.683954000 -0500 @@ -453,17 +453,18 @@ fork_execute (objcopy_argv[0], CONST_CAST (char **, objcopy_argv), false); /* Objcopy has created symbols, containing the input file name with - special characters replaced with '_'. We are going to rename these - new symbols. */ + non-alphanumeric characters replaced by underscores. + We are going to rename these new symbols. */ size_t symbol_name_len = strlen (target_so_filename); char *symbol_name = XALLOCAVEC (char, symbol_name_len + 1); - for (size_t i = 0; i <= symbol_name_len; i++) + for (size_t i = 0; i < symbol_name_len; i++) { char c = target_so_filename[i]; - if ((c == '/') || (c == '.')) + if (!ISALNUM (c)) c = '_'; symbol_name[i] = c; } + symbol_name[symbol_name_len] = '\0'; char *opt_for_objcopy[3]; opt_for_objcopy[0] = XALLOCAVEC (char, sizeof ("_binary__start=") diff -Naur gcc-5.2.0.orig/gcc/config/i386/predicates.md gcc-5.2.0/gcc/config/i386/predicates.md --- gcc-5.2.0.orig/gcc/config/i386/predicates.md 2015-01-31 09:30:30.000000000 -0600 +++ gcc-5.2.0/gcc/config/i386/predicates.md 2015-07-17 11:27:58.471071000 -0500 @@ -37,6 +37,12 @@ (and (match_code "reg") (match_test "GENERAL_REG_P (op)"))) +;; True if the operand is a nonimmediate operand with GENERAL class register. +(define_predicate "nonimmediate_gr_operand" + (if_then_else (match_code "reg") + (match_test "GENERAL_REGNO_P (REGNO (op))") + (match_operand 0 "nonimmediate_operand"))) + ;; Return true if OP is a register operand other than an i387 fp register. 
(define_predicate "register_and_not_fp_reg_operand" (and (match_code "reg") diff -Naur gcc-5.2.0.orig/gcc/config/i386/sol2.h gcc-5.2.0/gcc/config/i386/sol2.h --- gcc-5.2.0.orig/gcc/config/i386/sol2.h 2015-01-05 06:33:28.000000000 -0600 +++ gcc-5.2.0/gcc/config/i386/sol2.h 2015-10-01 07:01:18.897040000 -0500 @@ -86,13 +86,10 @@ #endif #endif -#undef ENDFILE_SPEC -#define ENDFILE_SPEC \ - "%{Ofast|ffast-math|funsafe-math-optimizations:crtfastmath.o%s} \ - %{mpc32:crtprec32.o%s} \ +#define ENDFILE_ARCH_SPEC \ + "%{mpc32:crtprec32.o%s} \ %{mpc64:crtprec64.o%s} \ - %{mpc80:crtprec80.o%s} \ - crtend.o%s crtn.o%s" + %{mpc80:crtprec80.o%s}" #define SUBTARGET_CPU_EXTRA_SPECS \ { "cpp_subtarget", CPP_SUBTARGET_SPEC }, \ diff -Naur gcc-5.2.0.orig/gcc/config/i386/sse.md gcc-5.2.0/gcc/config/i386/sse.md --- gcc-5.2.0.orig/gcc/config/i386/sse.md 2015-06-08 05:28:40.000000000 -0500 +++ gcc-5.2.0/gcc/config/i386/sse.md 2015-10-12 06:03:56.562174000 -0500 @@ -375,8 +375,8 @@ [(V16HI "TARGET_AVX2") V8HI (V8SI "TARGET_AVX2") V4SI]) -(define_mode_iterator VI124_AVX512F - [(V32QI "TARGET_AVX2") V16QI +(define_mode_iterator VI124_AVX2_24_AVX512F_1_AVX512BW + [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX2") V16QI (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX2") V8HI (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI]) @@ -392,9 +392,9 @@ [(V8SI "TARGET_AVX2") V4SI (V4DI "TARGET_AVX2") V2DI]) -(define_mode_iterator VI248_AVX2_8_AVX512F - [(V16HI "TARGET_AVX2") V8HI - (V8SI "TARGET_AVX2") V4SI +(define_mode_iterator VI248_AVX2_8_AVX512F_24_AVX512BW + [(V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI + (V16SI "TARGET_AVX512BW") (V8SI "TARGET_AVX2") V4SI (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX2") V2DI]) (define_mode_iterator VI248_AVX512BW_AVX512VL @@ -410,6 +410,14 @@ [(V16SI "TARGET_AVX512F") V8SI V4SI (V8DI "TARGET_AVX512F") V4DI V2DI]) +(define_mode_iterator VI48_AVX_AVX512F + [(V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI + (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI]) + 
+(define_mode_iterator VI12_AVX_AVX512F + [ (V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI + (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX") V8HI]) + (define_mode_iterator V48_AVX2 [V4SF V2DF V8SF V4DF @@ -1078,9 +1086,9 @@ /* The DImode arrived in a pair of integral registers (e.g. %edx:%eax). Assemble the 64-bit DImode value in an xmm register. */ emit_insn (gen_sse2_loadld (operands[0], CONST0_RTX (V4SImode), - gen_rtx_SUBREG (SImode, operands[1], 0))); + gen_lowpart (SImode, operands[1]))); emit_insn (gen_sse2_loadld (operands[2], CONST0_RTX (V4SImode), - gen_rtx_SUBREG (SImode, operands[1], 4))); + gen_highpart (SImode, operands[1]))); emit_insn (gen_vec_interleave_lowv4si (operands[0], operands[0], operands[2])); } @@ -8490,42 +8498,48 @@ (set_attr "mode" "DF,DF,V1DF,V1DF,V1DF,V2DF,V1DF,V1DF,V1DF")]) (define_insn "vec_dupv2df" - [(set (match_operand:V2DF 0 "register_operand" "=x,v") + [(set (match_operand:V2DF 0 "register_operand" "=x,x,v") (vec_duplicate:V2DF - (match_operand:DF 1 "nonimmediate_operand" " 0,vm")))] + (match_operand:DF 1 "nonimmediate_operand" " 0,xm,vm")))] "TARGET_SSE2 && " "@ unpcklpd\t%0, %0 - %vmovddup\t{%1, %0|%0, %1}" - [(set_attr "isa" "noavx,sse3") + %vmovddup\t{%1, %0|%0, %1} + vmovddup\t{%1, %0|%0, %1}" + [(set_attr "isa" "noavx,sse3,avx512vl") (set_attr "type" "sselog1") - (set_attr "prefix" "orig,maybe_vex") - (set_attr "mode" "V2DF,DF")]) + (set_attr "prefix" "orig,maybe_vex,evex") + (set_attr "mode" "V2DF,DF,DF")]) (define_insn "*vec_concatv2df" - [(set (match_operand:V2DF 0 "register_operand" "=x,v,v,x,x,v,x,x") + [(set (match_operand:V2DF 0 "register_operand" "=x,x,v,x,v,x,x,v,x,x") (vec_concat:V2DF - (match_operand:DF 1 "nonimmediate_operand" " 0,v,m,0,x,m,0,0") - (match_operand:DF 2 "vector_move_operand" " x,v,1,m,m,C,x,m")))] + (match_operand:DF 1 "nonimmediate_operand" " 0,x,v,m,m,0,x,m,0,0") + (match_operand:DF 2 "vector_move_operand" " x,x,v,1,1,m,m,C,x,m")))] "TARGET_SSE" "@ unpcklpd\t{%2, %0|%0, %2} vunpcklpd\t{%2, 
%1, %0|%0, %1, %2} + vunpcklpd\t{%2, %1, %0|%0, %1, %2} %vmovddup\t{%1, %0|%0, %1} + vmovddup\t{%1, %0|%0, %1} movhpd\t{%2, %0|%0, %2} vmovhpd\t{%2, %1, %0|%0, %1, %2} %vmovsd\t{%1, %0|%0, %1} movlhps\t{%2, %0|%0, %2} movhps\t{%2, %0|%0, %2}" - [(set_attr "isa" "sse2_noavx,avx,sse3,sse2_noavx,avx,sse2,noavx,noavx") + [(set_attr "isa" "sse2_noavx,avx,avx512vl,sse3,avx512vl,sse2_noavx,avx,sse2,noavx,noavx") (set (attr "type") (if_then_else - (eq_attr "alternative" "0,1,2") + (eq_attr "alternative" "0,1,2,3,4") (const_string "sselog") (const_string "ssemov"))) - (set_attr "prefix_data16" "*,*,*,1,*,*,*,*") - (set_attr "prefix" "orig,vex,maybe_vex,orig,vex,maybe_vex,orig,orig") - (set_attr "mode" "V2DF,V2DF,DF,V1DF,V1DF,DF,V4SF,V2SF")]) + (set (attr "prefix_data16") + (if_then_else (eq_attr "alternative" "5") + (const_string "1") + (const_string "*"))) + (set_attr "prefix" "orig,vex,evex,maybe_vex,evex,orig,vex,maybe_vex,orig,orig") + (set_attr "mode" "V2DF,V2DF,V2DF, DF, DF, V1DF,V1DF,DF,V4SF,V2SF")]) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; @@ -8575,7 +8589,7 @@ (match_operand: 2 "register_operand")))] "TARGET_AVX512F") -(define_insn "*avx512bw_v32hiv32qi2" +(define_insn "avx512bw_v32hiv32qi2" [(set (match_operand:V32QI 0 "nonimmediate_operand" "=v,m") (any_truncate:V32QI (match_operand:V32HI 1 "register_operand" "v,v")))] @@ -10911,10 +10925,10 @@ }) (define_insn "3" - [(set (match_operand:VI 0 "register_operand" "=x,v") - (any_logic:VI - (match_operand:VI 1 "nonimmediate_operand" "%0,v") - (match_operand:VI 2 "nonimmediate_operand" "xm,vm")))] + [(set (match_operand:VI48_AVX_AVX512F 0 "register_operand" "=x,v") + (any_logic:VI48_AVX_AVX512F + (match_operand:VI48_AVX_AVX512F 1 "nonimmediate_operand" "%0,v") + (match_operand:VI48_AVX_AVX512F 2 "nonimmediate_operand" "xm,vm")))] "TARGET_SSE && && ix86_binary_operator_ok (, mode, operands)" { @@ -10943,24 +10957,120 @@ case V4DImode: case V4SImode: case V2DImode: - if (TARGET_AVX512VL) + 
tmp = TARGET_AVX512VL ? "p" : "p"; + break; + default: + gcc_unreachable (); + } + break; + + case MODE_V8SF: + gcc_assert (TARGET_AVX); + case MODE_V4SF: + gcc_assert (TARGET_SSE); + gcc_assert (!); + tmp = "ps"; + break; + + default: + gcc_unreachable (); + } + + switch (which_alternative) + { + case 0: + if () + ops = "v%s\t{%%2, %%0, %%0|%%0, %%0, %%2}"; + else + ops = "%s\t{%%2, %%0|%%0, %%2}"; + break; + case 1: + ops = "v%s\t{%%2, %%1, %%0|%%0, %%1, %%2}"; + break; + default: + gcc_unreachable (); + } + + snprintf (buf, sizeof (buf), ops, tmp); + return buf; +} + [(set_attr "isa" "noavx,avx") + (set_attr "type" "sselog") + (set (attr "prefix_data16") + (if_then_else + (and (eq_attr "alternative" "0") + (eq_attr "mode" "TI")) + (const_string "1") + (const_string "*"))) + (set_attr "prefix" "") + (set (attr "mode") + (cond [(and (match_test " == 16") + (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")) + (const_string "") + (match_test "TARGET_AVX2") + (const_string "") + (match_test "TARGET_AVX") + (if_then_else + (match_test " > 16") + (const_string "V8SF") + (const_string "")) + (ior (not (match_test "TARGET_SSE2")) + (match_test "optimize_function_for_size_p (cfun)")) + (const_string "V4SF") + ] + (const_string "")))]) + +(define_insn "*3" + [(set (match_operand:VI12_AVX_AVX512F 0 "register_operand" "=x,v") + (any_logic: VI12_AVX_AVX512F + (match_operand:VI12_AVX_AVX512F 1 "nonimmediate_operand" "%0,v") + (match_operand:VI12_AVX_AVX512F 2 "nonimmediate_operand" "xm,vm")))] + "TARGET_SSE && ix86_binary_operator_ok (, mode, operands)" +{ + static char buf[64]; + const char *ops; + const char *tmp; + const char *ssesuffix; + + switch (get_attr_mode (insn)) + { + case MODE_XI: + gcc_assert (TARGET_AVX512F); + case MODE_OI: + gcc_assert (TARGET_AVX2 || TARGET_AVX512VL); + case MODE_TI: + gcc_assert (TARGET_SSE2 || TARGET_AVX512VL); + switch (mode) + { + case V64QImode: + case V32HImode: + if (TARGET_AVX512F) { - tmp = "p"; + tmp = "p"; + ssesuffix = "q"; + 
break; + } + case V32QImode: + case V16HImode: + case V16QImode: + case V8HImode: + if (TARGET_AVX512VL || TARGET_AVX2 || TARGET_SSE2) + { + tmp = "p"; + ssesuffix = TARGET_AVX512VL ? "q" : ""; break; } default: - tmp = TARGET_AVX512VL ? "pq" : "p"; + gcc_unreachable (); } break; - case MODE_V16SF: - gcc_assert (TARGET_AVX512F); case MODE_V8SF: gcc_assert (TARGET_AVX); case MODE_V4SF: gcc_assert (TARGET_SSE); - tmp = "ps"; + ssesuffix = ""; break; default: @@ -10971,15 +11081,16 @@ { case 0: ops = "%s\t{%%2, %%0|%%0, %%2}"; + snprintf (buf, sizeof (buf), ops, tmp); break; case 1: - ops = "v%s\t{%%2, %%1, %%0|%%0, %%1, %%2}"; + ops = "v%s%s\t{%%2, %%1, %%0|%%0, %%1, %%2}"; + snprintf (buf, sizeof (buf), ops, tmp, ssesuffix); break; default: gcc_unreachable (); } - snprintf (buf, sizeof (buf), ops, tmp); return buf; } [(set_attr "isa" "noavx,avx") @@ -11060,8 +11171,8 @@ (define_expand "vec_pack_trunc_" [(match_operand: 0 "register_operand") - (match_operand:VI248_AVX2_8_AVX512F 1 "register_operand") - (match_operand:VI248_AVX2_8_AVX512F 2 "register_operand")] + (match_operand:VI248_AVX2_8_AVX512F_24_AVX512BW 1 "register_operand") + (match_operand:VI248_AVX2_8_AVX512F_24_AVX512BW 2 "register_operand")] "TARGET_SSE2" { rtx op1 = gen_lowpart (mode, operands[1]); @@ -12971,25 +13082,25 @@ (define_expand "vec_unpacks_lo_" [(match_operand: 0 "register_operand") - (match_operand:VI124_AVX512F 1 "register_operand")] + (match_operand:VI124_AVX2_24_AVX512F_1_AVX512BW 1 "register_operand")] "TARGET_SSE2" "ix86_expand_sse_unpack (operands[0], operands[1], false, false); DONE;") (define_expand "vec_unpacks_hi_" [(match_operand: 0 "register_operand") - (match_operand:VI124_AVX512F 1 "register_operand")] + (match_operand:VI124_AVX2_24_AVX512F_1_AVX512BW 1 "register_operand")] "TARGET_SSE2" "ix86_expand_sse_unpack (operands[0], operands[1], false, true); DONE;") (define_expand "vec_unpacku_lo_" [(match_operand: 0 "register_operand") - (match_operand:VI124_AVX512F 1 
"register_operand")] + (match_operand:VI124_AVX2_24_AVX512F_1_AVX512BW 1 "register_operand")] "TARGET_SSE2" "ix86_expand_sse_unpack (operands[0], operands[1], true, false); DONE;") (define_expand "vec_unpacku_hi_" [(match_operand: 0 "register_operand") - (match_operand:VI124_AVX512F 1 "register_operand")] + (match_operand:VI124_AVX2_24_AVX512F_1_AVX512BW 1 "register_operand")] "TARGET_SSE2" "ix86_expand_sse_unpack (operands[0], operands[1], true, true); DONE;") diff -Naur gcc-5.2.0.orig/gcc/config/i386/sync.md gcc-5.2.0/gcc/config/i386/sync.md --- gcc-5.2.0.orig/gcc/config/i386/sync.md 2015-04-02 02:56:05.000000000 -0500 +++ gcc-5.2.0/gcc/config/i386/sync.md 2015-08-05 06:20:59.983324000 -0500 @@ -105,11 +105,11 @@ [(match_operand:SI 0 "const_int_operand")] ;; model "" { - enum memmodel model = (enum memmodel) (INTVAL (operands[0]) & MEMMODEL_MASK); + enum memmodel model = memmodel_from_int (INTVAL (operands[0])); /* Unless this is a SEQ_CST fence, the i386 memory model is strong enough not to require barriers of any kind. */ - if (model == MEMMODEL_SEQ_CST) + if (is_mm_seq_cst (model)) { rtx (*mfence_insn)(rtx); rtx mem; @@ -217,7 +217,7 @@ UNSPEC_STA))] "" { - enum memmodel model = (enum memmodel) (INTVAL (operands[2]) & MEMMODEL_MASK); + enum memmodel model = memmodel_from_int (INTVAL (operands[2])); if (mode == DImode && !TARGET_64BIT) { @@ -233,7 +233,7 @@ operands[1] = force_reg (mode, operands[1]); /* For seq-cst stores, when we lack MFENCE, use XCHG. */ - if (model == MEMMODEL_SEQ_CST && !(TARGET_64BIT || TARGET_SSE2)) + if (is_mm_seq_cst (model) && !(TARGET_64BIT || TARGET_SSE2)) { emit_insn (gen_atomic_exchange (gen_reg_rtx (mode), operands[0], operands[1], @@ -246,7 +246,7 @@ operands[2])); } /* ... followed by an MFENCE, if required. 
*/ - if (model == MEMMODEL_SEQ_CST) + if (is_mm_seq_cst (model)) emit_insn (gen_mem_thread_fence (operands[2])); DONE; }) diff -Naur gcc-5.2.0.orig/gcc/config/ia64/ia64.c gcc-5.2.0/gcc/config/ia64/ia64.c --- gcc-5.2.0.orig/gcc/config/ia64/ia64.c 2015-03-03 04:41:00.000000000 -0600 +++ gcc-5.2.0/gcc/config/ia64/ia64.c 2015-08-05 06:20:59.983324000 -0500 @@ -2389,10 +2389,12 @@ { case MEMMODEL_ACQ_REL: case MEMMODEL_SEQ_CST: + case MEMMODEL_SYNC_SEQ_CST: emit_insn (gen_memory_barrier ()); /* FALLTHRU */ case MEMMODEL_RELAXED: case MEMMODEL_ACQUIRE: + case MEMMODEL_SYNC_ACQUIRE: case MEMMODEL_CONSUME: if (mode == SImode) icode = CODE_FOR_fetchadd_acq_si; @@ -2400,6 +2402,7 @@ icode = CODE_FOR_fetchadd_acq_di; break; case MEMMODEL_RELEASE: + case MEMMODEL_SYNC_RELEASE: if (mode == SImode) icode = CODE_FOR_fetchadd_rel_si; else @@ -2426,8 +2429,7 @@ front half of the full barrier. The end half is the cmpxchg.rel. For relaxed and release memory models, we don't need this. But we also don't bother trying to prevent it either. 
*/ - gcc_assert (model == MEMMODEL_RELAXED - || model == MEMMODEL_RELEASE + gcc_assert (is_mm_relaxed (model) || is_mm_release (model) || MEM_VOLATILE_P (mem)); old_reg = gen_reg_rtx (DImode); @@ -2471,6 +2473,7 @@ { case MEMMODEL_RELAXED: case MEMMODEL_ACQUIRE: + case MEMMODEL_SYNC_ACQUIRE: case MEMMODEL_CONSUME: switch (mode) { @@ -2484,8 +2487,10 @@ break; case MEMMODEL_RELEASE: + case MEMMODEL_SYNC_RELEASE: case MEMMODEL_ACQ_REL: case MEMMODEL_SEQ_CST: + case MEMMODEL_SYNC_SEQ_CST: switch (mode) { case QImode: icode = CODE_FOR_cmpxchg_rel_qi; break; diff -Naur gcc-5.2.0.orig/gcc/config/ia64/sync.md gcc-5.2.0/gcc/config/ia64/sync.md --- gcc-5.2.0.orig/gcc/config/ia64/sync.md 2015-01-05 06:33:28.000000000 -0600 +++ gcc-5.2.0/gcc/config/ia64/sync.md 2015-08-05 06:20:59.983324000 -0500 @@ -33,7 +33,7 @@ [(match_operand:SI 0 "const_int_operand" "")] ;; model "" { - if (INTVAL (operands[0]) == MEMMODEL_SEQ_CST) + if (is_mm_seq_cst (memmodel_from_int (INTVAL (operands[0])))) emit_insn (gen_memory_barrier ()); DONE; }) @@ -60,11 +60,11 @@ (match_operand:SI 2 "const_int_operand" "")] ;; model "" { - enum memmodel model = (enum memmodel) INTVAL (operands[2]); + enum memmodel model = memmodel_from_int (INTVAL (operands[2])); /* Unless the memory model is relaxed, we want to emit ld.acq, which will happen automatically for volatile memories. */ - gcc_assert (model == MEMMODEL_RELAXED || MEM_VOLATILE_P (operands[1])); + gcc_assert (is_mm_relaxed (model) || MEM_VOLATILE_P (operands[1])); emit_move_insn (operands[0], operands[1]); DONE; }) @@ -75,17 +75,17 @@ (match_operand:SI 2 "const_int_operand" "")] ;; model "" { - enum memmodel model = (enum memmodel) INTVAL (operands[2]); + enum memmodel model = memmodel_from_int (INTVAL (operands[2])); /* Unless the memory model is relaxed, we want to emit st.rel, which will happen automatically for volatile memories. 
*/ - gcc_assert (model == MEMMODEL_RELAXED || MEM_VOLATILE_P (operands[0])); + gcc_assert (is_mm_relaxed (model) || MEM_VOLATILE_P (operands[0])); emit_move_insn (operands[0], operands[1]); /* Sequentially consistent stores need a subsequent MF. See http://www.decadent.org.uk/pipermail/cpp-threads/2008-December/001952.html for a discussion of why a MF is needed here, but not for atomic_load. */ - if (model == MEMMODEL_SEQ_CST) + if (is_mm_seq_cst (model)) emit_insn (gen_memory_barrier ()); DONE; }) @@ -101,7 +101,8 @@ (match_operand:SI 7 "const_int_operand" "")] ;; fail model "" { - enum memmodel model = (enum memmodel) INTVAL (operands[6]); + /* No need to distinquish __sync from __atomic, so get base value. */ + enum memmodel model = memmodel_base (INTVAL (operands[6])); rtx ccv = gen_rtx_REG (DImode, AR_CCV_REGNUM); rtx dval, eval; @@ -200,7 +201,8 @@ (match_operand:SI 3 "const_int_operand" "")] ;; succ model "" { - enum memmodel model = (enum memmodel) INTVAL (operands[3]); + /* No need to distinquish __sync from __atomic, so get base value. */ + enum memmodel model = memmodel_base (INTVAL (operands[3])); switch (model) { diff -Naur gcc-5.2.0.orig/gcc/config/mips/mips.c gcc-5.2.0/gcc/config/mips/mips.c --- gcc-5.2.0.orig/gcc/config/mips/mips.c 2015-01-28 03:44:39.000000000 -0600 +++ gcc-5.2.0/gcc/config/mips/mips.c 2015-08-05 06:20:59.983324000 -0500 @@ -13111,7 +13111,7 @@ model = MEMMODEL_ACQUIRE; break; default: - model = (enum memmodel) INTVAL (operands[memmodel_attr]); + model = memmodel_from_int (INTVAL (operands[memmodel_attr])); } mips_multi_start (); diff -Naur gcc-5.2.0.orig/gcc/config/nios2/nios2.c gcc-5.2.0/gcc/config/nios2/nios2.c --- gcc-5.2.0.orig/gcc/config/nios2/nios2.c 2015-03-21 21:39:24.000000000 -0500 +++ gcc-5.2.0/gcc/config/nios2/nios2.c 2015-09-22 07:23:20.768742000 -0500 @@ -1894,15 +1894,15 @@ Which will be output as '%tls_le(var+48)(r23)' in assembly. 
*/ if (GET_CODE (x) == PLUS - && GET_CODE (XEXP (x, 0)) == REG && GET_CODE (XEXP (x, 1)) == CONST) { - rtx unspec, offset, reg = XEXP (x, 0); + rtx unspec, offset; split_const (XEXP (x, 1), &unspec, &offset); if (GET_CODE (unspec) == UNSPEC && !nios2_large_offset_p (XINT (unspec, 1)) && offset != const0_rtx) { + rtx reg = force_reg (Pmode, XEXP (x, 0)); unspec = copy_rtx (unspec); XVECEXP (unspec, 0, 0) = plus_constant (Pmode, XVECEXP (unspec, 0, 0), INTVAL (offset)); diff -Naur gcc-5.2.0.orig/gcc/config/pa/pa.c gcc-5.2.0/gcc/config/pa/pa.c --- gcc-5.2.0.orig/gcc/config/pa/pa.c 2015-06-11 18:22:32.000000000 -0500 +++ gcc-5.2.0/gcc/config/pa/pa.c 2015-09-24 19:04:26.380077000 -0500 @@ -2443,6 +2443,7 @@ enum { REGOP, OFFSOP, MEMOP, CNSTOP, RNDOP } optype0, optype1; rtx latehalf[2]; rtx addreg0 = 0, addreg1 = 0; + int highonly = 0; /* First classify both operands. */ @@ -2653,7 +2654,14 @@ else if (optype1 == OFFSOP) latehalf[1] = adjust_address_nv (operands[1], SImode, 4); else if (optype1 == CNSTOP) - split_double (operands[1], &operands[1], &latehalf[1]); + { + if (GET_CODE (operands[1]) == HIGH) + { + operands[1] = XEXP (operands[1], 0); + highonly = 1; + } + split_double (operands[1], &operands[1], &latehalf[1]); + } else latehalf[1] = operands[1]; @@ -2706,8 +2714,11 @@ if (addreg1) output_asm_insn ("ldo 4(%0),%0", &addreg1); - /* Do that word. */ - output_asm_insn (pa_singlemove_string (latehalf), latehalf); + /* Do high-numbered word. */ + if (highonly) + output_asm_insn ("ldil L'%1,%0", latehalf); + else + output_asm_insn (pa_singlemove_string (latehalf), latehalf); /* Undo the adds we just did. */ if (addreg0) @@ -5712,7 +5723,7 @@ } if (TARGET_SYNC_LIBCALL) - init_sync_libfuncs (UNITS_PER_WORD); + init_sync_libfuncs (8); } /* HP's millicode routines mean something special to the assembler. 
@@ -8473,14 +8484,6 @@ if (TARGET_PORTABLE_RUNTIME) return false; - /* Sibcalls are ok for TARGET_ELF32 as along as the linker is used in - single subspace mode and the call is not indirect. As far as I know, - there is no operating system support for the multiple subspace mode. - It might be possible to support indirect calls if we didn't use - $$dyncall (see the indirect sequence generated in pa_output_call). */ - if (TARGET_ELF32) - return (decl != NULL_TREE); - /* Sibcalls are not ok because the arg pointer register is not a fixed register. This prevents the sibcall optimization from occurring. In addition, there are problems with stub placement using GNU ld. This @@ -10515,4 +10518,79 @@ fputs ("\t.end_brtab\n", asm_out_file); } +/* This is a helper function for the other atomic operations. This function + emits a loop that contains SEQ that iterates until a compare-and-swap + operation at the end succeeds. MEM is the memory to be modified. SEQ is + a set of instructions that takes a value from OLD_REG as an input and + produces a value in NEW_REG as an output. Before SEQ, OLD_REG will be + set to the current contents of MEM. After SEQ, a compare-and-swap will + attempt to update MEM with NEW_REG. The function returns true when the + loop was generated successfully. */ + +static bool +pa_expand_compare_and_swap_loop (rtx mem, rtx old_reg, rtx new_reg, rtx seq) +{ + machine_mode mode = GET_MODE (mem); + rtx_code_label *label; + rtx cmp_reg, success, oldval; + + /* The loop we want to generate looks like + + cmp_reg = mem; + label: + old_reg = cmp_reg; + seq; + (success, cmp_reg) = compare-and-swap(mem, old_reg, new_reg) + if (success) + goto label; + + Note that we only do the plain load from memory once. Subsequent + iterations use the value loaded by the compare-and-swap pattern. 
*/ + + label = gen_label_rtx (); + cmp_reg = gen_reg_rtx (mode); + + emit_move_insn (cmp_reg, mem); + emit_label (label); + emit_move_insn (old_reg, cmp_reg); + if (seq) + emit_insn (seq); + + success = NULL_RTX; + oldval = cmp_reg; + if (!expand_atomic_compare_and_swap (&success, &oldval, mem, old_reg, + new_reg, false, MEMMODEL_SYNC_SEQ_CST, + MEMMODEL_RELAXED)) + return false; + + if (oldval != cmp_reg) + emit_move_insn (cmp_reg, oldval); + + /* Mark this jump predicted not taken. */ + emit_cmp_and_jump_insns (success, const0_rtx, EQ, const0_rtx, + GET_MODE (success), 1, label, 0); + return true; +} + +/* This function tries to implement an atomic exchange operation using a + compare_and_swap loop. VAL is written to *MEM. The previous contents of + *MEM are returned, using TARGET if possible. No memory model is required + since a compare_and_swap loop is seq-cst. */ + +rtx +pa_maybe_emit_compare_and_swap_exchange_loop (rtx target, rtx mem, rtx val) +{ + machine_mode mode = GET_MODE (mem); + + if (can_compare_and_swap_p (mode, true)) + { + if (!target || !register_operand (target, mode)) + target = gen_reg_rtx (mode); + if (pa_expand_compare_and_swap_loop (mem, target, val, NULL_RTX)) + return target; + } + + return NULL_RTX; +} + #include "gt-pa.h" diff -Naur gcc-5.2.0.orig/gcc/config/pa/pa-linux.h gcc-5.2.0/gcc/config/pa/pa-linux.h --- gcc-5.2.0.orig/gcc/config/pa/pa-linux.h 2015-05-28 09:07:55.000000000 -0500 +++ gcc-5.2.0/gcc/config/pa/pa-linux.h 2015-09-24 19:04:26.380077000 -0500 @@ -140,3 +140,4 @@ #define HAVE_sync_compare_and_swapqi 1 #define HAVE_sync_compare_and_swaphi 1 #define HAVE_sync_compare_and_swapsi 1 +#define HAVE_sync_compare_and_swapdi 1 diff -Naur gcc-5.2.0.orig/gcc/config/pa/pa.md gcc-5.2.0/gcc/config/pa/pa.md --- gcc-5.2.0.orig/gcc/config/pa/pa.md 2015-07-04 08:44:39.000000000 -0500 +++ gcc-5.2.0/gcc/config/pa/pa.md 2015-09-24 19:04:26.380077000 -0500 @@ -699,59 +699,229 @@ ;; doubleword loads and stores are not guaranteed to be atomic ;; 
when referencing the I/O address space. -;; Implement atomic DImode load using 64-bit floating point load and copy. +;; The kernel cmpxchg operation on linux is not atomic with respect to +;; memory stores on SMP machines, so we must do stores using a cmpxchg +;; operation. + +;; Implement atomic QImode store using exchange. + +(define_expand "atomic_storeqi" + [(match_operand:QI 0 "memory_operand") ;; memory + (match_operand:QI 1 "register_operand") ;; val out + (match_operand:SI 2 "const_int_operand")] ;; model + "" +{ + if (TARGET_SYNC_LIBCALL) + { + rtx mem = operands[0]; + rtx val = operands[1]; + if (pa_maybe_emit_compare_and_swap_exchange_loop (NULL_RTX, mem, val)) + DONE; + } + FAIL; +}) + +;; Implement atomic HImode stores using exchange. + +(define_expand "atomic_storehi" + [(match_operand:HI 0 "memory_operand") ;; memory + (match_operand:HI 1 "register_operand") ;; val out + (match_operand:SI 2 "const_int_operand")] ;; model + "" +{ + if (TARGET_SYNC_LIBCALL) + { + rtx mem = operands[0]; + rtx val = operands[1]; + if (pa_maybe_emit_compare_and_swap_exchange_loop (NULL_RTX, mem, val)) + DONE; + } + FAIL; +}) + +;; Implement atomic SImode store using exchange. + +(define_expand "atomic_storesi" + [(match_operand:SI 0 "memory_operand") ;; memory + (match_operand:SI 1 "register_operand") ;; val out + (match_operand:SI 2 "const_int_operand")] ;; model + "" +{ + if (TARGET_SYNC_LIBCALL) + { + rtx mem = operands[0]; + rtx val = operands[1]; + if (pa_maybe_emit_compare_and_swap_exchange_loop (NULL_RTX, mem, val)) + DONE; + } + FAIL; +}) + +;; Implement atomic SFmode store using exchange. 
+ +(define_expand "atomic_storesf" + [(match_operand:SF 0 "memory_operand") ;; memory + (match_operand:SF 1 "register_operand") ;; val out + (match_operand:SI 2 "const_int_operand")] ;; model + "" +{ + if (TARGET_SYNC_LIBCALL) + { + rtx mem = operands[0]; + rtx val = operands[1]; + if (pa_maybe_emit_compare_and_swap_exchange_loop (NULL_RTX, mem, val)) + DONE; + } + FAIL; +}) + +;; Implement atomic DImode load using 64-bit floating point load. (define_expand "atomic_loaddi" [(match_operand:DI 0 "register_operand") ;; val out (match_operand:DI 1 "memory_operand") ;; memory (match_operand:SI 2 "const_int_operand")] ;; model - "!TARGET_64BIT && !TARGET_SOFT_FLOAT" + "" { - enum memmodel model = (enum memmodel) INTVAL (operands[2]); + enum memmodel model; + + if (TARGET_64BIT || TARGET_SOFT_FLOAT) + FAIL; + + model = memmodel_from_int (INTVAL (operands[2])); operands[1] = force_reg (SImode, XEXP (operands[1], 0)); - operands[2] = gen_reg_rtx (DImode); expand_mem_thread_fence (model); - emit_insn (gen_atomic_loaddi_1 (operands[0], operands[1], operands[2])); - if ((model & MEMMODEL_MASK) == MEMMODEL_SEQ_CST) + emit_insn (gen_atomic_loaddi_1 (operands[0], operands[1])); + if (is_mm_seq_cst (model)) expand_mem_thread_fence (model); DONE; }) (define_insn "atomic_loaddi_1" - [(set (match_operand:DI 0 "register_operand" "=r") - (mem:DI (match_operand:SI 1 "register_operand" "r"))) - (clobber (match_operand:DI 2 "register_operand" "=&f"))] + [(set (match_operand:DI 0 "register_operand" "=f,r") + (mem:DI (match_operand:SI 1 "register_operand" "r,r"))) + (clobber (match_scratch:DI 2 "=X,f"))] "!TARGET_64BIT && !TARGET_SOFT_FLOAT" - "{fldds|fldd} 0(%1),%2\;{fstds|fstd} %2,-16(%%sp)\;{ldws|ldw} -16(%%sp),%0\;{ldws|ldw} -12(%%sp),%R0" - [(set_attr "type" "move") - (set_attr "length" "16")]) + "@ + {fldds|fldd} 0(%1),%0 + {fldds|fldd} 0(%1),%2\n\t{fstds|fstd} %2,-16(%%sp)\n\t{ldws|ldw} -16(%%sp),%0\n\t{ldws|ldw} -12(%%sp),%R0" + [(set_attr "type" "move,move") + (set_attr "length" 
"4,16")]) -;; Implement atomic DImode store using copy and 64-bit floating point store. +;; Implement atomic DImode store. (define_expand "atomic_storedi" [(match_operand:DI 0 "memory_operand") ;; memory (match_operand:DI 1 "register_operand") ;; val out (match_operand:SI 2 "const_int_operand")] ;; model - "!TARGET_64BIT && !TARGET_SOFT_FLOAT" + "" { - enum memmodel model = (enum memmodel) INTVAL (operands[2]); + enum memmodel model; + + if (TARGET_SYNC_LIBCALL) + { + rtx mem = operands[0]; + rtx val = operands[1]; + if (pa_maybe_emit_compare_and_swap_exchange_loop (NULL_RTX, mem, val)) + DONE; + } + + if (TARGET_64BIT || TARGET_SOFT_FLOAT) + FAIL; + + model = memmodel_from_int (INTVAL (operands[2])); operands[0] = force_reg (SImode, XEXP (operands[0], 0)); - operands[2] = gen_reg_rtx (DImode); expand_mem_thread_fence (model); - emit_insn (gen_atomic_storedi_1 (operands[0], operands[1], operands[2])); - if ((model & MEMMODEL_MASK) == MEMMODEL_SEQ_CST) + emit_insn (gen_atomic_storedi_1 (operands[0], operands[1])); + if (is_mm_seq_cst (model)) expand_mem_thread_fence (model); DONE; }) (define_insn "atomic_storedi_1" - [(set (mem:DI (match_operand:SI 0 "register_operand" "r")) - (match_operand:DI 1 "register_operand" "r")) - (clobber (match_operand:DI 2 "register_operand" "=&f"))] + [(set (mem:DI (match_operand:SI 0 "register_operand" "r,r")) + (match_operand:DI 1 "register_operand" "f,r")) + (clobber (match_scratch:DI 2 "=X,f"))] + "!TARGET_64BIT && !TARGET_SOFT_FLOAT && !TARGET_SYNC_LIBCALL" + "@ + {fstds|fstd} %1,0(%0) + {stws|stw} %1,-16(%%sp)\n\t{stws|stw} %R1,-12(%%sp)\n\t{fldds|fldd} -16(%%sp),%2\n\t{fstds|fstd} %2,0(%0)" + [(set_attr "type" "move,move") + (set_attr "length" "4,16")]) + +;; Implement atomic DFmode load using 64-bit floating point load. 
+ +(define_expand "atomic_loaddf" + [(match_operand:DF 0 "register_operand") ;; val out + (match_operand:DF 1 "memory_operand") ;; memory + (match_operand:SI 2 "const_int_operand")] ;; model + "" +{ + enum memmodel model; + + if (TARGET_64BIT || TARGET_SOFT_FLOAT) + FAIL; + + model = memmodel_from_int (INTVAL (operands[2])); + operands[1] = force_reg (SImode, XEXP (operands[1], 0)); + expand_mem_thread_fence (model); + emit_insn (gen_atomic_loaddf_1 (operands[0], operands[1])); + if (is_mm_seq_cst (model)) + expand_mem_thread_fence (model); + DONE; +}) + +(define_insn "atomic_loaddf_1" + [(set (match_operand:DF 0 "register_operand" "=f,r") + (mem:DF (match_operand:SI 1 "register_operand" "r,r"))) + (clobber (match_scratch:DF 2 "=X,f"))] "!TARGET_64BIT && !TARGET_SOFT_FLOAT" - "{stws|stw} %1,-16(%%sp)\;{stws|stw} %R1,-12(%%sp)\;{fldds|fldd} -16(%%sp),%2\;{fstds|fstd} %2,0(%0)" - [(set_attr "type" "move") - (set_attr "length" "16")]) + "@ + {fldds|fldd} 0(%1),%0 + {fldds|fldd} 0(%1),%2\n\t{fstds|fstd} %2,-16(%%sp)\n\t{ldws|ldw} -16(%%sp),%0\n\t{ldws|ldw} -12(%%sp),%R0" + [(set_attr "type" "move,move") + (set_attr "length" "4,16")]) + +;; Implement atomic DFmode store using 64-bit floating point store. 
+ +(define_expand "atomic_storedf" + [(match_operand:DF 0 "memory_operand") ;; memory + (match_operand:DF 1 "register_operand") ;; val out + (match_operand:SI 2 "const_int_operand")] ;; model + "" +{ + enum memmodel model; + + if (TARGET_SYNC_LIBCALL) + { + rtx mem = operands[0]; + rtx val = operands[1]; + if (pa_maybe_emit_compare_and_swap_exchange_loop (NULL_RTX, mem, val)) + DONE; + } + + if (TARGET_64BIT || TARGET_SOFT_FLOAT) + FAIL; + + model = memmodel_from_int (INTVAL (operands[2])); + operands[0] = force_reg (SImode, XEXP (operands[0], 0)); + expand_mem_thread_fence (model); + emit_insn (gen_atomic_storedf_1 (operands[0], operands[1])); + if (is_mm_seq_cst (model)) + expand_mem_thread_fence (model); + DONE; +}) + +(define_insn "atomic_storedf_1" + [(set (mem:DF (match_operand:SI 0 "register_operand" "r,r")) + (match_operand:DF 1 "register_operand" "f,r")) + (clobber (match_scratch:DF 2 "=X,f"))] + "!TARGET_64BIT && !TARGET_SOFT_FLOAT" + "@ + {fstds|fstd} %1,0(%0) + {stws|stw} %1,-16(%%sp)\n\t{stws|stw} %R1,-12(%%sp)\n\t{fldds|fldd} -16(%%sp),%2\n\t{fstds|fstd} %2,0(%0)" + [(set_attr "type" "move,move") + (set_attr "length" "4,16")]) ;; Compare instructions. ;; This controls RTL generation and register allocation. 
@@ -7516,7 +7686,6 @@ (define_insn "call_reg_64bit" [(call (mem:SI (match_operand:DI 0 "register_operand" "r")) (match_operand 1 "" "i")) - (clobber (reg:DI 1)) (clobber (reg:DI 2)) (clobber (match_operand 2)) (use (reg:DI 27)) @@ -7537,7 +7706,6 @@ (define_split [(parallel [(call (mem:SI (match_operand 0 "register_operand" "")) (match_operand 1 "" "")) - (clobber (reg:DI 1)) (clobber (reg:DI 2)) (clobber (match_operand 2)) (use (reg:DI 27)) @@ -7548,7 +7716,6 @@ [(set (match_dup 2) (reg:DI 27)) (parallel [(call (mem:SI (match_dup 0)) (match_dup 1)) - (clobber (reg:DI 1)) (clobber (reg:DI 2)) (use (reg:DI 27)) (use (reg:DI 29)) @@ -7558,7 +7725,6 @@ (define_split [(parallel [(call (mem:SI (match_operand 0 "register_operand" "")) (match_operand 1 "" "")) - (clobber (reg:DI 1)) (clobber (reg:DI 2)) (clobber (match_operand 2)) (use (reg:DI 27)) @@ -7568,7 +7734,6 @@ [(set (match_dup 2) (reg:DI 27)) (parallel [(call (mem:SI (match_dup 0)) (match_dup 1)) - (clobber (reg:DI 1)) (clobber (reg:DI 2)) (use (reg:DI 27)) (use (reg:DI 29)) @@ -7579,7 +7744,6 @@ (define_insn "*call_reg_64bit_post_reload" [(call (mem:SI (match_operand:DI 0 "register_operand" "r")) (match_operand 1 "" "i")) - (clobber (reg:DI 1)) (clobber (reg:DI 2)) (use (reg:DI 27)) (use (reg:DI 29)) diff -Naur gcc-5.2.0.orig/gcc/config/pa/pa-protos.h gcc-5.2.0/gcc/config/pa/pa-protos.h --- gcc-5.2.0.orig/gcc/config/pa/pa-protos.h 2015-01-05 06:33:28.000000000 -0600 +++ gcc-5.2.0/gcc/config/pa/pa-protos.h 2015-09-24 19:04:26.380077000 -0500 @@ -79,6 +79,7 @@ #endif /* ARGS_SIZE_RTX */ extern int pa_insn_refs_are_delayed (rtx_insn *); extern rtx pa_get_deferred_plabel (rtx); +extern rtx pa_maybe_emit_compare_and_swap_exchange_loop (rtx, rtx, rtx); #endif /* RTX_CODE */ extern int pa_and_mask_p (unsigned HOST_WIDE_INT); diff -Naur gcc-5.2.0.orig/gcc/config/rs6000/htm.md gcc-5.2.0/gcc/config/rs6000/htm.md --- gcc-5.2.0.orig/gcc/config/rs6000/htm.md 2015-05-05 09:22:33.000000000 -0500 +++ 
gcc-5.2.0/gcc/config/rs6000/htm.md 2015-10-15 11:38:47.269450000 -0500 @@ -27,6 +27,14 @@ ]) ;; +;; UNSPEC usage +;; + +(define_c_enum "unspec" + [UNSPEC_HTM_FENCE + ]) + +;; ;; UNSPEC_VOLATILE usage ;; @@ -45,96 +53,223 @@ UNSPECV_HTM_MTSPR ]) +(define_expand "tabort" + [(parallel + [(set (match_operand:CC 1 "cc_reg_operand" "=x") + (unspec_volatile:CC [(match_operand:SI 0 "base_reg_operand" "b")] + UNSPECV_HTM_TABORT)) + (set (match_dup 2) (unspec:BLK [(match_dup 2)] UNSPEC_HTM_FENCE))])] + "TARGET_HTM" +{ + operands[2] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode)); + MEM_VOLATILE_P (operands[2]) = 1; +}) -(define_insn "tabort" +(define_insn "*tabort" [(set (match_operand:CC 1 "cc_reg_operand" "=x") - (unspec_volatile:CC [(match_operand:SI 0 "gpc_reg_operand" "r")] - UNSPECV_HTM_TABORT))] + (unspec_volatile:CC [(match_operand:SI 0 "base_reg_operand" "b")] + UNSPECV_HTM_TABORT)) + (set (match_operand:BLK 2) (unspec:BLK [(match_dup 2)] UNSPEC_HTM_FENCE))] "TARGET_HTM" "tabort. %0" [(set_attr "type" "htm") (set_attr "length" "4")]) -(define_insn "tabortc" +(define_expand "tabortc" + [(parallel + [(set (match_operand:CC 3 "cc_reg_operand" "=x") + (unspec_volatile:CC [(match_operand 0 "u5bit_cint_operand" "n") + (match_operand:GPR 1 "gpc_reg_operand" "r") + (match_operand:GPR 2 "gpc_reg_operand" "r")] + UNSPECV_HTM_TABORTXC)) + (set (match_dup 4) (unspec:BLK [(match_dup 4)] UNSPEC_HTM_FENCE))])] + "TARGET_HTM" +{ + operands[4] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode)); + MEM_VOLATILE_P (operands[4]) = 1; +}) + +(define_insn "*tabortc" [(set (match_operand:CC 3 "cc_reg_operand" "=x") (unspec_volatile:CC [(match_operand 0 "u5bit_cint_operand" "n") (match_operand:GPR 1 "gpc_reg_operand" "r") (match_operand:GPR 2 "gpc_reg_operand" "r")] - UNSPECV_HTM_TABORTXC))] + UNSPECV_HTM_TABORTXC)) + (set (match_operand:BLK 4) (unspec:BLK [(match_dup 4)] UNSPEC_HTM_FENCE))] "TARGET_HTM" "tabortc. 
%0,%1,%2" [(set_attr "type" "htm") (set_attr "length" "4")]) -(define_insn "tabortci" +(define_expand "tabortci" + [(parallel + [(set (match_operand:CC 3 "cc_reg_operand" "=x") + (unspec_volatile:CC [(match_operand 0 "u5bit_cint_operand" "n") + (match_operand:GPR 1 "gpc_reg_operand" "r") + (match_operand 2 "s5bit_cint_operand" "n")] + UNSPECV_HTM_TABORTXCI)) + (set (match_dup 4) (unspec:BLK [(match_dup 4)] UNSPEC_HTM_FENCE))])] + "TARGET_HTM" +{ + operands[4] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode)); + MEM_VOLATILE_P (operands[4]) = 1; +}) + +(define_insn "*tabortci" [(set (match_operand:CC 3 "cc_reg_operand" "=x") (unspec_volatile:CC [(match_operand 0 "u5bit_cint_operand" "n") (match_operand:GPR 1 "gpc_reg_operand" "r") (match_operand 2 "s5bit_cint_operand" "n")] - UNSPECV_HTM_TABORTXCI))] + UNSPECV_HTM_TABORTXCI)) + (set (match_operand:BLK 4) (unspec:BLK [(match_dup 4)] UNSPEC_HTM_FENCE))] "TARGET_HTM" "tabortci. %0,%1,%2" [(set_attr "type" "htm") (set_attr "length" "4")]) -(define_insn "tbegin" +(define_expand "tbegin" + [(parallel + [(set (match_operand:CC 1 "cc_reg_operand" "=x") + (unspec_volatile:CC [(match_operand 0 "const_0_to_1_operand" "n")] + UNSPECV_HTM_TBEGIN)) + (set (match_dup 2) (unspec:BLK [(match_dup 2)] UNSPEC_HTM_FENCE))])] + "TARGET_HTM" +{ + operands[2] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode)); + MEM_VOLATILE_P (operands[2]) = 1; +}) + +(define_insn "*tbegin" [(set (match_operand:CC 1 "cc_reg_operand" "=x") (unspec_volatile:CC [(match_operand 0 "const_0_to_1_operand" "n")] - UNSPECV_HTM_TBEGIN))] + UNSPECV_HTM_TBEGIN)) + (set (match_operand:BLK 2) (unspec:BLK [(match_dup 2)] UNSPEC_HTM_FENCE))] "TARGET_HTM" "tbegin. 
%0" [(set_attr "type" "htm") (set_attr "length" "4")]) -(define_insn "tcheck" +(define_expand "tcheck" + [(parallel + [(set (match_operand:CC 0 "cc_reg_operand" "=y") + (unspec_volatile:CC [(const_int 0)] UNSPECV_HTM_TCHECK)) + (set (match_dup 1) (unspec:BLK [(match_dup 1)] UNSPEC_HTM_FENCE))])] + "TARGET_HTM" +{ + operands[1] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode)); + MEM_VOLATILE_P (operands[1]) = 1; +}) + +(define_insn "*tcheck" [(set (match_operand:CC 0 "cc_reg_operand" "=y") - (unspec_volatile:CC [(const_int 0)] - UNSPECV_HTM_TCHECK))] + (unspec_volatile:CC [(const_int 0)] UNSPECV_HTM_TCHECK)) + (set (match_operand:BLK 1) (unspec:BLK [(match_dup 1)] UNSPEC_HTM_FENCE))] "TARGET_HTM" "tcheck %0" [(set_attr "type" "htm") (set_attr "length" "4")]) -(define_insn "tend" +(define_expand "tend" + [(parallel + [(set (match_operand:CC 1 "cc_reg_operand" "=x") + (unspec_volatile:CC [(match_operand 0 "const_0_to_1_operand" "n")] + UNSPECV_HTM_TEND)) + (set (match_dup 2) (unspec:BLK [(match_dup 2)] UNSPEC_HTM_FENCE))])] + "TARGET_HTM" +{ + operands[2] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode)); + MEM_VOLATILE_P (operands[2]) = 1; +}) + +(define_insn "*tend" [(set (match_operand:CC 1 "cc_reg_operand" "=x") (unspec_volatile:CC [(match_operand 0 "const_0_to_1_operand" "n")] - UNSPECV_HTM_TEND))] + UNSPECV_HTM_TEND)) + (set (match_operand:BLK 2) (unspec:BLK [(match_dup 2)] UNSPEC_HTM_FENCE))] "TARGET_HTM" "tend. 
%0" [(set_attr "type" "htm") (set_attr "length" "4")]) -(define_insn "trechkpt" +(define_expand "trechkpt" + [(parallel + [(set (match_operand:CC 0 "cc_reg_operand" "=x") + (unspec_volatile:CC [(const_int 0)] UNSPECV_HTM_TRECHKPT)) + (set (match_dup 1) (unspec:BLK [(match_dup 1)] UNSPEC_HTM_FENCE))])] + "TARGET_HTM" +{ + operands[1] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode)); + MEM_VOLATILE_P (operands[1]) = 1; +}) + +(define_insn "*trechkpt" [(set (match_operand:CC 0 "cc_reg_operand" "=x") - (unspec_volatile:CC [(const_int 0)] - UNSPECV_HTM_TRECHKPT))] + (unspec_volatile:CC [(const_int 0)] UNSPECV_HTM_TRECHKPT)) + (set (match_operand:BLK 1) (unspec:BLK [(match_dup 1)] UNSPEC_HTM_FENCE))] "TARGET_HTM" "trechkpt." [(set_attr "type" "htm") (set_attr "length" "4")]) -(define_insn "treclaim" +(define_expand "treclaim" + [(parallel + [(set (match_operand:CC 1 "cc_reg_operand" "=x") + (unspec_volatile:CC [(match_operand:SI 0 "gpc_reg_operand" "r")] + UNSPECV_HTM_TRECLAIM)) + (set (match_dup 2) (unspec:BLK [(match_dup 2)] UNSPEC_HTM_FENCE))])] + "TARGET_HTM" +{ + operands[2] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode)); + MEM_VOLATILE_P (operands[2]) = 1; +}) + +(define_insn "*treclaim" [(set (match_operand:CC 1 "cc_reg_operand" "=x") (unspec_volatile:CC [(match_operand:SI 0 "gpc_reg_operand" "r")] - UNSPECV_HTM_TRECLAIM))] + UNSPECV_HTM_TRECLAIM)) + (set (match_operand:BLK 2) (unspec:BLK [(match_dup 2)] UNSPEC_HTM_FENCE))] "TARGET_HTM" "treclaim. 
%0" [(set_attr "type" "htm") (set_attr "length" "4")]) -(define_insn "tsr" +(define_expand "tsr" + [(parallel + [(set (match_operand:CC 1 "cc_reg_operand" "=x") + (unspec_volatile:CC [(match_operand 0 "const_0_to_1_operand" "n")] + UNSPECV_HTM_TSR)) + (set (match_dup 2) (unspec:BLK [(match_dup 2)] UNSPEC_HTM_FENCE))])] + "TARGET_HTM" +{ + operands[2] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode)); + MEM_VOLATILE_P (operands[2]) = 1; +}) + +(define_insn "*tsr" [(set (match_operand:CC 1 "cc_reg_operand" "=x") (unspec_volatile:CC [(match_operand 0 "const_0_to_1_operand" "n")] - UNSPECV_HTM_TSR))] + UNSPECV_HTM_TSR)) + (set (match_operand:BLK 2) (unspec:BLK [(match_dup 2)] UNSPEC_HTM_FENCE))] "TARGET_HTM" "tsr. %0" [(set_attr "type" "htm") (set_attr "length" "4")]) -(define_insn "ttest" +(define_expand "ttest" + [(parallel + [(set (match_operand:CC 0 "cc_reg_operand" "=x") + (unspec_volatile:CC [(const_int 0)] UNSPECV_HTM_TTEST)) + (set (match_dup 1) (unspec:BLK [(match_dup 1)] UNSPEC_HTM_FENCE))])] + "TARGET_HTM" +{ + operands[1] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode)); + MEM_VOLATILE_P (operands[1]) = 1; +}) + +(define_insn "*ttest" [(set (match_operand:CC 0 "cc_reg_operand" "=x") - (unspec_volatile:CC [(const_int 0)] - UNSPECV_HTM_TTEST))] + (unspec_volatile:CC [(const_int 0)] UNSPECV_HTM_TTEST)) + (set (match_operand:BLK 1) (unspec:BLK [(match_dup 1)] UNSPEC_HTM_FENCE))] "TARGET_HTM" "tabortwci. 
0,1,0" [(set_attr "type" "htm") diff -Naur gcc-5.2.0.orig/gcc/config/rs6000/predicates.md gcc-5.2.0/gcc/config/rs6000/predicates.md --- gcc-5.2.0.orig/gcc/config/rs6000/predicates.md 2015-07-05 21:07:49.000000000 -0500 +++ gcc-5.2.0/gcc/config/rs6000/predicates.md 2015-09-01 18:04:58.809944000 -0500 @@ -1048,12 +1048,12 @@ (define_predicate "current_file_function_operand" (and (match_code "symbol_ref") (match_test "(DEFAULT_ABI != ABI_AIX || SYMBOL_REF_FUNCTION_P (op)) - && ((SYMBOL_REF_LOCAL_P (op) - && ((DEFAULT_ABI != ABI_AIX - && DEFAULT_ABI != ABI_ELFv2) - || !SYMBOL_REF_EXTERNAL_P (op))) - || (op == XEXP (DECL_RTL (current_function_decl), - 0)))"))) + && (SYMBOL_REF_LOCAL_P (op) + || op == XEXP (DECL_RTL (current_function_decl), 0)) + && !((DEFAULT_ABI == ABI_AIX + || DEFAULT_ABI == ABI_ELFv2) + && (SYMBOL_REF_EXTERNAL_P (op) + || SYMBOL_REF_WEAK (op)))"))) ;; Return 1 if this operand is a valid input for a move insn. (define_predicate "input_operand" diff -Naur gcc-5.2.0.orig/gcc/config/rs6000/rs6000.c gcc-5.2.0/gcc/config/rs6000/rs6000.c --- gcc-5.2.0.orig/gcc/config/rs6000/rs6000.c 2015-05-16 12:56:55.000000000 -0500 +++ gcc-5.2.0/gcc/config/rs6000/rs6000.c 2015-09-09 00:59:16.039307000 -0500 @@ -3692,6 +3692,45 @@ && optimize >= 3) rs6000_isa_flags |= OPTION_MASK_P8_FUSION_SIGN; + /* Set -mallow-movmisalign to explicitly on if we have full ISA 2.07 + support. If we only have ISA 2.06 support, and the user did not specify + the switch, leave it set to -1 so the movmisalign patterns are enabled, + but we don't enable the full vectorization support */ + if (TARGET_ALLOW_MOVMISALIGN == -1 && TARGET_P8_VECTOR && TARGET_DIRECT_MOVE) + TARGET_ALLOW_MOVMISALIGN = 1; + + else if (TARGET_ALLOW_MOVMISALIGN && !TARGET_VSX) + { + if (TARGET_ALLOW_MOVMISALIGN > 0) + error ("-mallow-movmisalign requires -mvsx"); + + TARGET_ALLOW_MOVMISALIGN = 0; + } + + /* Determine when unaligned vector accesses are permitted, and when + they are preferred over masked Altivec loads. 
Note that if + TARGET_ALLOW_MOVMISALIGN has been disabled by the user, then + TARGET_EFFICIENT_UNALIGNED_VSX must be as well. The converse is + not true. */ + if (TARGET_EFFICIENT_UNALIGNED_VSX) + { + if (!TARGET_VSX) + { + if (rs6000_isa_flags_explicit & OPTION_MASK_EFFICIENT_UNALIGNED_VSX) + error ("-mefficient-unaligned-vsx requires -mvsx"); + + rs6000_isa_flags &= ~OPTION_MASK_EFFICIENT_UNALIGNED_VSX; + } + + else if (!TARGET_ALLOW_MOVMISALIGN) + { + if (rs6000_isa_flags_explicit & OPTION_MASK_EFFICIENT_UNALIGNED_VSX) + error ("-mefficient-unaligned-vsx requires -mallow-movmisalign"); + + rs6000_isa_flags &= ~OPTION_MASK_EFFICIENT_UNALIGNED_VSX; + } + } + if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET) rs6000_print_isa_options (stderr, 0, "after defaults", rs6000_isa_flags); @@ -4251,22 +4290,6 @@ } } - /* Determine when unaligned vector accesses are permitted, and when - they are preferred over masked Altivec loads. Note that if - TARGET_ALLOW_MOVMISALIGN has been disabled by the user, then - TARGET_EFFICIENT_UNALIGNED_VSX must be as well. The converse is - not true. */ - if (TARGET_EFFICIENT_UNALIGNED_VSX == -1) { - if (TARGET_VSX && rs6000_cpu == PROCESSOR_POWER8 - && TARGET_ALLOW_MOVMISALIGN != 0) - TARGET_EFFICIENT_UNALIGNED_VSX = 1; - else - TARGET_EFFICIENT_UNALIGNED_VSX = 0; - } - - if (TARGET_ALLOW_MOVMISALIGN == -1 && rs6000_cpu == PROCESSOR_POWER8) - TARGET_ALLOW_MOVMISALIGN = 1; - /* Set the builtin mask of the various options used that could affect which builtins were used. 
In the past we used target_flags, but we've run out of bits, and some options like SPE and PAIRED are no longer in @@ -17678,8 +17701,21 @@ if (GET_CODE (addr) == PRE_MODIFY) { + gcc_assert (REG_P (XEXP (addr, 0)) + && GET_CODE (XEXP (addr, 1)) == PLUS + && XEXP (XEXP (addr, 1), 0) == XEXP (addr, 0)); scratch_or_premodify = XEXP (addr, 0); - gcc_assert (REG_P (scratch_or_premodify)); + if (!HARD_REGISTER_P (scratch_or_premodify)) + /* If we have a pseudo here then reload will have arranged + to have it replaced, but only in the original insn. + Use the replacement here too. */ + scratch_or_premodify = find_replacement (&XEXP (addr, 0)); + + /* RTL emitted by rs6000_secondary_reload_gpr uses RTL + expressions from the original insn, without unsharing them. + Any RTL that points into the original insn will of course + have register replacements applied. That is why we don't + need to look for replacements under the PLUS. */ addr = XEXP (addr, 1); } gcc_assert (GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM); @@ -20537,12 +20573,15 @@ case MEMMODEL_RELAXED: case MEMMODEL_CONSUME: case MEMMODEL_ACQUIRE: + case MEMMODEL_SYNC_ACQUIRE: break; case MEMMODEL_RELEASE: + case MEMMODEL_SYNC_RELEASE: case MEMMODEL_ACQ_REL: emit_insn (gen_lwsync ()); break; case MEMMODEL_SEQ_CST: + case MEMMODEL_SYNC_SEQ_CST: emit_insn (gen_hwsync ()); break; default: @@ -20559,10 +20598,13 @@ case MEMMODEL_RELAXED: case MEMMODEL_CONSUME: case MEMMODEL_RELEASE: + case MEMMODEL_SYNC_RELEASE: break; case MEMMODEL_ACQUIRE: + case MEMMODEL_SYNC_ACQUIRE: case MEMMODEL_ACQ_REL: case MEMMODEL_SEQ_CST: + case MEMMODEL_SYNC_SEQ_CST: emit_insn (gen_isync ()); break; default: @@ -20662,8 +20704,8 @@ oldval = operands[3]; newval = operands[4]; is_weak = (INTVAL (operands[5]) != 0); - mod_s = (enum memmodel) INTVAL (operands[6]); - mod_f = (enum memmodel) INTVAL (operands[7]); + mod_s = memmodel_from_int (INTVAL (operands[6])); + mod_f = memmodel_from_int (INTVAL (operands[7])); orig_mode = mode = 
GET_MODE (mem); mask = shift = NULL_RTX; @@ -20751,12 +20793,12 @@ emit_unlikely_jump (x, label1); } - if (mod_f != MEMMODEL_RELAXED) + if (!is_mm_relaxed (mod_f)) emit_label (XEXP (label2, 0)); rs6000_post_atomic_barrier (mod_s); - if (mod_f == MEMMODEL_RELAXED) + if (is_mm_relaxed (mod_f)) emit_label (XEXP (label2, 0)); if (shift) @@ -22311,6 +22353,7 @@ || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2) && decl && !DECL_EXTERNAL (decl) + && !DECL_WEAK (decl) && (*targetm.binds_local_p) (decl)) || (DEFAULT_ABI == ABI_V4 && (!TARGET_SECURE_PLT @@ -32274,6 +32317,8 @@ { "crypto", OPTION_MASK_CRYPTO, false, true }, { "direct-move", OPTION_MASK_DIRECT_MOVE, false, true }, { "dlmzb", OPTION_MASK_DLMZB, false, true }, + { "efficient-unaligned-vsx", OPTION_MASK_EFFICIENT_UNALIGNED_VSX, + false, true }, { "fprnd", OPTION_MASK_FPRND, false, true }, { "hard-dfp", OPTION_MASK_DFP, false, true }, { "htm", OPTION_MASK_HTM, false, true }, diff -Naur gcc-5.2.0.orig/gcc/config/rs6000/rs6000-c.c gcc-5.2.0/gcc/config/rs6000/rs6000-c.c --- gcc-5.2.0.orig/gcc/config/rs6000/rs6000-c.c 2015-04-02 06:54:58.000000000 -0500 +++ gcc-5.2.0/gcc/config/rs6000/rs6000-c.c 2015-10-15 11:38:47.269450000 -0500 @@ -381,7 +381,11 @@ if ((flags & OPTION_MASK_VSX) != 0) rs6000_define_or_undefine_macro (define_p, "__VSX__"); if ((flags & OPTION_MASK_HTM) != 0) - rs6000_define_or_undefine_macro (define_p, "__HTM__"); + { + rs6000_define_or_undefine_macro (define_p, "__HTM__"); + /* Tell the user that our HTM insn patterns act as memory barriers. 
*/ + rs6000_define_or_undefine_macro (define_p, "__TM_FENCE__"); + } if ((flags & OPTION_MASK_P8_VECTOR) != 0) rs6000_define_or_undefine_macro (define_p, "__POWER8_VECTOR__"); if ((flags & OPTION_MASK_QUAD_MEMORY) != 0) diff -Naur gcc-5.2.0.orig/gcc/config/rs6000/rs6000-cpus.def gcc-5.2.0/gcc/config/rs6000/rs6000-cpus.def --- gcc-5.2.0.orig/gcc/config/rs6000/rs6000-cpus.def 2015-03-02 18:59:56.000000000 -0600 +++ gcc-5.2.0/gcc/config/rs6000/rs6000-cpus.def 2015-08-24 14:11:02.078717000 -0500 @@ -53,6 +53,7 @@ | OPTION_MASK_P8_VECTOR \ | OPTION_MASK_CRYPTO \ | OPTION_MASK_DIRECT_MOVE \ + | OPTION_MASK_EFFICIENT_UNALIGNED_VSX \ | OPTION_MASK_HTM \ | OPTION_MASK_QUAD_MEMORY \ | OPTION_MASK_QUAD_MEMORY_ATOMIC \ @@ -78,6 +79,7 @@ | OPTION_MASK_DFP \ | OPTION_MASK_DIRECT_MOVE \ | OPTION_MASK_DLMZB \ + | OPTION_MASK_EFFICIENT_UNALIGNED_VSX \ | OPTION_MASK_FPRND \ | OPTION_MASK_HTM \ | OPTION_MASK_ISEL \ diff -Naur gcc-5.2.0.orig/gcc/config/rs6000/rs6000.opt gcc-5.2.0/gcc/config/rs6000/rs6000.opt --- gcc-5.2.0.orig/gcc/config/rs6000/rs6000.opt 2015-04-24 15:03:13.000000000 -0500 +++ gcc-5.2.0/gcc/config/rs6000/rs6000.opt 2015-08-24 14:11:02.078717000 -0500 @@ -212,7 +212,7 @@ ; Allow/disallow the movmisalign in DF/DI vectors mefficient-unaligned-vector -Target Undocumented Report Var(TARGET_EFFICIENT_UNALIGNED_VSX) Init(-1) Save +Target Undocumented Report Mask(EFFICIENT_UNALIGNED_VSX) Var(rs6000_isa_flags) ; Consider unaligned VSX accesses to be efficient/inefficient mallow-df-permute diff -Naur gcc-5.2.0.orig/gcc/config/rs6000/sync.md gcc-5.2.0/gcc/config/rs6000/sync.md --- gcc-5.2.0.orig/gcc/config/rs6000/sync.md 2015-01-05 06:33:28.000000000 -0600 +++ gcc-5.2.0/gcc/config/rs6000/sync.md 2015-08-05 06:20:59.983324000 -0500 @@ -41,18 +41,21 @@ [(match_operand:SI 0 "const_int_operand" "")] ;; model "" { - enum memmodel model = (enum memmodel) INTVAL (operands[0]); + enum memmodel model = memmodel_from_int (INTVAL (operands[0])); switch (model) { case MEMMODEL_RELAXED: 
break; case MEMMODEL_CONSUME: case MEMMODEL_ACQUIRE: + case MEMMODEL_SYNC_ACQUIRE: case MEMMODEL_RELEASE: + case MEMMODEL_SYNC_RELEASE: case MEMMODEL_ACQ_REL: emit_insn (gen_lwsync ()); break; case MEMMODEL_SEQ_CST: + case MEMMODEL_SYNC_SEQ_CST: emit_insn (gen_hwsync ()); break; default: @@ -144,9 +147,9 @@ if (mode == TImode && !TARGET_SYNC_TI) FAIL; - enum memmodel model = (enum memmodel) INTVAL (operands[2]); + enum memmodel model = memmodel_from_int (INTVAL (operands[2])); - if (model == MEMMODEL_SEQ_CST) + if (is_mm_seq_cst (model)) emit_insn (gen_hwsync ()); if (mode != TImode) @@ -182,7 +185,9 @@ break; case MEMMODEL_CONSUME: case MEMMODEL_ACQUIRE: + case MEMMODEL_SYNC_ACQUIRE: case MEMMODEL_SEQ_CST: + case MEMMODEL_SYNC_SEQ_CST: emit_insn (gen_loadsync_ (operands[0])); break; default: @@ -209,15 +214,17 @@ if (mode == TImode && !TARGET_SYNC_TI) FAIL; - enum memmodel model = (enum memmodel) INTVAL (operands[2]); + enum memmodel model = memmodel_from_int (INTVAL (operands[2])); switch (model) { case MEMMODEL_RELAXED: break; case MEMMODEL_RELEASE: + case MEMMODEL_SYNC_RELEASE: emit_insn (gen_lwsync ()); break; case MEMMODEL_SEQ_CST: + case MEMMODEL_SYNC_SEQ_CST: emit_insn (gen_hwsync ()); break; default: diff -Naur gcc-5.2.0.orig/gcc/config/rs6000/sysv4.h gcc-5.2.0/gcc/config/rs6000/sysv4.h --- gcc-5.2.0.orig/gcc/config/rs6000/sysv4.h 2015-01-05 06:33:28.000000000 -0600 +++ gcc-5.2.0/gcc/config/rs6000/sysv4.h 2015-09-24 08:46:45.240734000 -0500 @@ -574,7 +574,6 @@ %{R*} \ %(link_shlib) \ %{!T*: %(link_start) } \ -%(link_target) \ %(link_os)" /* Shared libraries are not default. */ @@ -584,10 +583,6 @@ %{shared:-G -dy -z text } \ %{symbolic:-Bsymbolic -G -dy -z text }" -/* Override the default target of the linker. */ -#define LINK_TARGET_SPEC \ - ENDIAN_SELECT("", " --oformat elf32-powerpcle", "") - /* Any specific OS flags. 
*/ #define LINK_OS_SPEC "\ %{mads : %(link_os_ads) ; \ @@ -873,7 +868,6 @@ { "endfile_openbsd", ENDFILE_OPENBSD_SPEC }, \ { "endfile_default", ENDFILE_DEFAULT_SPEC }, \ { "link_shlib", LINK_SHLIB_SPEC }, \ - { "link_target", LINK_TARGET_SPEC }, \ { "link_start", LINK_START_SPEC }, \ { "link_start_ads", LINK_START_ADS_SPEC }, \ { "link_start_yellowknife", LINK_START_YELLOWKNIFE_SPEC }, \ diff -Naur gcc-5.2.0.orig/gcc/config/rs6000/sysv4le.h gcc-5.2.0/gcc/config/rs6000/sysv4le.h --- gcc-5.2.0.orig/gcc/config/rs6000/sysv4le.h 2015-01-05 06:33:28.000000000 -0600 +++ gcc-5.2.0/gcc/config/rs6000/sysv4le.h 2015-09-24 08:46:45.240734000 -0500 @@ -25,10 +25,6 @@ #undef DEFAULT_ASM_ENDIAN #define DEFAULT_ASM_ENDIAN " -mlittle" -#undef LINK_TARGET_SPEC -#define LINK_TARGET_SPEC \ - ENDIAN_SELECT(" --oformat elf32-powerpc", "", "") - #undef MULTILIB_DEFAULTS #define MULTILIB_DEFAULTS { "mlittle", "mcall-sysv" } diff -Naur gcc-5.2.0.orig/gcc/config/rs6000/vector.md gcc-5.2.0/gcc/config/rs6000/vector.md --- gcc-5.2.0.orig/gcc/config/rs6000/vector.md 2015-01-05 06:33:28.000000000 -0600 +++ gcc-5.2.0/gcc/config/rs6000/vector.md 2015-08-27 13:34:54.229009000 -0500 @@ -918,6 +918,8 @@ ;; General shift amounts can be supported using vsro + vsr. We're ;; not expecting to see these yet (the vectorizer currently ;; generates only shifts by a whole number of vector elements). +;; Note that the vec_shr operation is actually defined as +;; 'shift toward element 0' so is a shr for LE and shl for BE. 
(define_expand "vec_shr_" [(match_operand:VEC_L 0 "vlogical_operand" "") (match_operand:VEC_L 1 "vlogical_operand" "") @@ -928,6 +930,7 @@ rtx bitshift = operands[2]; rtx shift; rtx insn; + rtx zero_reg, op1, op2; HOST_WIDE_INT bitshift_val; HOST_WIDE_INT byteshift_val; @@ -937,19 +940,29 @@ if (bitshift_val & 0x7) FAIL; byteshift_val = (bitshift_val >> 3); + zero_reg = gen_reg_rtx (mode); + emit_move_insn (zero_reg, CONST0_RTX (mode)); if (!BYTES_BIG_ENDIAN) - byteshift_val = 16 - byteshift_val; + { + byteshift_val = 16 - byteshift_val; + op1 = zero_reg; + op2 = operands[1]; + } + else + { + op1 = operands[1]; + op2 = zero_reg; + } + if (TARGET_VSX && (byteshift_val & 0x3) == 0) { shift = gen_rtx_CONST_INT (QImode, byteshift_val >> 2); - insn = gen_vsx_xxsldwi_ (operands[0], operands[1], operands[1], - shift); + insn = gen_vsx_xxsldwi_ (operands[0], op1, op2, shift); } else { shift = gen_rtx_CONST_INT (QImode, byteshift_val); - insn = gen_altivec_vsldoi_ (operands[0], operands[1], operands[1], - shift); + insn = gen_altivec_vsldoi_ (operands[0], op1, op2, shift); } emit_insn (insn); diff -Naur gcc-5.2.0.orig/gcc/config/s390/s390-builtins.def gcc-5.2.0/gcc/config/s390/s390-builtins.def --- gcc-5.2.0.orig/gcc/config/s390/s390-builtins.def 2015-07-02 11:28:08.000000000 -0500 +++ gcc-5.2.0/gcc/config/s390/s390-builtins.def 2015-08-25 06:37:49.817958000 -0500 @@ -438,15 +438,15 @@ B_DEF (s390_vllezg, vec_insert_and_zerov2di,0, B_VX, 0, BT_FN_UV2DI_ULONGLONGCONSTPTR) OB_DEF (s390_vec_load_bndry, s390_vec_load_bndry_s8,s390_vec_load_bndry_dbl,B_VX, BT_FN_OV4SI_INTCONSTPTR_INT) -OB_DEF_VAR (s390_vec_load_bndry_s8, s390_vlbb, O2_U3, BT_OV_V16QI_SCHARCONSTPTR_USHORT) -OB_DEF_VAR (s390_vec_load_bndry_u8, s390_vlbb, O2_U3, BT_OV_UV16QI_UCHARCONSTPTR_USHORT) -OB_DEF_VAR (s390_vec_load_bndry_s16, s390_vlbb, O2_U3, BT_OV_V8HI_SHORTCONSTPTR_USHORT) -OB_DEF_VAR (s390_vec_load_bndry_u16, s390_vlbb, O2_U3, BT_OV_UV8HI_USHORTCONSTPTR_USHORT) -OB_DEF_VAR (s390_vec_load_bndry_s32, 
s390_vlbb, O2_U3, BT_OV_V4SI_INTCONSTPTR_USHORT) -OB_DEF_VAR (s390_vec_load_bndry_u32, s390_vlbb, O2_U3, BT_OV_UV4SI_UINTCONSTPTR_USHORT) -OB_DEF_VAR (s390_vec_load_bndry_s64, s390_vlbb, O2_U3, BT_OV_V2DI_LONGLONGCONSTPTR_USHORT) -OB_DEF_VAR (s390_vec_load_bndry_u64, s390_vlbb, O2_U3, BT_OV_UV2DI_ULONGLONGCONSTPTR_USHORT) -OB_DEF_VAR (s390_vec_load_bndry_dbl, s390_vlbb, O2_U3, BT_OV_V2DF_DBLCONSTPTR_USHORT) +OB_DEF_VAR (s390_vec_load_bndry_s8, s390_vlbb, O2_U16, BT_OV_V16QI_SCHARCONSTPTR_USHORT) +OB_DEF_VAR (s390_vec_load_bndry_u8, s390_vlbb, O2_U16, BT_OV_UV16QI_UCHARCONSTPTR_USHORT) +OB_DEF_VAR (s390_vec_load_bndry_s16, s390_vlbb, O2_U16, BT_OV_V8HI_SHORTCONSTPTR_USHORT) +OB_DEF_VAR (s390_vec_load_bndry_u16, s390_vlbb, O2_U16, BT_OV_UV8HI_USHORTCONSTPTR_USHORT) +OB_DEF_VAR (s390_vec_load_bndry_s32, s390_vlbb, O2_U16, BT_OV_V4SI_INTCONSTPTR_USHORT) +OB_DEF_VAR (s390_vec_load_bndry_u32, s390_vlbb, O2_U16, BT_OV_UV4SI_UINTCONSTPTR_USHORT) +OB_DEF_VAR (s390_vec_load_bndry_s64, s390_vlbb, O2_U16, BT_OV_V2DI_LONGLONGCONSTPTR_USHORT) +OB_DEF_VAR (s390_vec_load_bndry_u64, s390_vlbb, O2_U16, BT_OV_UV2DI_ULONGLONGCONSTPTR_USHORT) +OB_DEF_VAR (s390_vec_load_bndry_dbl, s390_vlbb, O2_U16, BT_OV_V2DF_DBLCONSTPTR_USHORT) B_DEF (s390_vlbb, vlbb, 0, B_VX, O2_U3, BT_FN_UV16QI_UCHARCONSTPTR_USHORT) diff -Naur gcc-5.2.0.orig/gcc/config/s390/s390.c gcc-5.2.0/gcc/config/s390/s390.c --- gcc-5.2.0.orig/gcc/config/s390/s390.c 2015-07-02 11:26:51.000000000 -0500 +++ gcc-5.2.0/gcc/config/s390/s390.c 2015-10-02 03:09:09.582503000 -0500 @@ -750,7 +750,7 @@ HOST_WIDE_INT_PRINT_DEC ".." HOST_WIDE_INT_PRINT_DEC ")", argnum, decl, - -(HOST_WIDE_INT)1 << (bitwidth - 1), + -((HOST_WIDE_INT)1 << (bitwidth - 1)), ((HOST_WIDE_INT)1 << (bitwidth - 1)) - 1); return false; } @@ -7705,11 +7705,12 @@ /* We keep a list of constants which we have to add to internal constant tables in the middle of large functions. 
*/ -#define NR_C_MODES 31 +#define NR_C_MODES 32 machine_mode constant_modes[NR_C_MODES] = { TFmode, TImode, TDmode, - V16QImode, V8HImode, V4SImode, V2DImode, V4SFmode, V2DFmode, V1TFmode, + V16QImode, V8HImode, V4SImode, V2DImode, V1TImode, + V4SFmode, V2DFmode, V1TFmode, DFmode, DImode, DDmode, V8QImode, V4HImode, V2SImode, V1DImode, V2SFmode, V1DFmode, SFmode, SImode, SDmode, diff -Naur gcc-5.2.0.orig/gcc/config/s390/s390.md gcc-5.2.0/gcc/config/s390/s390.md --- gcc-5.2.0.orig/gcc/config/s390/s390.md 2015-07-02 10:53:33.000000000 -0500 +++ gcc-5.2.0/gcc/config/s390/s390.md 2015-10-22 03:28:57.203960000 -0500 @@ -6044,8 +6044,13 @@ (match_operand:GPR 2 "nonimmediate_operand" "") (match_operand:GPR 3 "nonimmediate_operand" "")))] "TARGET_Z196" - "operands[1] = s390_emit_compare (GET_CODE (operands[1]), - XEXP (operands[1], 0), XEXP (operands[1], 1));") +{ + /* Emit the comparison insn in case we do not already have a comparison result. */ + if (!s390_comparison (operands[1], VOIDmode)) + operands[1] = s390_emit_compare (GET_CODE (operands[1]), + XEXP (operands[1], 0), + XEXP (operands[1], 1)); +}) ; locr, loc, stoc, locgr, locg, stocg (define_insn_and_split "*movcc" @@ -9594,7 +9599,7 @@ { /* Unless this is a SEQ_CST fence, the s390 memory model is strong enough not to require barriers of any kind. 
*/ - if (INTVAL (operands[0]) == MEMMODEL_SEQ_CST) + if (is_mm_seq_cst (memmodel_from_int (INTVAL (operands[0])))) { rtx mem = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode)); MEM_VOLATILE_P (mem) = 1; @@ -9675,7 +9680,7 @@ (match_operand:SI 2 "const_int_operand")] ;; model "" { - enum memmodel model = (enum memmodel) INTVAL (operands[2]); + enum memmodel model = memmodel_from_int (INTVAL (operands[2])); if (MEM_ALIGN (operands[0]) < GET_MODE_BITSIZE (GET_MODE (operands[0]))) FAIL; @@ -9686,7 +9691,7 @@ emit_insn (gen_atomic_storedi_1 (operands[0], operands[1])); else emit_move_insn (operands[0], operands[1]); - if (model == MEMMODEL_SEQ_CST) + if (is_mm_seq_cst (model)) emit_insn (gen_mem_thread_fence (operands[2])); DONE; }) diff -Naur gcc-5.2.0.orig/gcc/config/s390/vx-builtins.md gcc-5.2.0/gcc/config/s390/vx-builtins.md --- gcc-5.2.0.orig/gcc/config/s390/vx-builtins.md 2015-07-02 10:53:33.000000000 -0500 +++ gcc-5.2.0/gcc/config/s390/vx-builtins.md 2015-09-29 05:28:00.829455000 -0500 @@ -414,7 +414,7 @@ (unspec: [(match_operand:V_HW_64 0 "register_operand" "v") (match_dup 3)] UNSPEC_VEC_EXTRACT))] "TARGET_VX && !TARGET_64BIT" - "vsce\t%v0,%O2(%v1,%R2),%3" + "vsce\t%v0,%O2(%v1,%R2),%3" [(set_attr "op_type" "VRV")]) ; Element size and target adress size is the same @@ -428,7 +428,7 @@ (unspec: [(match_operand:V_HW_32_64 0 "register_operand" "v") (match_dup 3)] UNSPEC_VEC_EXTRACT))] "TARGET_VX" - "vsce\t%v0,%O2(%v1,%R2),%3" + "vsce\t%v0,%O2(%v1,%R2),%3" [(set_attr "op_type" "VRV")]) ; Depending on the address size we have to expand a different pattern. 
@@ -870,11 +870,11 @@ ; vec_mladd -> vec_vmal ; vmalb, vmalh, vmalf, vmalg (define_insn "vec_vmal" - [(set (match_operand:VI_HW 0 "register_operand" "=v") - (unspec:VI_HW [(match_operand:VI_HW 1 "register_operand" "v") - (match_operand:VI_HW 2 "register_operand" "v") - (match_operand:VI_HW 3 "register_operand" "v")] - UNSPEC_VEC_VMAL))] + [(set (match_operand:VI_HW_QHS 0 "register_operand" "=v") + (unspec:VI_HW_QHS [(match_operand:VI_HW_QHS 1 "register_operand" "v") + (match_operand:VI_HW_QHS 2 "register_operand" "v") + (match_operand:VI_HW_QHS 3 "register_operand" "v")] + UNSPEC_VEC_VMAL))] "TARGET_VX" "vmal\t%v0,%v1,%v2,%v3" [(set_attr "op_type" "VRR")]) @@ -883,22 +883,22 @@ ; vmahb; vmahh, vmahf, vmahg (define_insn "vec_vmah" - [(set (match_operand:VI_HW 0 "register_operand" "=v") - (unspec:VI_HW [(match_operand:VI_HW 1 "register_operand" "v") - (match_operand:VI_HW 2 "register_operand" "v") - (match_operand:VI_HW 3 "register_operand" "v")] - UNSPEC_VEC_VMAH))] + [(set (match_operand:VI_HW_QHS 0 "register_operand" "=v") + (unspec:VI_HW_QHS [(match_operand:VI_HW_QHS 1 "register_operand" "v") + (match_operand:VI_HW_QHS 2 "register_operand" "v") + (match_operand:VI_HW_QHS 3 "register_operand" "v")] + UNSPEC_VEC_VMAH))] "TARGET_VX" "vmah\t%v0,%v1,%v2,%v3" [(set_attr "op_type" "VRR")]) ; vmalhb; vmalhh, vmalhf, vmalhg (define_insn "vec_vmalh" - [(set (match_operand:VI_HW 0 "register_operand" "=v") - (unspec:VI_HW [(match_operand:VI_HW 1 "register_operand" "v") - (match_operand:VI_HW 2 "register_operand" "v") - (match_operand:VI_HW 3 "register_operand" "v")] - UNSPEC_VEC_VMALH))] + [(set (match_operand:VI_HW_QHS 0 "register_operand" "=v") + (unspec:VI_HW_QHS [(match_operand:VI_HW_QHS 1 "register_operand" "v") + (match_operand:VI_HW_QHS 2 "register_operand" "v") + (match_operand:VI_HW_QHS 3 "register_operand" "v")] + UNSPEC_VEC_VMALH))] "TARGET_VX" "vmalh\t%v0,%v1,%v2,%v3" [(set_attr "op_type" "VRR")]) diff -Naur gcc-5.2.0.orig/gcc/config/sh/sh.c 
gcc-5.2.0/gcc/config/sh/sh.c --- gcc-5.2.0.orig/gcc/config/sh/sh.c 2015-06-25 05:15:18.000000000 -0500 +++ gcc-5.2.0/gcc/config/sh/sh.c 2015-10-03 10:20:58.905754000 -0500 @@ -222,6 +222,7 @@ static rtx_insn *gen_block_redirect (rtx_insn *, int, int); static void sh_reorg (void); static void sh_option_override (void); +static void sh_override_options_after_change (void); static void output_stack_adjust (int, rtx, int, HARD_REG_SET *, bool); static rtx_insn *frame_insn (rtx); static rtx push (int); @@ -236,7 +237,6 @@ static int sh_mode_entry (int); static int sh_mode_exit (int); static int sh_mode_priority (int entity, int n); -static bool sh_lra_p (void); static rtx mark_constant_pool_use (rtx); static tree sh_handle_interrupt_handler_attribute (tree *, tree, tree, @@ -413,6 +413,10 @@ #undef TARGET_OPTION_OVERRIDE #define TARGET_OPTION_OVERRIDE sh_option_override +#undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE +#define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE \ + sh_override_options_after_change + #undef TARGET_PRINT_OPERAND #define TARGET_PRINT_OPERAND sh_print_operand #undef TARGET_PRINT_OPERAND_ADDRESS @@ -1065,42 +1069,6 @@ TARGET_ACCUMULATE_OUTGOING_ARGS = 1; } - /* Adjust loop, jump and function alignment values (in bytes), if those - were not specified by the user using -falign-loops, -falign-jumps - and -falign-functions options. - 32 bit alignment is better for speed, because instructions can be - fetched as a pair from a longword boundary. For size use 16 bit - alignment to get more compact code. - Aligning all jumps increases the code size, even if it might - result in slightly faster code. Thus, it is set to the smallest - alignment possible if not specified by the user. */ - if (align_loops == 0) - { - if (TARGET_SH5) - align_loops = 8; - else - align_loops = optimize_size ? 2 : 4; - } - - if (align_jumps == 0) - { - if (TARGET_SHMEDIA) - align_jumps = 1 << CACHE_LOG; - else - align_jumps = 2; - } - else if (align_jumps < (TARGET_SHMEDIA ? 
4 : 2)) - align_jumps = TARGET_SHMEDIA ? 4 : 2; - - if (align_functions == 0) - { - if (TARGET_SHMEDIA) - align_functions = optimize_size - ? FUNCTION_BOUNDARY/8 : (1 << CACHE_LOG); - else - align_functions = optimize_size ? 2 : 4; - } - /* The linker relaxation code breaks when a function contains alignments that are larger than that at the start of a compilation unit. */ @@ -1144,6 +1112,8 @@ if (flag_strict_volatile_bitfields < 0 && abi_version_at_least(2)) flag_strict_volatile_bitfields = 1; + sh_override_options_after_change (); + /* Parse atomic model option and make sure it is valid for the current target CPU. */ selected_atomic_model_ @@ -1151,6 +1121,62 @@ register_sh_passes (); } + +/* Implement targetm.override_options_after_change. */ + +static void +sh_override_options_after_change (void) +{ + /* Adjust loop, jump and function alignment values (in bytes), if those + were not specified by the user using -falign-loops, -falign-jumps + and -falign-functions options. + 32 bit alignment is better for speed, because instructions can be + fetched as a pair from a longword boundary. For size use 16 bit + alignment to get more compact code. + Aligning all jumps increases the code size, even if it might + result in slightly faster code. Thus, it is set to the smallest + alignment possible if not specified by the user. */ + if (align_loops == 0) + { + if (TARGET_SH5) + align_loops = 8; + else + align_loops = optimize_size ? 2 : 4; + } + + if (align_jumps == 0) + { + if (TARGET_SHMEDIA) + align_jumps = 1 << CACHE_LOG; + else + align_jumps = 2; + } + else if (align_jumps < (TARGET_SHMEDIA ? 4 : 2)) + align_jumps = TARGET_SHMEDIA ? 4 : 2; + + if (align_functions == 0) + { + if (TARGET_SHMEDIA) + align_functions = optimize_size + ? FUNCTION_BOUNDARY/8 : (1 << CACHE_LOG); + else + align_functions = optimize_size ? 2 : 4; + } + + /* The linker relaxation code breaks when a function contains + alignments that are larger than that at the start of a + compilation unit. 
*/ + if (TARGET_RELAX) + { + int min_align = align_loops > align_jumps ? align_loops : align_jumps; + + /* Also take possible .long constants / mova tables into account. */ + if (min_align < 4) + min_align = 4; + if (align_functions < min_align) + align_functions = min_align; + } +} /* Print the operand address in x to the stream. */ static void @@ -13787,6 +13813,34 @@ return i; } +/* Given a move insn destiation and a source, make sure that the move source + operand is not a post-inc mem load with the same address reg as the + destination. Returns the modified source operand with the post-inc removed + if necessary. */ +rtx +sh_remove_overlapping_post_inc (rtx dst, rtx src) +{ + if (!MEM_P (src)) + return src; + + rtx addr = XEXP (src, 0); + + if (GET_CODE (addr) == POST_INC + && reg_overlap_mentioned_p (XEXP (addr, 0), dst)) + return replace_equiv_address (src, XEXP (addr, 0)); + + gcc_assert (GET_CODE (addr) != POST_MODIFY); + return src; +} + +/* Emit a move insn that is safe to be used in peephole patterns. */ +rtx_insn* +sh_peephole_emit_move_insn (rtx dst, rtx src) +{ + return sh_check_add_incdec_notes ( + emit_move_insn (dst, sh_remove_overlapping_post_inc (dst, src))); +} + /* Given an op rtx and an insn, try to find out whether the result of the specified op consists only of logical operations on T bit stores. 
*/ bool @@ -13886,6 +13940,7 @@ && !sh_insn_operands_modified_between_p (t_before_negc.insn, t_before_negc.insn, t_after_negc.insn) + && !modified_between_p (get_t_reg_rtx (), curr_insn, t_after_negc.insn) && !sh_unspec_insn_p (t_after_negc.insn) && !volatile_insn_p (PATTERN (t_after_negc.insn)) && !side_effects_p (PATTERN (t_after_negc.insn)) @@ -13992,6 +14047,9 @@ else { rtx extension_dst = XEXP (set_rtx, 0); + if (GET_MODE (extension_dst) != SImode) + extension_dst = simplify_gen_subreg (SImode, extension_dst, + GET_MODE (extension_dst), 0); if (modified_between_p (extension_dst, insn, use_at_insn)) { if (dump_file) @@ -14162,6 +14220,12 @@ if (!can_create_pseudo_p ()) return false; + /* expand_debug_locations may call this to compute rtx costs at + very early stage. In that case, don't make new insns here to + avoid codegen differences with -g. */ + if (currently_expanding_to_rtl) + return false; + /* We are going to invoke recog in a re-entrant way and thus have to capture its current state and restore it afterwards. */ recog_data_d prev_recog_data = recog_data; @@ -14446,7 +14510,7 @@ */ /* Return true if we use LRA instead of reload pass. */ -static bool +bool sh_lra_p (void) { return sh_lra_flag; diff -Naur gcc-5.2.0.orig/gcc/config/sh/sh.md gcc-5.2.0/gcc/config/sh/sh.md --- gcc-5.2.0.orig/gcc/config/sh/sh.md 2015-07-10 04:50:18.000000000 -0500 +++ gcc-5.2.0/gcc/config/sh/sh.md 2015-09-28 08:44:32.077610000 -0500 @@ -2082,17 +2082,18 @@ }) (define_expand "addsi3" - [(set (match_operand:SI 0 "arith_reg_operand" "") - (plus:SI (match_operand:SI 1 "arith_operand" "") - (match_operand:SI 2 "arith_or_int_operand" "")))] + [(set (match_operand:SI 0 "arith_reg_dest") + (plus:SI (match_operand:SI 1 "arith_reg_operand") + (match_operand:SI 2 "arith_or_int_operand")))] "" { - if (TARGET_SHMEDIA) - operands[1] = force_reg (SImode, operands[1]); - else if (! 
arith_operand (operands[2], SImode)) + if (TARGET_SH1 && !arith_operand (operands[2], SImode)) { - if (reg_overlap_mentioned_p (operands[0], operands[1])) - FAIL; + if (!sh_lra_p () || reg_overlap_mentioned_p (operands[0], operands[1])) + { + emit_insn (gen_addsi3_scr (operands[0], operands[1], operands[2])); + DONE; + } } }) @@ -2128,18 +2129,22 @@ ;; copy or constant load before the actual add insn. ;; Use u constraint for that case to avoid the invalid value in the stack ;; pointer. -(define_insn_and_split "*addsi3_compact" +;; This also results in better code when LRA is not used. However, we have +;; to use different sets of patterns and the order of these patterns is +;; important. +;; In some cases the constant zero might end up in operands[2] of the +;; patterns. We have to accept that and convert it into a reg-reg move. +(define_insn_and_split "*addsi3_compact_lra" [(set (match_operand:SI 0 "arith_reg_dest" "=r,&u") - (plus:SI (match_operand:SI 1 "arith_operand" "%0,r") + (plus:SI (match_operand:SI 1 "arith_reg_operand" "%0,r") (match_operand:SI 2 "arith_or_int_operand" "rI08,rn")))] - "TARGET_SH1 - && ((rtx_equal_p (operands[0], operands[1]) - && arith_operand (operands[2], SImode)) - || ! reg_overlap_mentioned_p (operands[0], operands[1]))" + "TARGET_SH1 && sh_lra_p () + && (! reg_overlap_mentioned_p (operands[0], operands[1]) + || arith_operand (operands[2], SImode))" "@ add %2,%0 #" - "reload_completed + "&& reload_completed && ! 
reg_overlap_mentioned_p (operands[0], operands[1])" [(set (match_dup 0) (match_dup 2)) (set (match_dup 0) (plus:SI (match_dup 0) (match_dup 1)))] @@ -2150,6 +2155,58 @@ } [(set_attr "type" "arith")]) +(define_insn_and_split "addsi3_scr" + [(set (match_operand:SI 0 "arith_reg_dest" "=r,&u,&u") + (plus:SI (match_operand:SI 1 "arith_reg_operand" "%0,r,r") + (match_operand:SI 2 "arith_or_int_operand" "rI08,r,n"))) + (clobber (match_scratch:SI 3 "=X,X,&u"))] + "TARGET_SH1" + "@ + add %2,%0 + # + #" + "&& reload_completed" + [(set (match_dup 0) (plus:SI (match_dup 0) (match_dup 2)))] +{ + if (operands[2] == const0_rtx) + { + emit_move_insn (operands[0], operands[1]); + DONE; + } + + if (CONST_INT_P (operands[2]) && !satisfies_constraint_I08 (operands[2])) + { + if (reg_overlap_mentioned_p (operands[0], operands[1])) + { + emit_move_insn (operands[3], operands[2]); + emit_move_insn (operands[0], operands[1]); + operands[2] = operands[3]; + } + else + { + emit_move_insn (operands[0], operands[2]); + operands[2] = operands[1]; + } + } + else if (!reg_overlap_mentioned_p (operands[0], operands[1])) + emit_move_insn (operands[0], operands[1]); +} + [(set_attr "type" "arith")]) + +(define_insn_and_split "*addsi3" + [(set (match_operand:SI 0 "arith_reg_dest" "=r,r") + (plus:SI (match_operand:SI 1 "arith_reg_operand" "%0,r") + (match_operand:SI 2 "arith_operand" "rI08,Z")))] + "TARGET_SH1 && !sh_lra_p ()" + "@ + add %2,%0 + #" + "&& operands[2] == const0_rtx" + [(set (match_dup 0) (match_dup 1))] +{ +} + [(set_attr "type" "arith")]) + ;; ------------------------------------------------------------------------- ;; Subtraction instructions ;; ------------------------------------------------------------------------- @@ -7851,6 +7908,24 @@ "" { prepare_move_operands (operands, DImode); + if (TARGET_SH1) + { + /* When the dest operand is (R0, R1) register pair, split it to + two movsi of which dest is R1 and R0 so as to lower R0-register + pressure on the first movsi. 
Apply only for simple source not + to make complex rtl here. */ + if (REG_P (operands[0]) + && REGNO (operands[0]) == R0_REG + && REG_P (operands[1]) + && REGNO (operands[1]) >= FIRST_PSEUDO_REGISTER) + { + emit_insn (gen_movsi (gen_rtx_REG (SImode, R1_REG), + gen_rtx_SUBREG (SImode, operands[1], 4))); + emit_insn (gen_movsi (gen_rtx_REG (SImode, R0_REG), + gen_rtx_SUBREG (SImode, operands[1], 0))); + DONE; + } + } }) (define_insn "movdf_media" @@ -9404,7 +9479,7 @@ (use (reg:SI FPSCR_MODES_REG)) (use (reg:SI PIC_REG)) (clobber (reg:SI PR_REG)) - (clobber (match_scratch:SI 2 "=r"))] + (clobber (match_scratch:SI 2 "=&r"))] "TARGET_SH2" "#" "reload_completed" @@ -9538,7 +9613,7 @@ (use (reg:SI FPSCR_MODES_REG)) (use (reg:SI PIC_REG)) (clobber (reg:SI PR_REG)) - (clobber (match_scratch:SI 3 "=r"))] + (clobber (match_scratch:SI 3 "=&r"))] "TARGET_SH2" "#" "reload_completed" @@ -9939,7 +10014,7 @@ [(call (mem:SI (match_operand:SI 0 "symbol_ref_operand" "")) (match_operand 1 "" "")) (use (reg:SI FPSCR_MODES_REG)) - (clobber (match_scratch:SI 2 "=k")) + (clobber (match_scratch:SI 2 "=&k")) (return)] "TARGET_SH2" "#" @@ -10131,7 +10206,7 @@ (call (mem:SI (match_operand:SI 1 "symbol_ref_operand" "")) (match_operand 2 "" ""))) (use (reg:SI FPSCR_MODES_REG)) - (clobber (match_scratch:SI 3 "=k")) + (clobber (match_scratch:SI 3 "=&k")) (return)] "TARGET_SH2" "#" @@ -10579,7 +10654,7 @@ if (TARGET_SHMEDIA) { rtx tr = gen_rtx_REG (Pmode, TR0_REG); - rtx pic = operands[0]; + rtx pic = operands[1]; rtx lab = PATTERN (gen_call_site ()); rtx insn, equiv; @@ -14626,7 +14701,7 @@ [(const_int 0)] { emit_insn (gen_addsi3 (operands[1], operands[1], operands[2])); - sh_check_add_incdec_notes (emit_move_insn (operands[3], operands[1])); + sh_peephole_emit_move_insn (operands[3], operands[1]); }) ;; mov.l @(r0,r9),r1 @@ -14639,7 +14714,7 @@ "TARGET_SH1 && peep2_reg_dead_p (2, operands[0])" [(const_int 0)] { - sh_check_add_incdec_notes (emit_move_insn (operands[2], operands[1])); + 
sh_peephole_emit_move_insn (operands[2], operands[1]); }) (define_peephole2 @@ -14650,7 +14725,7 @@ "TARGET_SH1 && peep2_reg_dead_p (2, operands[0])" [(const_int 0)] { - sh_check_add_incdec_notes (emit_move_insn (operands[2], operands[1])); + sh_peephole_emit_move_insn (operands[2], operands[1]); }) (define_peephole2 @@ -14662,7 +14737,7 @@ [(const_int 0)] { sh_check_add_incdec_notes (emit_insn (gen_extendsi2 (operands[2], - operands[1]))); + sh_remove_overlapping_post_inc (operands[2], operands[1])))); }) ;; mov.w @(18,r1),r0 (r0 = HImode) @@ -14692,8 +14767,9 @@ // We don't know what the new set insn will be in detail. Just make sure // that it still can be recognized and the constraints are satisfied. - rtx_insn* i = emit_insn (gen_rtx_SET (VOIDmode, operands[2], operands[3])); - + rtx_insn* i = emit_insn (gen_rtx_SET (VOIDmode, operands[2], + sh_remove_overlapping_post_inc (operands[2], operands[3]))); + recog_data_d prev_recog_data = recog_data; bool i_invalid = insn_invalid_p (i, false); recog_data = prev_recog_data; @@ -14731,7 +14807,8 @@ { // We don't know what the new set insn will be in detail. Just make sure // that it still can be recognized and the constraints are satisfied. - rtx_insn* i = emit_insn (gen_rtx_SET (VOIDmode, operands[2], operands[3])); + rtx_insn* i = emit_insn (gen_rtx_SET (VOIDmode, operands[2], + sh_remove_overlapping_post_inc (operands[2], operands[3]))); recog_data_d prev_recog_data = recog_data; bool i_invalid = insn_invalid_p (i, false); diff -Naur gcc-5.2.0.orig/gcc/config/sh/sh-protos.h gcc-5.2.0/gcc/config/sh/sh-protos.h --- gcc-5.2.0.orig/gcc/config/sh/sh-protos.h 2015-02-26 13:14:23.000000000 -0600 +++ gcc-5.2.0/gcc/config/sh/sh-protos.h 2015-09-28 08:44:32.077610000 -0500 @@ -93,6 +93,7 @@ extern rtx sh_fsca_int2sf (void); /* Declare functions defined in sh.c and used in templates. 
*/ +extern bool sh_lra_p (void); extern const char *output_branch (int, rtx_insn *, rtx *); extern const char *output_ieee_ccmpeq (rtx_insn *, rtx *); @@ -191,18 +192,19 @@ if (!REG_P (reg) || insn == NULL_RTX) return result; - rtx_insn* previnsn = insn; - - for (result.insn = stepfunc (insn); result.insn != NULL_RTX; - previnsn = result.insn, result.insn = stepfunc (result.insn)) + for (rtx_insn* i = stepfunc (insn); i != NULL_RTX; i = stepfunc (i)) { - if (BARRIER_P (result.insn)) + if (BARRIER_P (i)) break; - if (!NONJUMP_INSN_P (result.insn)) - continue; - if (reg_set_p (reg, result.insn)) + if (!INSN_P (i) || DEBUG_INSN_P (i)) + continue; + if (reg_set_p (reg, i)) { - result.set_rtx = set_of (reg, result.insn); + if (CALL_P (i)) + break; + + result.insn = i; + result.set_rtx = set_of (reg, i); if (result.set_rtx == NULL_RTX || GET_CODE (result.set_rtx) != SET) break; @@ -225,12 +227,6 @@ } } - /* If the loop above stopped at the first insn in the list, - result.insn will be null. Use the insn from the previous iteration - in this case. 
*/ - if (result.insn == NULL) - result.insn = previnsn; - if (result.set_src != NULL) gcc_assert (result.insn != NULL && result.set_rtx != NULL); @@ -310,6 +306,8 @@ extern bool sh_reg_dead_or_unused_after_insn (const rtx_insn* i, int regno); extern void sh_remove_reg_dead_or_unused_notes (rtx_insn* i, int regno); extern rtx_insn* sh_check_add_incdec_notes (rtx_insn* i); +extern rtx sh_remove_overlapping_post_inc (rtx dst, rtx src); +extern rtx_insn* sh_peephole_emit_move_insn (rtx dst, rtx src); extern bool sh_in_recog_treg_set_expr (void); extern bool sh_recog_treg_set_expr (rtx op, machine_mode mode); diff -Naur gcc-5.2.0.orig/gcc/config/sol2.h gcc-5.2.0/gcc/config/sol2.h --- gcc-5.2.0.orig/gcc/config/sol2.h 2015-01-05 06:33:28.000000000 -0600 +++ gcc-5.2.0/gcc/config/sol2.h 2015-10-01 07:01:18.897040000 -0500 @@ -154,21 +154,41 @@ #define STARTFILE_ARCH_SPEC "%{ansi:values-Xc.o%s} \ %{!ansi:values-Xa.o%s}" +#if defined(HAVE_LD_PIE) && defined(HAVE_SOLARIS_CRTS) +#define STARTFILE_CRTBEGIN_SPEC "%{shared|pie:crtbeginS.o%s;:crtbegin.o%s}" +#else +#define STARTFILE_CRTBEGIN_SPEC "crtbegin.o%s" +#endif + /* We don't use the standard svr4 STARTFILE_SPEC because it's wrong for us. */ #undef STARTFILE_SPEC -#define STARTFILE_SPEC "%{!shared: \ - %{!symbolic: \ - %{p:mcrt1.o%s} \ - %{!p: \ - %{pg:gcrt1.o%s gmon.o%s} \ - %{!pg:crt1.o%s}}}} \ - crti.o%s %(startfile_arch) \ - crtbegin.o%s" +#ifdef HAVE_SOLARIS_CRTS +/* Since Solaris 11.x and Solaris 12, the OS delivers crt1.o, crti.o, and + crtn.o, with a hook for compiler-dependent stuff like profile handling. 
*/ +#define STARTFILE_SPEC "%{!shared:%{!symbolic: \ + crt1.o%s \ + %{p:%e-p is not supported; \ + pg:crtpg.o%s gmon.o%s; \ + :crtp.o%s}}} \ + crti.o%s %(startfile_arch) %(startfile_crtbegin)" +#else +#define STARTFILE_SPEC "%{!shared:%{!symbolic: \ + %{p:mcrt1.o%s; \ + pg:gcrt1.o%s gmon.o%s; \ + :crt1.o%s}}} \ + crti.o%s %(startfile_arch) %(startfile_crtbegin)" +#endif + +#if defined(HAVE_LD_PIE) && defined(HAVE_SOLARIS_CRTS) +#define ENDFILE_CRTEND_SPEC "%{shared|pie:crtendS.o%s;:crtend.o%s}" +#else +#define ENDFILE_CRTEND_SPEC "crtend.o%s" +#endif #undef ENDFILE_SPEC #define ENDFILE_SPEC \ "%{Ofast|ffast-math|funsafe-math-optimizations:crtfastmath.o%s} \ - crtend.o%s crtn.o%s" + %(endfile_arch) %(endfile_crtend) crtn.o%s" #undef LINK_ARCH32_SPEC_BASE #define LINK_ARCH32_SPEC_BASE \ @@ -241,11 +261,14 @@ #undef SUBTARGET_EXTRA_SPECS #define SUBTARGET_EXTRA_SPECS \ - { "startfile_arch", STARTFILE_ARCH_SPEC }, \ - { "link_arch32", LINK_ARCH32_SPEC }, \ - { "link_arch64", LINK_ARCH64_SPEC }, \ - { "link_arch_default", LINK_ARCH_DEFAULT_SPEC }, \ - { "link_arch", LINK_ARCH_SPEC }, \ + { "startfile_arch", STARTFILE_ARCH_SPEC }, \ + { "startfile_crtbegin", STARTFILE_CRTBEGIN_SPEC }, \ + { "link_arch32", LINK_ARCH32_SPEC }, \ + { "link_arch64", LINK_ARCH64_SPEC }, \ + { "link_arch_default", LINK_ARCH_DEFAULT_SPEC }, \ + { "link_arch", LINK_ARCH_SPEC }, \ + { "endfile_arch", ENDFILE_ARCH_SPEC }, \ + { "endfile_crtend", ENDFILE_CRTEND_SPEC }, \ SUBTARGET_CPU_EXTRA_SPECS /* C++11 programs need -lrt for nanosleep. */ @@ -300,6 +323,20 @@ #endif /* HAVE_LD_EH_FRAME && TARGET_DL_ITERATE_PHDR */ #endif +#if defined(HAVE_LD_PIE) && defined(HAVE_SOLARIS_CRTS) +#ifdef USE_GLD +/* Assert -z text by default to match Solaris ld. */ +#define LINK_PIE_SPEC "%{pie:-pie %{!mimpure-text:-z text}} " +#else +/* Solaris ld needs -z type=pie instead of -pie. 
*/ +#define LINK_PIE_SPEC "%{pie:-z type=pie %{mimpure-text:-z textoff}} " +#endif +#else +/* Error out if some part of PIE support is missing. */ +#define LINK_PIE_SPEC \ + "%{no-pie:} %{pie:%e-pie is not supported in this configuration} " +#endif + /* collect2.c can only parse GNU nm -n output. Solaris nm needs -png to produce the same format. */ #define NM_FLAGS "-png" diff -Naur gcc-5.2.0.orig/gcc/config/sparc/driver-sparc.c gcc-5.2.0/gcc/config/sparc/driver-sparc.c --- gcc-5.2.0.orig/gcc/config/sparc/driver-sparc.c 2015-01-05 06:33:28.000000000 -0600 +++ gcc-5.2.0/gcc/config/sparc/driver-sparc.c 2015-09-28 01:24:21.352655000 -0500 @@ -73,6 +73,7 @@ { "UltraSparc T2", "niagara2" }, { "UltraSparc T3", "niagara3" }, { "UltraSparc T4", "niagara4" }, + { "LEON", "leon3" }, #endif { NULL, NULL } }; diff -Naur gcc-5.2.0.orig/gcc/config/sparc/sol2.h gcc-5.2.0/gcc/config/sparc/sol2.h --- gcc-5.2.0.orig/gcc/config/sparc/sol2.h 2015-01-05 06:33:28.000000000 -0600 +++ gcc-5.2.0/gcc/config/sparc/sol2.h 2015-10-01 07:01:18.897040000 -0500 @@ -280,6 +280,8 @@ #define SUBTARGET_CPU_EXTRA_SPECS +#define ENDFILE_ARCH_SPEC "" + /* Register the Solaris-specific #pragma directives. */ diff -Naur gcc-5.2.0.orig/gcc/config/sparc/sparc.c gcc-5.2.0/gcc/config/sparc/sparc.c --- gcc-5.2.0.orig/gcc/config/sparc/sparc.c 2015-06-11 10:58:32.000000000 -0500 +++ gcc-5.2.0/gcc/config/sparc/sparc.c 2015-09-28 01:21:05.639984000 -0500 @@ -7455,7 +7455,7 @@ static bool sparc_function_value_regno_p (const unsigned int regno) { - return (regno == 8 || regno == 32); + return (regno == 8 || (TARGET_FPU && regno == 32)); } /* Do what is necessary for `va_start'. 
We look at the current function @@ -11678,9 +11678,8 @@ if (before_after & 1) { - if (model == MEMMODEL_RELEASE - || model == MEMMODEL_ACQ_REL - || model == MEMMODEL_SEQ_CST) + if (is_mm_release (model) || is_mm_acq_rel (model) + || is_mm_seq_cst (model)) { if (load_store & 1) mm |= LoadLoad | StoreLoad; @@ -11690,9 +11689,8 @@ } if (before_after & 2) { - if (model == MEMMODEL_ACQUIRE - || model == MEMMODEL_ACQ_REL - || model == MEMMODEL_SEQ_CST) + if (is_mm_acquire (model) || is_mm_acq_rel (model) + || is_mm_seq_cst (model)) { if (load_store & 1) mm |= LoadLoad | LoadStore; diff -Naur gcc-5.2.0.orig/gcc/config/sparc/sparc.md gcc-5.2.0/gcc/config/sparc/sparc.md --- gcc-5.2.0.orig/gcc/config/sparc/sparc.md 2015-06-11 10:58:32.000000000 -0500 +++ gcc-5.2.0/gcc/config/sparc/sparc.md 2015-09-28 01:21:05.639984000 -0500 @@ -6398,7 +6398,6 @@ "" { rtx valreg1 = gen_rtx_REG (DImode, 8); - rtx valreg2 = gen_rtx_REG (TARGET_ARCH64 ? TFmode : DFmode, 32); rtx result = operands[1]; /* Pass constm1 to indicate that it may expect a structure value, but @@ -6407,8 +6406,12 @@ /* Save the function value registers. */ emit_move_insn (adjust_address (result, DImode, 0), valreg1); - emit_move_insn (adjust_address (result, TARGET_ARCH64 ? TFmode : DFmode, 8), - valreg2); + if (TARGET_FPU) + { + rtx valreg2 = gen_rtx_REG (TARGET_ARCH64 ? TFmode : DFmode, 32); + emit_move_insn (adjust_address (result, TARGET_ARCH64 ? TFmode : DFmode, 8), + valreg2); + } /* The optimizer does not know that the call sets the function value registers we stored in the result block. We avoid problems by @@ -6620,7 +6623,6 @@ "" { rtx valreg1 = gen_rtx_REG (DImode, 24); - rtx valreg2 = gen_rtx_REG (TARGET_ARCH64 ? TFmode : DFmode, 32); rtx result = operands[0]; if (! TARGET_ARCH64) @@ -6637,14 +6639,18 @@ emit_insn (gen_update_return (rtnreg, value)); } - /* Reload the function value registers. */ + /* Reload the function value registers. + Put USE insns before the return. 
*/ emit_move_insn (valreg1, adjust_address (result, DImode, 0)); - emit_move_insn (valreg2, - adjust_address (result, TARGET_ARCH64 ? TFmode : DFmode, 8)); - - /* Put USE insns before the return. */ emit_use (valreg1); - emit_use (valreg2); + + if (TARGET_FPU) + { + rtx valreg2 = gen_rtx_REG (TARGET_ARCH64 ? TFmode : DFmode, 32); + emit_move_insn (valreg2, + adjust_address (result, TARGET_ARCH64 ? TFmode : DFmode, 8)); + emit_use (valreg2); + } /* Construct the return. */ expand_naked_return (); diff -Naur gcc-5.2.0.orig/gcc/config/sparc/sparc.opt gcc-5.2.0/gcc/config/sparc/sparc.opt --- gcc-5.2.0.orig/gcc/config/sparc/sparc.opt 2015-01-05 06:33:28.000000000 -0600 +++ gcc-5.2.0/gcc/config/sparc/sparc.opt 2015-09-28 01:22:41.955493000 -0500 @@ -114,8 +114,8 @@ Optimize tail call instructions in assembler and linker muser-mode -Target Report Mask(USER_MODE) -Do not generate code that can only run in supervisor mode +Target Report InverseMask(SV_MODE) +Do not generate code that can only run in supervisor mode (default) mcpu= Target RejectNegative Joined Var(sparc_cpu_and_features) Enum(sparc_processor_type) Init(PROCESSOR_V7) diff -Naur gcc-5.2.0.orig/gcc/config/sparc/sync.md gcc-5.2.0/gcc/config/sparc/sync.md --- gcc-5.2.0.orig/gcc/config/sparc/sync.md 2015-01-05 06:33:28.000000000 -0600 +++ gcc-5.2.0/gcc/config/sparc/sync.md 2015-09-28 01:22:41.955493000 -0500 @@ -222,10 +222,10 @@ UNSPECV_CAS))] "TARGET_LEON3" { - if (TARGET_USER_MODE) - return "casa\t%1 0xa, %2, %0"; /* ASI for user data space. */ - else + if (TARGET_SV_MODE) return "casa\t%1 0xb, %2, %0"; /* ASI for supervisor data space. */ + else + return "casa\t%1 0xa, %2, %0"; /* ASI for user data space. */ } [(set_attr "type" "multi")]) diff -Naur gcc-5.2.0.orig/gcc/config/sparc/t-rtems gcc-5.2.0/gcc/config/sparc/t-rtems --- gcc-5.2.0.orig/gcc/config/sparc/t-rtems 2015-01-05 06:33:28.000000000 -0600 +++ gcc-5.2.0/gcc/config/sparc/t-rtems 2015-09-28 10:05:38.974299000 -0500 @@ -17,15 +17,20 @@ # <http://www.gnu.org/licenses/>. 
# -MULTILIB_OPTIONS = msoft-float mcpu=v8/mcpu=leon3/mcpu=leon3v7 muser-mode -MULTILIB_DIRNAMES = soft v8 leon3 leon3v7 user-mode +MULTILIB_OPTIONS = msoft-float mcpu=v8/mcpu=leon3/mcpu=leon3v7/mcpu=leon \ + mfix-ut699/mfix-at697f +MULTILIB_DIRNAMES = soft v8 leon3 leon3v7 leon ut699 at697f MULTILIB_MATCHES = msoft-float=mno-fpu -MULTILIB_EXCEPTIONS = muser-mode -MULTILIB_EXCEPTIONS += mcpu=leon3 -MULTILIB_EXCEPTIONS += mcpu=leon3v7 -MULTILIB_EXCEPTIONS += msoft-float/mcpu=leon3 -MULTILIB_EXCEPTIONS += msoft-float/mcpu=leon3v7 -MULTILIB_EXCEPTIONS += msoft-float/muser-mode -MULTILIB_EXCEPTIONS += msoft-float/mcpu=v8/muser-mode -MULTILIB_EXCEPTIONS += mcpu=v8/muser-mode +MULTILIB_EXCEPTIONS = mfix-ut699 +MULTILIB_EXCEPTIONS += msoft-float/mfix-ut699 +MULTILIB_EXCEPTIONS += msoft-float/mcpu=v8/mfix-ut699 +MULTILIB_EXCEPTIONS += msoft-float/mcpu=leon3*/mfix-ut699 +MULTILIB_EXCEPTIONS += mcpu=v8/mfix-ut699 +MULTILIB_EXCEPTIONS += mcpu=leon3*/mfix-ut699 +MULTILIB_EXCEPTIONS += mfix-at697f +MULTILIB_EXCEPTIONS += msoft-float/mfix-at697f +MULTILIB_EXCEPTIONS += msoft-float/mcpu=v8/mfix-at697f +MULTILIB_EXCEPTIONS += msoft-float/mcpu=leon3*/mfix-at697f +MULTILIB_EXCEPTIONS += mcpu=v8/mfix-at697f +MULTILIB_EXCEPTIONS += mcpu=leon3*/mfix-at697f diff -Naur gcc-5.2.0.orig/gcc/config.gcc gcc-5.2.0/gcc/config.gcc --- gcc-5.2.0.orig/gcc/config.gcc 2015-07-02 10:53:33.000000000 -0500 +++ gcc-5.2.0/gcc/config.gcc 2015-09-10 09:17:53.714149000 -0500 @@ -819,6 +819,12 @@ sol2_tm_file_head="dbxelf.h elfos.h ${cpu_type}/sysv4.h" sol2_tm_file_tail="${cpu_type}/sol2.h sol2.h" sol2_tm_file="${sol2_tm_file_head} ${sol2_tm_file_tail}" + case ${target} in + *-*-solaris2.1[2-9]*) + # __cxa_atexit was introduced in Solaris 12. 
+ default_use_cxa_atexit=yes + ;; + esac use_gcc_stdint=wrap if test x$gnu_ld = xyes; then tm_file="usegld.h ${tm_file}" diff -Naur gcc-5.2.0.orig/gcc/config.in gcc-5.2.0/gcc/config.in --- gcc-5.2.0.orig/gcc/config.in 2015-07-16 04:16:25.000000000 -0500 +++ gcc-5.2.0/gcc/config.in 2015-10-12 05:59:21.185205000 -0500 @@ -1313,6 +1313,12 @@ #endif +/* Define if isl_options_set_schedule_serialize_sccs exists. */ +#ifndef USED_FOR_TARGET +#undef HAVE_ISL_OPTIONS_SET_SCHEDULE_SERIALIZE_SCCS +#endif + + /* Define if isl_schedule_constraints_compute_schedule exists. */ #ifndef USED_FOR_TARGET #undef HAVE_ISL_SCHED_CONSTRAINTS_COMPUTE_SCHEDULE @@ -1423,7 +1429,7 @@ #endif -/* Define if your linker supports -pie option. */ +/* Define if your linker supports PIE option. */ #ifndef USED_FOR_TARGET #undef HAVE_LD_PIE #endif @@ -1580,6 +1586,12 @@ #endif +/* Define if the system-provided CRTs are present on Solaris. */ +#ifndef USED_FOR_TARGET +#undef HAVE_SOLARIS_CRTS +#endif + + /* Define to 1 if you have the <stddef.h> header file. */ #ifndef USED_FOR_TARGET #undef HAVE_STDDEF_H diff -Naur gcc-5.2.0.orig/gcc/configure gcc-5.2.0/gcc/configure --- gcc-5.2.0.orig/gcc/configure 2015-07-03 12:00:49.000000000 -0500 +++ gcc-5.2.0/gcc/configure 2015-10-12 05:59:21.185205000 -0500 @@ -27141,15 +27141,37 @@ $as_echo_n "checking linker position independent executable support... " >&6; } gcc_cv_ld_pie=no if test $in_tree_ld = yes ; then - if test "$gcc_cv_gld_major_version" -eq 2 -a "$gcc_cv_gld_minor_version" -ge 15 -o "$gcc_cv_gld_major_version" -gt 2 \ + case "$target" in + # Full PIE support on Solaris was only introduced in gld 2.26. 
+ *-*-solaris2*) gcc_gld_pie_min_version=26 ;; + *) gcc_gld_pie_min_version=15 ;; + esac + if test "$gcc_cv_gld_major_version" -eq 2 -a "$gcc_cv_gld_minor_version" -ge "$gcc_gld_pie_min_version" -o "$gcc_cv_gld_major_version" -gt 2 \ && test $in_tree_ld_is_elf = yes; then gcc_cv_ld_pie=yes fi elif test x$gcc_cv_ld != x; then - # Check if linker supports -pie option - if $gcc_cv_ld --help 2>/dev/null | grep -- -pie > /dev/null; then - gcc_cv_ld_pie=yes + # Check if linker supports -pie option + if $gcc_cv_ld --help 2>/dev/null | grep -- -pie > /dev/null; then + gcc_cv_ld_pie=yes + case "$target" in + *-*-solaris2*) + if echo "$ld_ver" | grep GNU > /dev/null \ + && test "$ld_vers_major" -eq 2 -a "$ld_vers_minor" -lt 26; then + gcc_cv_ld_pie=no fi + ;; + esac + else + case "$target" in + *-*-solaris2.1[1-9]*) + # Solaris 11.x and Solaris 12 added PIE support. + if $gcc_cv_ld -z help 2>&1 | grep -- type.*pie > /dev/null; then + gcc_cv_ld_pie=yes + fi + ;; + esac + fi fi if test x"$gcc_cv_ld_pie" = xyes; then @@ -27734,6 +27756,44 @@ fi +case $target in +*-*-solaris2*) + # Check for system-provided CRTs on Solaris 11.x and Solaris 12. + { $as_echo "$as_me:${as_lineno-$LINENO}: checking system-provided CRTs on Solaris" >&5 +$as_echo_n "checking system-provided CRTs on Solaris... " >&6; } +if test "${gcc_cv_solaris_crts+set}" = set; then : + $as_echo_n "(cached) " >&6 +else + gcc_cv_solaris_crts=no + if test x$host != x$target; then + if test "x$with_sysroot" = xyes; then + target_sysroot="${test_exec_prefix}/${target_noncanonical}/sys-root" + else + target_sysroot="${with_sysroot}" + fi + fi + target_libdir="$target_sysroot/usr/lib" + # At the time they were added, gcrt1.o became a symlink for backwards + # compatibility on x86, while crt1.o was added on sparc, so check for that. 
+ case $target in + i?86-*-solaris2* | x86_64-*-solaris2*) + if test -h "$target_libdir/gcrt1.o"; then gcc_cv_solaris_crts=yes; fi + ;; + sparc*-*-solaris2*) + if test -f "$target_libdir/crt1.o"; then gcc_cv_solaris_crts=yes; fi + ;; + esac +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $gcc_cv_solaris_crts" >&5 +$as_echo "$gcc_cv_solaris_crts" >&6; } + ;; +esac +if test x$gcc_cv_solaris_crts = xyes; then + +$as_echo "#define HAVE_SOLARIS_CRTS 1" >>confdefs.h + +fi + # Test for stack protector support in target C library. { $as_echo "$as_me:${as_lineno-$LINENO}: checking __stack_chk_fail in target C library" >&5 $as_echo_n "checking __stack_chk_fail in target C library... " >&6; } @@ -28245,6 +28305,8 @@ # Check whether isl_schedule_constraints_compute_schedule is available; # it's new in ISL-0.13. +# Check whether isl_options_set_schedule_serialize_sccs is available; +# it's new in ISL-0.15. if test "x${ISLLIBS}" != "x" ; then saved_CFLAGS="$CFLAGS" CFLAGS="$CFLAGS $ISLINC" @@ -28274,6 +28336,29 @@ { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_has_isl_schedule_constraints_compute_schedule" >&5 $as_echo "$ac_has_isl_schedule_constraints_compute_schedule" >&6; } + { $as_echo "$as_me:${as_lineno-$LINENO}: checking Checking for isl_options_set_schedule_serialize_sccs" >&5 +$as_echo_n "checking Checking for isl_options_set_schedule_serialize_sccs... " >&6; } + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ +#include <isl/schedule.h> +int +main () +{ +isl_options_set_schedule_serialize_sccs (NULL, 0); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + ac_has_isl_options_set_schedule_serialize_sccs=yes +else + ac_has_isl_options_set_schedule_serialize_sccs=no +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_has_isl_options_set_schedule_serialize_sccs" >&5 +$as_echo "$ac_has_isl_options_set_schedule_serialize_sccs" >&6; } + LIBS="$saved_LIBS" CFLAGS="$saved_CFLAGS" @@ -28282,6 +28367,12 @@ $as_echo "#define HAVE_ISL_SCHED_CONSTRAINTS_COMPUTE_SCHEDULE 1" >>confdefs.h fi + + if test x"$ac_has_isl_options_set_schedule_serialize_sccs" = x"yes"; then + +$as_echo "#define HAVE_ISL_OPTIONS_SET_SCHEDULE_SERIALIZE_SCCS 1" >>confdefs.h + + fi fi # Check for plugin support diff -Naur gcc-5.2.0.orig/gcc/configure.ac gcc-5.2.0/gcc/configure.ac --- gcc-5.2.0.orig/gcc/configure.ac 2015-07-03 12:00:49.000000000 -0500 +++ gcc-5.2.0/gcc/configure.ac 2015-10-12 05:59:21.185205000 -0500 @@ -4719,19 +4719,41 @@ AC_MSG_CHECKING(linker position independent executable support) gcc_cv_ld_pie=no if test $in_tree_ld = yes ; then - if test "$gcc_cv_gld_major_version" -eq 2 -a "$gcc_cv_gld_minor_version" -ge 15 -o "$gcc_cv_gld_major_version" -gt 2 \ + case "$target" in + # Full PIE support on Solaris was only introduced in gld 2.26. 
+ *-*-solaris2*) gcc_gld_pie_min_version=26 ;; + *) gcc_gld_pie_min_version=15 ;; + esac + if test "$gcc_cv_gld_major_version" -eq 2 -a "$gcc_cv_gld_minor_version" -ge "$gcc_gld_pie_min_version" -o "$gcc_cv_gld_major_version" -gt 2 \ && test $in_tree_ld_is_elf = yes; then gcc_cv_ld_pie=yes fi elif test x$gcc_cv_ld != x; then - # Check if linker supports -pie option - if $gcc_cv_ld --help 2>/dev/null | grep -- -pie > /dev/null; then - gcc_cv_ld_pie=yes + # Check if linker supports -pie option + if $gcc_cv_ld --help 2>/dev/null | grep -- -pie > /dev/null; then + gcc_cv_ld_pie=yes + case "$target" in + *-*-solaris2*) + if echo "$ld_ver" | grep GNU > /dev/null \ + && test "$ld_vers_major" -eq 2 -a "$ld_vers_minor" -lt 26; then + gcc_cv_ld_pie=no + fi + ;; + esac + else + case "$target" in + *-*-solaris2.1[[1-9]]*) + # Solaris 11.x and Solaris 12 added PIE support. + if $gcc_cv_ld -z help 2>&1 | grep -- type.*pie > /dev/null; then + gcc_cv_ld_pie=yes fi + ;; + esac + fi fi if test x"$gcc_cv_ld_pie" = xyes; then AC_DEFINE(HAVE_LD_PIE, 1, -[Define if your linker supports -pie option.]) +[Define if your linker supports PIE option.]) fi AC_MSG_RESULT($gcc_cv_ld_pie) @@ -5224,6 +5246,37 @@ [Define if your linker supports --sysroot.]) fi +case $target in +*-*-solaris2*) + # Check for system-provided CRTs on Solaris 11.x and Solaris 12. + AC_CACHE_CHECK([system-provided CRTs on Solaris], + gcc_cv_solaris_crts, + [gcc_cv_solaris_crts=no + if test x$host != x$target; then + if test "x$with_sysroot" = xyes; then + target_sysroot="${test_exec_prefix}/${target_noncanonical}/sys-root" + else + target_sysroot="${with_sysroot}" + fi + fi + target_libdir="$target_sysroot/usr/lib" + # At the time they were added, gcrt1.o became a symlink for backwards + # compatibility on x86, while crt1.o was added on sparc, so check for that. 
+ case $target in + i?86-*-solaris2* | x86_64-*-solaris2*) + if test -h "$target_libdir/gcrt1.o"; then gcc_cv_solaris_crts=yes; fi + ;; + sparc*-*-solaris2*) + if test -f "$target_libdir/crt1.o"; then gcc_cv_solaris_crts=yes; fi + ;; + esac]) + ;; +esac +if test x$gcc_cv_solaris_crts = xyes; then + AC_DEFINE(HAVE_SOLARIS_CRTS, 1, + [Define if the system-provided CRTs are present on Solaris.]) +fi + # Test for stack protector support in target C library. AC_CACHE_CHECK(__stack_chk_fail in target C library, gcc_cv_libc_provides_ssp, @@ -5693,6 +5746,8 @@ # Check whether isl_schedule_constraints_compute_schedule is available; # it's new in ISL-0.13. +# Check whether isl_options_set_schedule_serialize_sccs is available; +# it's new in ISL-0.15. if test "x${ISLLIBS}" != "x" ; then saved_CFLAGS="$CFLAGS" CFLAGS="$CFLAGS $ISLINC" @@ -5706,6 +5761,13 @@ [ac_has_isl_schedule_constraints_compute_schedule=no]) AC_MSG_RESULT($ac_has_isl_schedule_constraints_compute_schedule) + AC_MSG_CHECKING([Checking for isl_options_set_schedule_serialize_sccs]) + AC_TRY_LINK([#include <isl/schedule.h>], + [isl_options_set_schedule_serialize_sccs (NULL, 0);], + [ac_has_isl_options_set_schedule_serialize_sccs=yes], + [ac_has_isl_options_set_schedule_serialize_sccs=no]) + AC_MSG_RESULT($ac_has_isl_options_set_schedule_serialize_sccs) + LIBS="$saved_LIBS" CFLAGS="$saved_CFLAGS" @@ -5713,6 +5775,11 @@ AC_DEFINE(HAVE_ISL_SCHED_CONSTRAINTS_COMPUTE_SCHEDULE, 1, [Define if isl_schedule_constraints_compute_schedule exists.]) fi + + if test x"$ac_has_isl_options_set_schedule_serialize_sccs" = x"yes"; then + AC_DEFINE(HAVE_ISL_OPTIONS_SET_SCHEDULE_SERIALIZE_SCCS, 1, + [Define if isl_options_set_schedule_serialize_sccs exists.]) + fi fi GCC_ENABLE_PLUGINS diff -Naur gcc-5.2.0.orig/gcc/coretypes.h gcc-5.2.0/gcc/coretypes.h --- gcc-5.2.0.orig/gcc/coretypes.h 2015-01-05 06:33:28.000000000 -0600 +++ gcc-5.2.0/gcc/coretypes.h 2015-08-05 06:20:59.983324000 -0500 @@ -263,6 +263,18 @@ function_c11_misc }; +/* Suppose that 
higher bits are target dependent. */ +#define MEMMODEL_MASK ((1<<16)-1) + +/* Legacy sync operations set this upper flag in the memory model. This allows + targets that need to do something stronger for sync operations to + differentiate with their target patterns and issue a more appropriate insn + sequence. See bugzilla 65697 for background. */ +#define MEMMODEL_SYNC (1<<15) + +/* Memory model without SYNC bit for targets/operations that do not care. */ +#define MEMMODEL_BASE_MASK (MEMMODEL_SYNC-1) + /* Memory model types for the __atomic* builtins. This must match the order in libstdc++-v3/include/bits/atomic_base.h. */ enum memmodel @@ -273,12 +285,12 @@ MEMMODEL_RELEASE = 3, MEMMODEL_ACQ_REL = 4, MEMMODEL_SEQ_CST = 5, - MEMMODEL_LAST = 6 + MEMMODEL_LAST = 6, + MEMMODEL_SYNC_ACQUIRE = MEMMODEL_ACQUIRE | MEMMODEL_SYNC, + MEMMODEL_SYNC_RELEASE = MEMMODEL_RELEASE | MEMMODEL_SYNC, + MEMMODEL_SYNC_SEQ_CST = MEMMODEL_SEQ_CST | MEMMODEL_SYNC }; -/* Suppose that higher bits are target dependent. */ -#define MEMMODEL_MASK ((1<<16)-1) - /* Support for user-provided GGC and PCH markers. The first parameter is a pointer to a pointer, the second a cookie. */ typedef void (*gt_pointer_operator) (void *, void *); diff -Naur gcc-5.2.0.orig/gcc/cp/call.c gcc-5.2.0/gcc/cp/call.c --- gcc-5.2.0.orig/gcc/cp/call.c 2015-06-23 19:53:02.000000000 -0500 +++ gcc-5.2.0/gcc/cp/call.c 2015-10-21 04:27:12.283863000 -0500 @@ -7002,6 +7002,39 @@ return r; } +/* Return true iff T refers to a base field. */ + +static bool +is_base_field_ref (tree t) +{ + STRIP_NOPS (t); + if (TREE_CODE (t) == ADDR_EXPR) + t = TREE_OPERAND (t, 0); + if (TREE_CODE (t) == COMPONENT_REF) + t = TREE_OPERAND (t, 1); + if (TREE_CODE (t) == FIELD_DECL) + return DECL_FIELD_IS_BASE (t); + return false; +} + +/* We can't elide a copy from a function returning by value to a base + subobject, as the callee might clobber tail padding. Return true iff this + could be that case. 
*/ + +static bool +unsafe_copy_elision_p (tree target, tree exp) +{ + tree type = TYPE_MAIN_VARIANT (TREE_TYPE (exp)); + if (type == CLASSTYPE_AS_BASE (type)) + return false; + if (!is_base_field_ref (target) + && resolves_to_fixed_type_p (target, NULL)) + return false; + tree init = TARGET_EXPR_INITIAL (exp); + return (TREE_CODE (init) == AGGR_INIT_EXPR + && !AGGR_INIT_VIA_CTOR_P (init)); +} + /* Subroutine of the various build_*_call functions. Overload resolution has chosen a winning candidate CAND; build up a CALL_EXPR accordingly. ARGS is a TREE_LIST of the unconverted arguments to the call. FLAGS is a @@ -7419,7 +7452,9 @@ else if (trivial) return force_target_expr (DECL_CONTEXT (fn), arg, complain); } - else if (TREE_CODE (arg) == TARGET_EXPR || trivial) + else if (trivial + || (TREE_CODE (arg) == TARGET_EXPR + && !unsafe_copy_elision_p (fa, arg))) { tree to = stabilize_reference (cp_build_indirect_ref (fa, RO_NULL, complain)); diff -Naur gcc-5.2.0.orig/gcc/cp/class.c gcc-5.2.0/gcc/cp/class.c --- gcc-5.2.0.orig/gcc/cp/class.c 2015-07-01 12:06:52.000000000 -0500 +++ gcc-5.2.0/gcc/cp/class.c 2015-08-17 10:44:35.750755000 -0500 @@ -1989,14 +1989,23 @@ if (!t) return; + tree attrs = TYPE_ATTRIBUTES (t); + unsigned align = TYPE_ALIGN (t); + bool user_align = TYPE_USER_ALIGN (t); + for (variants = TYPE_NEXT_VARIANT (t); variants; variants = TYPE_NEXT_VARIANT (variants)) { /* These are the two fields that check_qualified_type looks at and are affected by attributes. 
*/ - TYPE_ATTRIBUTES (variants) = TYPE_ATTRIBUTES (t); - TYPE_ALIGN (variants) = TYPE_ALIGN (t); + TYPE_ATTRIBUTES (variants) = attrs; + unsigned valign = align; + if (TYPE_USER_ALIGN (variants)) + valign = MAX (valign, TYPE_ALIGN (variants)); + else + TYPE_USER_ALIGN (variants) = user_align; + TYPE_ALIGN (variants) = valign; } } diff -Naur gcc-5.2.0.orig/gcc/cp/constexpr.c gcc-5.2.0/gcc/cp/constexpr.c --- gcc-5.2.0.orig/gcc/cp/constexpr.c 2015-06-19 13:24:24.000000000 -0500 +++ gcc-5.2.0/gcc/cp/constexpr.c 2015-08-17 10:44:29.687207000 -0500 @@ -1750,7 +1750,38 @@ VERIFY_CONSTANT (ary); gcc_unreachable (); } - if (compare_tree_int (index, len) >= 0) + + i = tree_to_shwi (index); + bool found = true; + if (TREE_CODE (ary) == CONSTRUCTOR && len + && (TREE_CODE (CONSTRUCTOR_ELT (ary, len-1)->index) == RANGE_EXPR + || compare_tree_int (CONSTRUCTOR_ELT (ary, len-1)->index, len-1))) + { + /* The last element doesn't match its position in the array; this must be + a sparse array from cxx_eval_store_expression. So iterate. 
*/ + found = false; + vec<constructor_elt, va_gc> *v = CONSTRUCTOR_ELTS (ary); + constructor_elt *e; + for (unsigned ix = 0; vec_safe_iterate (v, ix, &e); ++ix) + { + if (TREE_CODE (e->index) == RANGE_EXPR) + { + tree lo = TREE_OPERAND (e->index, 0); + tree hi = TREE_OPERAND (e->index, 1); + if (tree_int_cst_le (lo, index) && tree_int_cst_le (index, hi)) + found = true; + } + else if (tree_int_cst_equal (e->index, index)) + found = true; + if (found) + { + i = ix; + break; + } + } + } + + if (i >= len || !found) { if (tree_int_cst_lt (index, array_type_nelts_top (TREE_TYPE (ary)))) { @@ -1767,14 +1798,14 @@ *non_constant_p = true; return t; } - else if (tree_int_cst_lt (index, integer_zero_node)) + else if (i < 0) { if (!ctx->quiet) error ("negative array subscript"); *non_constant_p = true; return t; } - i = tree_to_shwi (index); + if (TREE_CODE (ary) == CONSTRUCTOR) return (*CONSTRUCTOR_ELTS (ary))[i].value; else if (elem_nchars == 1) diff -Naur gcc-5.2.0.orig/gcc/cp/cp-tree.h gcc-5.2.0/gcc/cp/cp-tree.h --- gcc-5.2.0.orig/gcc/cp/cp-tree.h 2015-06-24 15:24:01.000000000 -0500 +++ gcc-5.2.0/gcc/cp/cp-tree.h 2015-08-05 23:10:01.554759000 -0500 @@ -5762,6 +5762,7 @@ extern tree instantiate_non_dependent_expr (tree); extern tree instantiate_non_dependent_expr_sfinae (tree, tsubst_flags_t); extern tree instantiate_non_dependent_expr_internal (tree, tsubst_flags_t); +extern bool variable_template_specialization_p (tree); extern bool alias_type_or_template_p (tree); extern bool alias_template_specialization_p (const_tree); extern bool dependent_alias_template_spec_p (const_tree); @@ -5944,7 +5945,7 @@ tsubst_flags_t); extern tree finish_call_expr (tree, vec<tree, va_gc> **, bool, bool, tsubst_flags_t); -extern tree finish_template_variable (tree); +extern tree finish_template_variable (tree, tsubst_flags_t = tf_warning_or_error); extern tree finish_increment_expr (tree, enum tree_code); extern tree finish_this_expr (void); extern tree finish_pseudo_destructor_expr (tree, tree, tree, location_t); diff -Naur 
gcc-5.2.0.orig/gcc/cp/cvt.c gcc-5.2.0/gcc/cp/cvt.c --- gcc-5.2.0.orig/gcc/cp/cvt.c 2015-04-22 15:53:02.000000000 -0500 +++ gcc-5.2.0/gcc/cp/cvt.c 2015-08-14 11:33:10.424885000 -0500 @@ -695,7 +695,8 @@ } /* FIXME remove when moving to c_fully_fold model. */ - e = scalar_constant_value (e); + if (!CLASS_TYPE_P (type)) + e = scalar_constant_value (e); if (error_operand_p (e)) return error_mark_node; diff -Naur gcc-5.2.0.orig/gcc/cp/decl2.c gcc-5.2.0/gcc/cp/decl2.c --- gcc-5.2.0.orig/gcc/cp/decl2.c 2015-06-05 11:25:26.000000000 -0500 +++ gcc-5.2.0/gcc/cp/decl2.c 2015-10-12 08:15:30.542303000 -0500 @@ -1742,6 +1742,9 @@ tree fnaddr; unsigned HOST_WIDE_INT idx; + /* It's OK for the vtable to refer to deprecated virtual functions. */ + warning_sentinel w(warn_deprecated_decl); + FOR_EACH_CONSTRUCTOR_VALUE (CONSTRUCTOR_ELTS (DECL_INITIAL (decl)), idx, fnaddr) { @@ -4232,8 +4235,12 @@ TYPE_NAME (t)); } else if (cxx_dialect >= cxx11) - permerror (DECL_SOURCE_LOCATION (decl), "%q#D, declared using local type " - "%qT, is used but never defined", decl, t); + { + if (TREE_CODE (decl) == VAR_DECL || !DECL_PURE_VIRTUAL_P (decl)) + permerror (DECL_SOURCE_LOCATION (decl), + "%q#D, declared using local type " + "%qT, is used but never defined", decl, t); + } else if (TREE_CODE (decl) == VAR_DECL) warning_at (DECL_SOURCE_LOCATION (decl), 0, "type %qT with no linkage " "used to declare variable %q#D with linkage", t, decl); diff -Naur gcc-5.2.0.orig/gcc/cp/error.c gcc-5.2.0/gcc/cp/error.c --- gcc-5.2.0.orig/gcc/cp/error.c 2015-01-09 14:18:42.000000000 -0600 +++ gcc-5.2.0/gcc/cp/error.c 2015-08-12 13:05:49.608889000 -0500 @@ -1230,7 +1230,8 @@ if (args == error_mark_node) pp_string (pp, M_("