// ---------------------------------------------------------------------------
// PARAMETERS
// ---------------------------------------------------------------------------
// Every invariant in this header hard-codes the four-element instance, so any
// other N is rejected up front (an undefined N evaluates to 0 here and is
// rejected as well).
#if !(N == 4)
  #error This specification is only valid when N=4
#endif

// dtype is consumed by x2t, which selects the dtype-specific __ite_ primitive.
#if !defined(dtype)
  #error dtype must be defined
#endif

// rtype is consumed by __ite, which selects the rtype-specific __ite_ primitive.
#if !defined(rtype)
  #error rtype must be defined
#endif

// Selects the binary operator used throughout the invariants. One of
// BINOP_ADD, BINOP_OR, BINOP_MAX or BINOP_ABSTRACT must be defined; when more
// than one is defined the first match in that order wins.
//   raddf(x,y)        - the operator applied to x and y
//   raddf_primed(x,y) - second spelling of the operator; it differs from
//                       raddf only under BINOP_ABSTRACT
//   ridentity         - value substituted where an operand is absent
// Operands are parenthesised so that an argument containing a
// lower-precedence operator (e.g. `a | b` under BINOP_ADD, or a conditional
// expression) is combined as a single unit.
// NOTE(review): 0 is an identity for max only when operands are never
// negative, and whether 0 is an identity for __add_abstract depends on its
// definition outside this file - confirm both.
#if defined(BINOP_ADD)
  #define raddf(x,y) ((x) + (y))
  #define raddf_primed(x,y) ((x) + (y))
  #define ridentity 0
#elif defined(BINOP_OR)
  #define raddf(x,y) ((x) | (y))
  #define raddf_primed(x,y) ((x) | (y))
  #define ridentity 0
#elif defined(BINOP_MAX)
  // Yields y when x < y and x otherwise.
  #define raddf(x,y) __ite((x) < (y), y, x)
  #define raddf_primed(x,y) __ite((x) < (y), y, x)
  #define ridentity 0
#elif defined(BINOP_ABSTRACT)
  #define raddf(x,y) __add_abstract(x,y)
  #define raddf_primed(x,y) __add_abstract_primed(x,y)
  #define ridentity 0
#else
  #error BINOP_ADD|BINOP_OR|BINOP_MAX|BINOP_ABSTRACT must be defined
#endif

// ---------------------------------------------------------------------------
// HELPERS
// ---------------------------------------------------------------------------
// Builds the name of a type-specific if-then-else primitive by handing the
// prefix `__ite_` and the given type token to __concatenate (defined outside
// this file).
#define __ite_wrapper(type_token) __concatenate(__ite_,type_token)

// If-then-else over the result type: forwards to the rtype-specific primitive.
// The BINOP_MAX definition of raddf relies on it yielding the second argument
// when the condition holds and the third otherwise.
#define __ite(cond,if_true,if_false) \
  __ite_wrapper(rtype)(cond,if_true,if_false)

// Evaluates expr between __non_temporal_loads_begin() and
// __non_temporal_loads_end(). The expansion is a bare comma expression with no
// enclosing parentheses.
// NOTE(review): because of that, using it as a function argument would split
// it into three arguments - confirm every use is a full statement.
#define __non_temporal(expr) \
  __non_temporal_loads_begin(), \
  expr, \
  __non_temporal_loads_end()

// Integer helpers for the power-of-two index arithmetic used by the
// invariants. Every argument is parenthesised so that an operand containing a
// lower-precedence operator (for example `a | b`) is treated as a single
// value instead of being regrouped by the surrounding shift/mask.
//   div2(x)       - x shifted right by one bit
//   iseven(x)     - true when bit 0 of x is clear
//   isone(bit,x)  - true when bit number `bit` of x is set
//   modpow2(x,y)  - x masked with y-1 (x modulo y when y is a power of two)
//   mul2(x)       - x shifted left by one bit
//   mul2add1(x)   - mul2(x) with bit 0 set
//   pow2(bit)     - 1 shifted left by `bit`
#define div2(x) ((x) >> 1)
#define iseven(x) (((x) & 1) == 0)
#define isone(bit,x) ((((x) >> (bit)) & 1) == 1)
#define modpow2(x,y) ((x) & ((y)-1))
#define mul2(x) ((x) << 1)
#define mul2add1(x) (mul2(x) | 1)
#define pow2(bit) (1 << (bit))

// Shorthand for the local id (dimension 0) of the executing work-item.
#define tid get_local_id(0)
// The same id passed through __other_int, which is declared outside this file.
// NOTE(review): presumably this denotes the corresponding value in the second
// thread of a two-thread abstraction - confirm against the verifier's headers.
#define other_tid __other_int(tid)

// Index predicates used by the sweep invariants. Arguments are parenthesised
// (and passed on parenthesised) so that compound operands keep their value
// regardless of the surrounding operators.
//   isvertex(x,offset) - true when offset is 0, or when x+1 masked with
//                        offset-1 is zero (x+1 is a multiple of a
//                        power-of-two offset)
//   stopped(x,offset)  - isvertex evaluated at x+offset with offset doubled
//   left(x,offset)     - x minus half of offset
//   iselement(x)       - 0 < x < 4
//   isthreadid(t)      - 0 < t < 2
// NOTE(review): both range predicates use a strict lower bound and therefore
// reject 0, although the barrier macros in this file instantiate thread 0 and
// element 0 (ai_idx(1,0) is 0). Confirm against the use sites whether
// `0 <= ...` was intended; the bound is left unchanged here.
#define isvertex(x,offset) (((offset) == 0) | (modpow2((x)+1,(offset)) == 0))
#define stopped(x,offset) isvertex((x)+(offset), mul2((offset)))
#define left(x,offset) ((x) - div2((offset)))
#define iselement(x)  ((0 < (x)) & ((x) < 4))
#define isthreadid(t) ((0 < (t)) & ((t) < 2))

// Element indices derived from a stride `offset` and a thread id `tid`:
//   ai_idx = offset * (2*tid + 1) - 1
//   bi_idx = offset * (2*tid + 2) - 1
// Both arguments are parenthesised so that compound expressions such as
// `1 + 1` or `a | b` are multiplied/shifted as a whole.
#define ai_idx(offset,tid) (((offset) * mul2add1((tid))) - 1)
#define bi_idx(offset,tid) (((offset) * (mul2((tid))+2)) - 1)

// ---------------------------------------------------------------------------
// UPSWEEP INVARIANTS
// ---------------------------------------------------------------------------
// Upsweep invariant for index x at stride `offset`: a conjunction of three
// implications, one per level (1, 2 and 4). The premise for level k holds when
// either offset == k and isvertex(x,offset), or offset > k and stopped(x,k).
// The conclusions are:
//   level 1: result[x] == len[x]
//   level 2: result[x] == raddf(result[left(x,2)], len[x])
//   level 4: result[x] == raddf(raddf(result[left(x,4)], result[left(x,2)]),
//                               len[x])
#define upsweep_core(offset,result,len,x) \
  (__implies((((offset == 1) & isvertex(x,offset)) | ((1 < offset) & stopped(x,1))), result[x] == len[x]) & \
  __implies((((offset == 2) & isvertex(x,offset)) | ((2 < offset) & stopped(x,2))), result[x] == raddf(result[left(x,2)],len[x])) & \
  __implies((((offset == 4) & isvertex(x,offset)) | ((4 < offset) & stopped(x,4))), result[x] == raddf(raddf(result[left(x,4)],result[left(x,2)]),len[x])))

// upsweep(offset,result,len,x) is the invariant the barrier macros
// instantiate. When both INC_ENDSPEC and BINOP_ADD are defined it is
// upsweep_core conjoined with upsweep_nooverflow; otherwise it is
// upsweep_core alone.
#if defined(INC_ENDSPEC) && defined(BINOP_ADD)
// Same three premises as upsweep_core, but each conclusion applies
// __add_noovfl to the operands that upsweep_core combines at that level.
// NOTE(review): __add_noovfl is defined outside this file; presumably it
// states that adding its arguments does not overflow - confirm.
#define upsweep_nooverflow(offset,result,len,x) \
  (__implies((((offset == 1) & isvertex(x,offset)) | ((1 < offset) & stopped(x,1))), __add_noovfl(len[x])) & \
  __implies((((offset == 2) & isvertex(x,offset)) | ((2 < offset) & stopped(x,2))), __add_noovfl(len[x], result[left(x,2)])) & \
  __implies((((offset == 4) & isvertex(x,offset)) | ((4 < offset) & stopped(x,4))), __add_noovfl(len[x], result[left(x,2)], result[left(x,4)])))

#define upsweep(offset,result,len,x) \
  (upsweep_core(offset,result,len,x) & upsweep_nooverflow(offset,result,len,x))
#else
#define upsweep(offset,result,len,x) \
  upsweep_core(offset,result,len,x)
#endif

// Conjunction of the upsweep invariant instantiated at the indices derived
// from thread `tid`, each guarded by a thread-id and stride condition:
//   tid < 2 and offset >= 1  ->  index ai_idx(1,tid)
//   tid < 1 and offset >= 4  ->  index ai_idx(2,tid)
//   tid < 2 and offset <= 2  ->  index bi_idx(1,tid)
//   tid < 1 and offset == 4  ->  index bi_idx(2,tid)
#define upsweep_barrier(tid,offset,result,len) \
  ( \
    __implies((tid < 2) & (offset >= 1), \
              upsweep(offset,result,len,ai_idx(1,tid))) \
    & __implies((tid < 1) & (offset >= 4), \
                upsweep(offset,result,len,ai_idx(2,tid))) \
    & __implies((tid < 2) & (offset <= 2), \
                upsweep(offset,result,len,bi_idx(1,tid))) \
    & __implies((tid < 1) & (offset == 4), \
                upsweep(offset,result,len,bi_idx(2,tid))) \
  )

// Relation between the free variables `d` and `offset`, both taken from the
// scope in which the macro is expanded. It holds exactly for the pairs
// (d, offset) = (2, 1), (1, 2) and (0, 4).
#define upsweep_d_offset \
  ( ((d == 2) & (offset == 1)) \
  | ((d == 1) & (offset == 2)) \
  | ((d == 0) & (offset == 4)) )

// ---------------------------------------------------------------------------
// DOWNSWEEP INVARIANTS
// ---------------------------------------------------------------------------
// Index correction used by term: pow2(0) when bit > 0 and bit 0 of x is
// clear, otherwise 0. The selection goes through the rtype-specific __ite.
// NOTE(review): only bit 0 is inspected, which presumably reflects the N=4
// specialisation of a more general sum over lower zero bits - confirm.
#define sum_pow2_zeroes(bit,x) \
  (__ite((0 < bit) & !isone(0,x), pow2(0), 0))

// Contribution of bit number `bit` of x: 0 when that bit is clear in x,
// otherwise ghostsum[x + sum_pow2_zeroes(bit,x) - pow2(bit)].
#define term(ghostsum,bit,x) \
  __ite(!isone(bit,x), 0, ghostsum[x + sum_pow2_zeroes(bit,x) - pow2(bit)])

// Downsweep invariant for index x at stride `offset`. When
// isvertex(x, mul2(offset)) holds, result[x] must equal raddf(A, B) with
//   A = term(ghostsum,1,x) if offset <= 1, else ridentity
//   B = term(ghostsum,0,x) if offset <= 0, else ridentity
// and otherwise result[x] must equal ghostsum[x].
#define downsweep_core(offset,result,ghostsum,x) \
  (result[x] == __ite(isvertex(x,mul2(offset)), raddf(__ite((offset <= 1), term(ghostsum,1,x), ridentity),__ite((offset <= 0), term(ghostsum,0,x), ridentity)), ghostsum[x]))

// downsweep(offset,result,ghostsum,x) is the invariant the barrier macros
// instantiate. When both INC_ENDSPEC and BINOP_ADD are defined it is
// downsweep_core conjoined with downsweep_nooverflow; otherwise it is
// downsweep_core alone.
#if defined(INC_ENDSPEC) && defined(BINOP_ADD)
// When isvertex(x, mul2(offset)) holds, applies __add_noovfl to the same two
// operands that downsweep_core passes to raddf.
// NOTE(review): __add_noovfl is defined outside this file; presumably it
// states that adding its arguments does not overflow - confirm.
#define downsweep_nooverflow(offset,result,ghostsum,x) \
  (__implies(isvertex(x,mul2(offset)), __add_noovfl(__ite((offset <= 1), term(ghostsum,1,x), ridentity), __ite((offset <= 0), term(ghostsum,0,x), ridentity))))

#define downsweep(offset,result,ghostsum,x) \
  (downsweep_core(offset,result,ghostsum,x) & downsweep_nooverflow(offset,result,ghostsum,x))
#else
#define downsweep(offset,result,ghostsum,x) \
  downsweep_core(offset,result,ghostsum,x)
#endif

// Conjunction of the downsweep invariant instantiated at the indices derived
// from thread `tid`, each guarded by a thread-id and stride condition:
//   tid < 1 and offset >= 1  ->  index ai_idx(2,tid)
//   tid < 2 and offset >= 0  ->  index ai_idx(1,tid)
//   tid < 1 and offset >= 1  ->  index bi_idx(2,tid)
//   tid < 2 and offset == 0  ->  index bi_idx(1,tid)
#define downsweep_barrier(tid,offset,result,ghostsum) \
  ( \
    __implies((tid < 1) & (offset >= 1), \
              downsweep(offset,result,ghostsum,ai_idx(2,tid))) \
    & __implies((tid < 2) & (offset >= 0), \
                downsweep(offset,result,ghostsum,ai_idx(1,tid))) \
    & __implies((tid < 1) & (offset >= 1), \
                downsweep(offset,result,ghostsum,bi_idx(2,tid))) \
    & __implies((tid < 2) & (offset == 0), \
                downsweep(offset,result,ghostsum,bi_idx(1,tid))) \
  )

// Relation between the free variables `d` and `offset`, both taken from the
// scope in which the macro is expanded. It holds exactly for the pairs
// (d, offset) = (1, 4), (2, 2) and (4, 1).
#define downsweep_d_offset \
  ( ((d == 1) & (offset == 4)) \
  | ((d == 2) & (offset == 2)) \
  | ((d == 4) & (offset == 1)) )

// ---------------------------------------------------------------------------
// END SPECIFICATION
// ---------------------------------------------------------------------------
// Maps x to half of x rounded down, selected through the dtype-specific
// __ite_ primitive: div2(x) when x is even, div2(x-1) when x is odd. It is
// used by upsweep_instantiation under SPEC_ELEMENTWISE.
// x is parenthesised at every use so that a compound argument such as
// `a | b` is tested and halved as a single value.
// NOTE(review): the name suggests "element index to thread id" - confirm.
#define x2t(x) __ite_wrapper(dtype)(iseven((x)), div2((x)), div2(((x)-1)))

// Upsweep invariant with the stride fixed to N, instantiated at the indices
// derived from thread `tid`:
//   tid < 2  ->  index ai_idx(1,tid)
//   tid < 1  ->  index ai_idx(2,tid)
//   tid < 1  ->  index bi_idx(2,tid)
#define final_upsweep_barrier(tid,result,len) \
  ( \
    __implies((tid < 2), \
              upsweep(/*offset=*/N,result,len,ai_idx(1,tid))) \
    & __implies((tid < 1), \
                upsweep(/*offset=*/N,result,len,ai_idx(2,tid))) \
    & __implies((tid < 1), \
                upsweep(/*offset=*/N,result,len,bi_idx(2,tid))) \
  )

// Downsweep invariant with the stride fixed to 0, instantiated for tid < 2 at
// indices ai_idx(1,tid) and bi_idx(1,tid).
// NOTE(review): the third parameter is named `len` but never appears in the
// body; `ghostsum` is instead picked up from whatever is in scope where the
// macro is expanded, so the caller's third argument is silently ignored.
// Confirm the call sites and rename the parameter to `ghostsum` if that is
// what they pass.
#define final_downsweep_barrier(tid,result,len) \
  (__implies((tid < 2), downsweep(/*offset=*/0,result,ghostsum,ai_idx(1,tid))) & \
  __implies((tid < 2), downsweep(/*offset=*/0,result,ghostsum,bi_idx(1,tid))))

// Comma-separated pair of instantiation expressions:
//   SPEC_THREADWISE  -> tid, other_tid
//   SPEC_ELEMENTWISE -> x2t(tid), x2t(other_tid)
// NOTE(review): when neither macro is defined upsweep_instantiation is left
// undefined; unlike the operator selection there is no #error fallback.
#if defined(SPEC_THREADWISE)
  #define upsweep_instantiation tid, other_tid
#elif defined(SPEC_ELEMENTWISE)
  #define upsweep_instantiation x2t(tid), x2t(other_tid)
#endif