/* Copyright 2022 RISC OS Open Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#pragma force_top_level
#pragma include_only_once

/* stdatomic.h: ISO 'C' (9899:2018) library header, sections 6.7.2.4, 6.7.3, and 7.17 */

#ifndef __stdatomic_h
#define __stdatomic_h

/*
 * stdatomic.h declares several atomic data types (mainly integers), and
 * operations that can be performed on them.
 *
 * Notes/restrictions about this implementation:
 *
 * - Due to limitations in the current version of the compiler, we support the
 *   _Atomic() type specifier (section 6.7.2.4), but not the _Atomic type
 *   qualifier (section 6.7.3).
 * - Due to limitations in the current version of the compiler, the
 *   ATOMIC_*_LOCK_FREE #defines (section 7.17.1) always report a value of 1
 *   ("type is sometimes lock-free", section 7.17.5)
 * - Due to limitations in the current version of the compiler, all atomic
 *   operations are performed via library function calls
 * - For correct operation, all atomic types and pointer arguments must be
 *   aligned to their size, with the exception of non-atomic 64-bit integers
 *   which can be word aligned.
 *   - Note that the current version of the compiler/library can't automatically
 *     provide the necessary 8-byte alignment for 64-bit atomic types. You must
 *     therefore take care to ensure the correct alignment yourself.
 * - For correct operation, atomic types must only be placed in normal,
 *   cacheable memory, or as otherwise required by the ARMv6+ load/store
 *   exclusive instructions.
 *
 * For atomic_flag:
 *
 * - On ARM2a and newer CPUs, the atomic_flag implementation uses either the SWP
 *   or LDREX/STREX instructions, making it fully lock-free and atomic across
 *   all execution environments: threads, C signal handlers, IRQ or FIQ
 *   handlers, RISC OS callback handlers, RISC OS event handlers, SWI and abort
 *   handlers, etc.
 * - On ARM2 CPUs, the atomic_flag implementation enforces atomicity by
 *   temporarily disabling interrupts, and isn't atomic for FIQ handlers.
 *
 * For the other atomic types:
 *
 * - On ARMv6K and newer CPUs, the library functions are implemented using the
 *   LDREX/STREX family of instructions. This makes them fully lock-free and
 *   atomic across all execution environments: threads, C signal handlers, IRQ
 *   or FIQ handlers, RISC OS callback handlers, RISC OS event handlers, SWI and
 *   abort handlers, etc.
 * - On older CPUs, none of the types are atomic from the perspective of FIQ
 *   handlers, and the routines typically enforce atomicity by temporarily
 *   disabling interrupts.
 *   - If full atomicity isn't required, an alternative implementation can be
 *     selected by setting the _kernel_INITFLAG_UNSAFE_ATOMICS initialisation
 *     flag (see kernel.h). This will cause the Shared C Library to implement
 *     the atomic operations using a SWP-based spinlock instead of disabling
 *     interrupts. This should significantly improve performance for usermode
 *     code (by avoiding the need for SWI calls to disable/enable interrupts),
 *     but it has the major drawback that the routines will only be safe to use
 *     from foreground threads. Attempting to use them from other environments
 *     (C signal handlers, RISC OS IRQ handlers, etc.) is unsafe and may cause
 *     incorrect behaviour or deadlocks. The C18 specification does allow for
 *     this kind of unsafe behaviour (see sections 5.1.2.3 and 7.14.1.1), but
 *     for safety the library defaults to the slower but safer implementation.
 *   - On ARM2, there is no SWP instruction, so the UNSAFE_ATOMICS flag is
 *     ignored and the IRQ-based routines will be used regardless.
 */


#include <stddef.h> /* ptrdiff_t, size_t, wchar_t */
#include <stdint.h> /* assorted integer types */
#include <uchar.h> /* char16_t, char32_t */

#ifdef __cplusplus
extern "C"
{
#endif

/*
 * Lock-free property macros (section 7.17.1).
 * All report 1 ("the type is sometimes lock-free", section 7.17.5), because
 * the current compiler can't determine lock-freedom at translation time -
 * it depends on which CPU the program ends up running on (see the
 * implementation notes at the top of this file).
 */
#define ATOMIC_BOOL_LOCK_FREE     (1)
#define ATOMIC_CHAR_LOCK_FREE     (1)
#define ATOMIC_CHAR16_T_LOCK_FREE (1)
#define ATOMIC_CHAR32_T_LOCK_FREE (1)
#define ATOMIC_WCHAR_T_LOCK_FREE  (1)
#define ATOMIC_SHORT_LOCK_FREE    (1)
#define ATOMIC_INT_LOCK_FREE      (1)
#define ATOMIC_LONG_LOCK_FREE     (1)
#define ATOMIC_LLONG_LOCK_FREE    (1)
#define ATOMIC_POINTER_LOCK_FREE  (1)

/*
 * memory_order (section 7.17.3).
 *
 * Use an enum value mapping that allows the implementation to quickly check
 * if a barrier is needed on load (bit 0 set) or store (bit 1 set).
 *
 * Even though the implementation only cares about the bottom two bits, we
 * still ensure each enum member is given a unique value, to avoid breaking any
 * user code which requires them to have unique values.
 *
 * NOTE: these values are passed straight through to the _kernel_atomic_*
 * library routines declared below, so they form part of the interface with
 * the Shared C Library and must not be changed independently of it.
 */
typedef enum
{
    memory_order_relaxed = 0, /* Do nothing */
    memory_order_consume = 1, /* Barrier after load */
    memory_order_acquire = 5, /* Barrier after load */
    memory_order_release = 2, /* Barrier before store */
    memory_order_acq_rel = 3, /* Barrier after first load, barrier before first store */
    memory_order_seq_cst = 7, /* Barrier after first load, barrier before first store */
} memory_order;

/*
 * Future versions of the compiler may have built-in support for _Atomic and
 * _Atomic(); make sure that we only define our no-op macro version on older
 * compilers (pre-C11 or __STDC_NO_ATOMICS__ defined).
 *
 * Also note that Norcroft won't allow us to have both _Atomic and _Atomic() as
 * macros, so for older compilers we can only support one of them.
 */
#if !defined(__STDC_VERSION__) || (__STDC_VERSION__ < 201112) || defined(__STDC_NO_ATOMICS__)
#define _Atomic(X) X
#endif

/* Atomic versions of the basic integer types (section 7.17.6) */
typedef _Atomic(_Bool)              atomic_bool;
typedef _Atomic(char)               atomic_char;
typedef _Atomic(signed char)        atomic_schar;
typedef _Atomic(unsigned char)      atomic_uchar;
typedef _Atomic(short)              atomic_short;
typedef _Atomic(unsigned short)     atomic_ushort;
typedef _Atomic(int)                atomic_int;
typedef _Atomic(unsigned int)       atomic_uint;
typedef _Atomic(long)               atomic_long;
typedef _Atomic(unsigned long)      atomic_ulong;
typedef _Atomic(long long)          atomic_llong;
typedef _Atomic(unsigned long long) atomic_ullong;
typedef _Atomic(char16_t)           atomic_char16_t;
typedef _Atomic(char32_t)           atomic_char32_t;
typedef _Atomic(wchar_t)            atomic_wchar_t;

/* Atomic versions of the <stdint.h> least-width types */
typedef _Atomic(int_least8_t)       atomic_int_least8_t;
typedef _Atomic(uint_least8_t)      atomic_uint_least8_t;
typedef _Atomic(int_least16_t)      atomic_int_least16_t;
typedef _Atomic(uint_least16_t)     atomic_uint_least16_t;
typedef _Atomic(int_least32_t)      atomic_int_least32_t;
typedef _Atomic(uint_least32_t)     atomic_uint_least32_t;
typedef _Atomic(int_least64_t)      atomic_int_least64_t;
typedef _Atomic(uint_least64_t)     atomic_uint_least64_t;

/* Atomic versions of the <stdint.h> fast types */
typedef _Atomic(int_fast8_t)        atomic_int_fast8_t;
typedef _Atomic(uint_fast8_t)       atomic_uint_fast8_t;
typedef _Atomic(int_fast16_t)       atomic_int_fast16_t;
typedef _Atomic(uint_fast16_t)      atomic_uint_fast16_t;
typedef _Atomic(int_fast32_t)       atomic_int_fast32_t;
typedef _Atomic(uint_fast32_t)      atomic_uint_fast32_t;
typedef _Atomic(int_fast64_t)       atomic_int_fast64_t;
typedef _Atomic(uint_fast64_t)      atomic_uint_fast64_t;

/* Atomic versions of the pointer-sized and maximum-width integer types */
typedef _Atomic(intptr_t)           atomic_intptr_t;
typedef _Atomic(uintptr_t)          atomic_uintptr_t;
typedef _Atomic(size_t)             atomic_size_t;
typedef _Atomic(ptrdiff_t)          atomic_ptrdiff_t;
typedef _Atomic(intmax_t)           atomic_intmax_t;
typedef _Atomic(uintmax_t)          atomic_uintmax_t;

/*
 * Define atomic_flag as 4 byte struct, so that:
 * (a) We can use LDREX/STREX instead of SWP(B) on base ARMv6 machines
 * (b) _Generic will consider it to be different from the other atomic types
 * The struct should only be manipulated via ATOMIC_FLAG_INIT and the
 * atomic_flag_* functions declared later in this file.
 */
typedef struct { int value; } atomic_flag;

/*
 * kill_dependency (section 7.17.3.1).
 * With the current compiler, this can only really be a no-op. However,
 * allow for future compilers (even ones which don't fully support atomics)
 * to predefine it to something useful.
 */
#if !defined(kill_dependency)
#define kill_dependency(Y) (Y)
#endif

/*
 * Initialiser for atomic objects (section 7.17.2.1). A plain initialiser
 * works because the atomic types here are just the plain types.
 * (Obsolescent in later revisions of the standard, but retained for
 * compatibility.)
 */
#define ATOMIC_VAR_INIT(C) (C)

/*
 * void atomic_init(volatile A* obj, C desired);
 * This isn't required to be atomic, so we can just do a direct assignment
 */
#define atomic_init(obj,desired) ((void) (*(obj) = (desired)))

/*
 * _Bool atomic_is_lock_free(const volatile A* obj);
 * Reports whether operations on objects of the same size as *obj are
 * lock-free. The library routine dispatches on the object's size in bytes
 * (the macro passes sizeof, not a type code), hence the parameter name.
 * The explicit (int) cast documents the deliberate size_t -> int narrowing
 * (atomic objects are at most 8 bytes, so the value always fits).
 */
extern _Bool _kernel_atomic_is_lock_free(int size);
#define atomic_is_lock_free(obj) _kernel_atomic_is_lock_free((int) sizeof(*(obj)))

/*
 * void atomic_store(volatile A* obj, C desired);
 * void atomic_store_explicit(volatile A* obj, C desired, memory_order order);
 */
extern void _kernel_atomic_store_1(volatile void* obj, memory_order order, uint8_t desired);
extern void _kernel_atomic_store_2(volatile void* obj, memory_order order, uint16_t desired);
extern void _kernel_atomic_store_4(volatile void* obj, memory_order order, uint32_t desired);
extern void _kernel_atomic_store_8(volatile void* obj, memory_order order, uint64_t desired);
#define atomic_store(obj,desired) atomic_store_explicit(obj,desired,memory_order_seq_cst)
/*
 * For improved performance on pre-ARMv6K, it's possible the compiler will pad
 * atomic_short to 4 bytes, as this will allow atomic_store to be a simple
 * STR instruction instead of needing to disable interrupts. Detect the size of
 * the type here and map the operation to the appropriate _kernel_atomic
 * function.
 *
 * Note that the atomic_bool case casts the desired value through _Bool, so
 * any non-zero value is normalised to 1 before being stored.
 */
#define atomic_store_explicit(obj,desired,order) (_Generic(*(obj) \
, atomic_bool   : _kernel_atomic_store_1(obj, order, (_Bool) (desired)) \
, atomic_char   : _kernel_atomic_store_1(obj, order, desired) \
, atomic_schar  : _kernel_atomic_store_1(obj, order, desired) \
, atomic_uchar  : _kernel_atomic_store_1(obj, order, desired) \
, atomic_short  : ((sizeof(atomic_short) == 2) \
                   ? _kernel_atomic_store_2(obj, order, desired) \
                   : _kernel_atomic_store_4(obj, order, desired)) \
, atomic_ushort : ((sizeof(atomic_ushort) == 2) \
                   ? _kernel_atomic_store_2(obj, order, desired) \
                   : _kernel_atomic_store_4(obj, order, desired)) \
, atomic_int    : _kernel_atomic_store_4(obj, order, desired) \
, atomic_uint   : _kernel_atomic_store_4(obj, order, desired) \
, atomic_long   : _kernel_atomic_store_4(obj, order, desired) \
, atomic_ulong  : _kernel_atomic_store_4(obj, order, desired) \
, atomic_llong  : _kernel_atomic_store_8(obj, order, desired) \
, atomic_ullong : _kernel_atomic_store_8(obj, order, desired) \
))

/*
 * C atomic_load(volatile A* obj);
 * C atomic_load_explicit(volatile A* obj, memory_order order);
 *
 * The library routines return unsigned integers, so each result is cast
 * back to the value type of the atomic object.
 *
 * NOTE(review): unlike atomic_store/atomic_exchange, the short/ushort cases
 * here always use the 2-byte routine, even if the compiler pads atomic_short
 * to 4 bytes - presumably a halfword load of the value bytes is sufficient
 * (and naturally atomic); confirm against the library implementation.
 */
extern uint8_t _kernel_atomic_load_1(volatile void* obj, memory_order order);
extern uint16_t _kernel_atomic_load_2(volatile void* obj, memory_order order);
extern uint32_t _kernel_atomic_load_4(volatile void* obj, memory_order order);
extern uint64_t _kernel_atomic_load_8(volatile void* obj, memory_order order);
#define atomic_load(obj) atomic_load_explicit(obj,memory_order_seq_cst)
#define atomic_load_explicit(obj,order) (_Generic(*(obj) \
, atomic_bool   : (_Bool             ) _kernel_atomic_load_1(obj, order) \
, atomic_char   : (char              ) _kernel_atomic_load_1(obj, order) \
, atomic_schar  : (signed char       ) _kernel_atomic_load_1(obj, order) \
, atomic_uchar  : (unsigned char     ) _kernel_atomic_load_1(obj, order) \
, atomic_short  : (short             ) _kernel_atomic_load_2(obj, order) \
, atomic_ushort : (unsigned short    ) _kernel_atomic_load_2(obj, order) \
, atomic_int    : (int               ) _kernel_atomic_load_4(obj, order) \
, atomic_uint   : (unsigned int      ) _kernel_atomic_load_4(obj, order) \
, atomic_long   : (long              ) _kernel_atomic_load_4(obj, order) \
, atomic_ulong  : (unsigned long     ) _kernel_atomic_load_4(obj, order) \
, atomic_llong  : (long long         ) _kernel_atomic_load_8(obj, order) \
, atomic_ullong : (unsigned long long) _kernel_atomic_load_8(obj, order) \
))

/*
 * C atomic_exchange(volatile A* obj, C desired);
 * C atomic_exchange_explicit(volatile A* obj, C desired, memory_order order);
 * Returns the previous value of the object, cast back to the value type.
 */
extern uint8_t _kernel_atomic_exchange_1(volatile void* obj, memory_order order, uint8_t desired);
extern uint16_t _kernel_atomic_exchange_2(volatile void* obj, memory_order order, uint16_t desired);
extern uint32_t _kernel_atomic_exchange_4(volatile void* obj, memory_order order, uint32_t desired);
extern uint64_t _kernel_atomic_exchange_8(volatile void* obj, memory_order order, uint64_t desired);
#define atomic_exchange(obj,desired) atomic_exchange_explicit(obj,desired,memory_order_seq_cst)
/*
 * For improved performance on pre-ARMv6K, it's possible the compiler will pad
 * atomic_short to 4 bytes, as this will allow atomic_exchange to be a simple
 * SWP instruction instead of needing to disable interrupts. Detect the size of
 * the type here and map the operation to the appropriate _kernel_atomic
 * function.
 *
 * As with atomic_store, the atomic_bool case normalises the desired value
 * to 0 or 1 by casting it through _Bool.
 */
#define atomic_exchange_explicit(obj,desired,order) (_Generic(*(obj) \
, atomic_bool   : (_Bool             ) _kernel_atomic_exchange_1(obj, order, (_Bool) (desired)) \
, atomic_char   : (char              ) _kernel_atomic_exchange_1(obj, order, desired) \
, atomic_schar  : (signed char       ) _kernel_atomic_exchange_1(obj, order, desired) \
, atomic_uchar  : (unsigned char     ) _kernel_atomic_exchange_1(obj, order, desired) \
, atomic_short  : ((sizeof(atomic_short) == 2) \
                   ? (short          ) _kernel_atomic_exchange_2(obj, order, desired) \
                   : (short          ) _kernel_atomic_exchange_4(obj, order, desired)) \
, atomic_ushort : ((sizeof(atomic_ushort) == 2) \
                   ? (unsigned short ) _kernel_atomic_exchange_2(obj, order, desired) \
                   : (unsigned short ) _kernel_atomic_exchange_4(obj, order, desired)) \
, atomic_int    : (int               ) _kernel_atomic_exchange_4(obj, order, desired) \
, atomic_uint   : (unsigned int      ) _kernel_atomic_exchange_4(obj, order, desired) \
, atomic_long   : (long              ) _kernel_atomic_exchange_4(obj, order, desired) \
, atomic_ulong  : (unsigned long     ) _kernel_atomic_exchange_4(obj, order, desired) \
, atomic_llong  : (long long         ) _kernel_atomic_exchange_8(obj, order, desired) \
, atomic_ullong : (unsigned long long) _kernel_atomic_exchange_8(obj, order, desired) \
))

/*
 * _Bool atomic_compare_exchange_weak(volatile A* obj, C* expected, C desired);
 * _Bool atomic_compare_exchange_strong(volatile A* obj, C* expected, C desired);
 * _Bool atomic_compare_exchange_weak_explicit(volatile A* obj, C* expected, C desired, memory_order succ, memory_order fail);
 * _Bool atomic_compare_exchange_strong_explicit(volatile A* obj, C* expected, C desired, memory_order succ, memory_order fail);
 *
 * The two memory orders are packed into the single 'orders' argument of the
 * library routines: the success order occupies the low 4 bits, and the
 * failure order is shifted left by 4 bits.
 */
extern _Bool _kernel_atomic_compare_exchange_weak_1(volatile void* obj, int orders, volatile void* expected, uint8_t desired);
extern _Bool _kernel_atomic_compare_exchange_weak_2(volatile void* obj, int orders, volatile void* expected, uint16_t desired);
extern _Bool _kernel_atomic_compare_exchange_weak_4(volatile void* obj, int orders, volatile void* expected, uint32_t desired);
extern _Bool _kernel_atomic_compare_exchange_weak_8(volatile void* obj, int orders, volatile void* expected, uint64_t desired);
extern _Bool _kernel_atomic_compare_exchange_strong_1(volatile void* obj, int orders, volatile void* expected, uint8_t desired);
extern _Bool _kernel_atomic_compare_exchange_strong_2(volatile void* obj, int orders, volatile void* expected, uint16_t desired);
extern _Bool _kernel_atomic_compare_exchange_strong_4(volatile void* obj, int orders, volatile void* expected, uint32_t desired);
extern _Bool _kernel_atomic_compare_exchange_strong_8(volatile void* obj, int orders, volatile void* expected, uint64_t desired);
#define atomic_compare_exchange_weak(obj,expected,desired) atomic_compare_exchange_weak_explicit(obj,expected,desired,memory_order_seq_cst,memory_order_seq_cst)
#define atomic_compare_exchange_strong(obj,expected,desired) atomic_compare_exchange_strong_explicit(obj,expected,desired,memory_order_seq_cst,memory_order_seq_cst)
#define atomic_compare_exchange_weak_explicit(obj,expected,desired,succ,fail) (_Generic(*(obj) \
, atomic_bool   : _kernel_atomic_compare_exchange_weak_1(obj, (succ)+((fail)<<4), expected, (_Bool) (desired)) \
, atomic_char   : _kernel_atomic_compare_exchange_weak_1(obj, (succ)+((fail)<<4), expected, desired) \
, atomic_schar  : _kernel_atomic_compare_exchange_weak_1(obj, (succ)+((fail)<<4), expected, desired) \
, atomic_uchar  : _kernel_atomic_compare_exchange_weak_1(obj, (succ)+((fail)<<4), expected, desired) \
, atomic_short  : _kernel_atomic_compare_exchange_weak_2(obj, (succ)+((fail)<<4), expected, desired) \
, atomic_ushort : _kernel_atomic_compare_exchange_weak_2(obj, (succ)+((fail)<<4), expected, desired) \
, atomic_int    : _kernel_atomic_compare_exchange_weak_4(obj, (succ)+((fail)<<4), expected, desired) \
, atomic_uint   : _kernel_atomic_compare_exchange_weak_4(obj, (succ)+((fail)<<4), expected, desired) \
, atomic_long   : _kernel_atomic_compare_exchange_weak_4(obj, (succ)+((fail)<<4), expected, desired) \
, atomic_ulong  : _kernel_atomic_compare_exchange_weak_4(obj, (succ)+((fail)<<4), expected, desired) \
, atomic_llong  : _kernel_atomic_compare_exchange_weak_8(obj, (succ)+((fail)<<4), expected, desired) \
, atomic_ullong : _kernel_atomic_compare_exchange_weak_8(obj, (succ)+((fail)<<4), expected, desired) \
))
#define atomic_compare_exchange_strong_explicit(obj,expected,desired,succ,fail) (_Generic(*(obj) \
, atomic_bool   : _kernel_atomic_compare_exchange_strong_1(obj, (succ)+((fail)<<4), expected, (_Bool) (desired)) \
, atomic_char   : _kernel_atomic_compare_exchange_strong_1(obj, (succ)+((fail)<<4), expected, desired) \
, atomic_schar  : _kernel_atomic_compare_exchange_strong_1(obj, (succ)+((fail)<<4), expected, desired) \
, atomic_uchar  : _kernel_atomic_compare_exchange_strong_1(obj, (succ)+((fail)<<4), expected, desired) \
, atomic_short  : _kernel_atomic_compare_exchange_strong_2(obj, (succ)+((fail)<<4), expected, desired) \
, atomic_ushort : _kernel_atomic_compare_exchange_strong_2(obj, (succ)+((fail)<<4), expected, desired) \
, atomic_int    : _kernel_atomic_compare_exchange_strong_4(obj, (succ)+((fail)<<4), expected, desired) \
, atomic_uint   : _kernel_atomic_compare_exchange_strong_4(obj, (succ)+((fail)<<4), expected, desired) \
, atomic_long   : _kernel_atomic_compare_exchange_strong_4(obj, (succ)+((fail)<<4), expected, desired) \
, atomic_ulong  : _kernel_atomic_compare_exchange_strong_4(obj, (succ)+((fail)<<4), expected, desired) \
, atomic_llong  : _kernel_atomic_compare_exchange_strong_8(obj, (succ)+((fail)<<4), expected, desired) \
, atomic_ullong : _kernel_atomic_compare_exchange_strong_8(obj, (succ)+((fail)<<4), expected, desired) \
))

/*
 * C atomic_fetch_add(volatile A* obj, M arg);
 * C atomic_fetch_add_explicit(volatile A* obj, M arg, memory_order order);
 * Returns the previous value of the object, cast back to the value type.
 * Note that the _Generic list below only covers the plain integer types:
 * atomic_bool and atomic pointer types are not supported.
 */
extern uint8_t _kernel_atomic_fetch_add_1(volatile void* obj, memory_order order, uint8_t arg);
extern uint16_t _kernel_atomic_fetch_add_2(volatile void* obj, memory_order order, uint16_t arg);
extern uint32_t _kernel_atomic_fetch_add_4(volatile void* obj, memory_order order, uint32_t arg);
extern uint64_t _kernel_atomic_fetch_add_8(volatile void* obj, memory_order order, uint64_t arg);
#define atomic_fetch_add(obj,arg) atomic_fetch_add_explicit(obj,arg,memory_order_seq_cst)
#define atomic_fetch_add_explicit(obj,arg,order) (_Generic(*(obj) \
, atomic_char   : (char              ) _kernel_atomic_fetch_add_1(obj, order, arg) \
, atomic_schar  : (signed char       ) _kernel_atomic_fetch_add_1(obj, order, arg) \
, atomic_uchar  : (unsigned char     ) _kernel_atomic_fetch_add_1(obj, order, arg) \
, atomic_short  : (short             ) _kernel_atomic_fetch_add_2(obj, order, arg) \
, atomic_ushort : (unsigned short    ) _kernel_atomic_fetch_add_2(obj, order, arg) \
, atomic_int    : (int               ) _kernel_atomic_fetch_add_4(obj, order, arg) \
, atomic_uint   : (unsigned int      ) _kernel_atomic_fetch_add_4(obj, order, arg) \
, atomic_long   : (long              ) _kernel_atomic_fetch_add_4(obj, order, arg) \
, atomic_ulong  : (unsigned long     ) _kernel_atomic_fetch_add_4(obj, order, arg) \
, atomic_llong  : (long long         ) _kernel_atomic_fetch_add_8(obj, order, arg) \
, atomic_ullong : (unsigned long long) _kernel_atomic_fetch_add_8(obj, order, arg) \
))

/*
 * C atomic_fetch_sub(volatile A* obj, M arg);
 * C atomic_fetch_sub_explicit(volatile A* obj, M arg, memory_order order);
 * We can reduce the number of library routines needed by mapping this to
 * atomic_fetch_add: the argument is converted to the value type and negated,
 * and since the library routines take unsigned (modular) parameter types,
 * adding the negated value is equivalent to subtracting the original.
 */
#define atomic_fetch_sub(obj,arg) atomic_fetch_sub_explicit(obj,arg,memory_order_seq_cst)
#define atomic_fetch_sub_explicit(obj,arg,order) (_Generic(*(obj) \
, atomic_char   : (char              ) _kernel_atomic_fetch_add_1(obj, order, -((char              ) (arg))) \
, atomic_schar  : (signed char       ) _kernel_atomic_fetch_add_1(obj, order, -((signed char       ) (arg))) \
, atomic_uchar  : (unsigned char     ) _kernel_atomic_fetch_add_1(obj, order, -((unsigned char     ) (arg))) \
, atomic_short  : (short             ) _kernel_atomic_fetch_add_2(obj, order, -((short             ) (arg))) \
, atomic_ushort : (unsigned short    ) _kernel_atomic_fetch_add_2(obj, order, -((unsigned short    ) (arg))) \
, atomic_int    : (int               ) _kernel_atomic_fetch_add_4(obj, order, -((int               ) (arg))) \
, atomic_uint   : (unsigned int      ) _kernel_atomic_fetch_add_4(obj, order, -((unsigned int      ) (arg))) \
, atomic_long   : (long              ) _kernel_atomic_fetch_add_4(obj, order, -((long              ) (arg))) \
, atomic_ulong  : (unsigned long     ) _kernel_atomic_fetch_add_4(obj, order, -((unsigned long     ) (arg))) \
, atomic_llong  : (long long         ) _kernel_atomic_fetch_add_8(obj, order, -((long long         ) (arg))) \
, atomic_ullong : (unsigned long long) _kernel_atomic_fetch_add_8(obj, order, -((unsigned long long) (arg))) \
))

/*
 * C atomic_fetch_or(volatile A* obj, M arg);
 * C atomic_fetch_or_explicit(volatile A* obj, M arg, memory_order order);
 * Returns the previous value of the object, cast back to the value type.
 */
extern uint8_t _kernel_atomic_fetch_or_1(volatile void* obj, memory_order order, uint8_t arg);
extern uint16_t _kernel_atomic_fetch_or_2(volatile void* obj, memory_order order, uint16_t arg);
extern uint32_t _kernel_atomic_fetch_or_4(volatile void* obj, memory_order order, uint32_t arg);
extern uint64_t _kernel_atomic_fetch_or_8(volatile void* obj, memory_order order, uint64_t arg);
#define atomic_fetch_or(obj,arg) atomic_fetch_or_explicit(obj,arg,memory_order_seq_cst)
#define atomic_fetch_or_explicit(obj,arg,order) (_Generic(*(obj) \
, atomic_char   : (char              ) _kernel_atomic_fetch_or_1(obj, order, arg) \
, atomic_schar  : (signed char       ) _kernel_atomic_fetch_or_1(obj, order, arg) \
, atomic_uchar  : (unsigned char     ) _kernel_atomic_fetch_or_1(obj, order, arg) \
, atomic_short  : (short             ) _kernel_atomic_fetch_or_2(obj, order, arg) \
, atomic_ushort : (unsigned short    ) _kernel_atomic_fetch_or_2(obj, order, arg) \
, atomic_int    : (int               ) _kernel_atomic_fetch_or_4(obj, order, arg) \
, atomic_uint   : (unsigned int      ) _kernel_atomic_fetch_or_4(obj, order, arg) \
, atomic_long   : (long              ) _kernel_atomic_fetch_or_4(obj, order, arg) \
, atomic_ulong  : (unsigned long     ) _kernel_atomic_fetch_or_4(obj, order, arg) \
, atomic_llong  : (long long         ) _kernel_atomic_fetch_or_8(obj, order, arg) \
, atomic_ullong : (unsigned long long) _kernel_atomic_fetch_or_8(obj, order, arg) \
))

/*
 * C atomic_fetch_xor(volatile A* obj, M arg);
 * C atomic_fetch_xor_explicit(volatile A* obj, M arg, memory_order order);
 * Returns the previous value of the object, cast back to the value type.
 */
extern uint8_t _kernel_atomic_fetch_xor_1(volatile void* obj, memory_order order, uint8_t arg);
extern uint16_t _kernel_atomic_fetch_xor_2(volatile void* obj, memory_order order, uint16_t arg);
extern uint32_t _kernel_atomic_fetch_xor_4(volatile void* obj, memory_order order, uint32_t arg);
extern uint64_t _kernel_atomic_fetch_xor_8(volatile void* obj, memory_order order, uint64_t arg);
#define atomic_fetch_xor(obj,arg) atomic_fetch_xor_explicit(obj,arg,memory_order_seq_cst)
#define atomic_fetch_xor_explicit(obj,arg,order) (_Generic(*(obj) \
, atomic_char   : (char              ) _kernel_atomic_fetch_xor_1(obj, order, arg) \
, atomic_schar  : (signed char       ) _kernel_atomic_fetch_xor_1(obj, order, arg) \
, atomic_uchar  : (unsigned char     ) _kernel_atomic_fetch_xor_1(obj, order, arg) \
, atomic_short  : (short             ) _kernel_atomic_fetch_xor_2(obj, order, arg) \
, atomic_ushort : (unsigned short    ) _kernel_atomic_fetch_xor_2(obj, order, arg) \
, atomic_int    : (int               ) _kernel_atomic_fetch_xor_4(obj, order, arg) \
, atomic_uint   : (unsigned int      ) _kernel_atomic_fetch_xor_4(obj, order, arg) \
, atomic_long   : (long              ) _kernel_atomic_fetch_xor_4(obj, order, arg) \
, atomic_ulong  : (unsigned long     ) _kernel_atomic_fetch_xor_4(obj, order, arg) \
, atomic_llong  : (long long         ) _kernel_atomic_fetch_xor_8(obj, order, arg) \
, atomic_ullong : (unsigned long long) _kernel_atomic_fetch_xor_8(obj, order, arg) \
))

/*
 * C atomic_fetch_and(volatile A* obj, M arg);
 * C atomic_fetch_and_explicit(volatile A* obj, M arg, memory_order order);
 * Returns the previous value of the object, cast back to the value type.
 */
extern uint8_t _kernel_atomic_fetch_and_1(volatile void* obj, memory_order order, uint8_t arg);
extern uint16_t _kernel_atomic_fetch_and_2(volatile void* obj, memory_order order, uint16_t arg);
extern uint32_t _kernel_atomic_fetch_and_4(volatile void* obj, memory_order order, uint32_t arg);
extern uint64_t _kernel_atomic_fetch_and_8(volatile void* obj, memory_order order, uint64_t arg);
#define atomic_fetch_and(obj,arg) atomic_fetch_and_explicit(obj,arg,memory_order_seq_cst)
#define atomic_fetch_and_explicit(obj,arg,order) (_Generic(*(obj) \
, atomic_char   : (char              ) _kernel_atomic_fetch_and_1(obj, order, arg) \
, atomic_schar  : (signed char       ) _kernel_atomic_fetch_and_1(obj, order, arg) \
, atomic_uchar  : (unsigned char     ) _kernel_atomic_fetch_and_1(obj, order, arg) \
, atomic_short  : (short             ) _kernel_atomic_fetch_and_2(obj, order, arg) \
, atomic_ushort : (unsigned short    ) _kernel_atomic_fetch_and_2(obj, order, arg) \
, atomic_int    : (int               ) _kernel_atomic_fetch_and_4(obj, order, arg) \
, atomic_uint   : (unsigned int      ) _kernel_atomic_fetch_and_4(obj, order, arg) \
, atomic_long   : (long              ) _kernel_atomic_fetch_and_4(obj, order, arg) \
, atomic_ulong  : (unsigned long     ) _kernel_atomic_fetch_and_4(obj, order, arg) \
, atomic_llong  : (long long         ) _kernel_atomic_fetch_and_8(obj, order, arg) \
, atomic_ullong : (unsigned long long) _kernel_atomic_fetch_and_8(obj, order, arg) \
))


/*
 * void atomic_thread_fence(memory_order order);
 * Like the other atomic operations, this is performed via a library call
 * (see the implementation notes at the top of this file).
 */
extern void _kernel_atomic_thread_fence(memory_order order);
#define atomic_thread_fence _kernel_atomic_thread_fence

/*
 * void atomic_signal_fence(memory_order order);
 * A compiler scheduling barrier should be sufficient.
 * Note that the order argument is ignored (and never evaluated).
 */
#define atomic_signal_fence(X) __schedule_barrier()


/*
 * Initialiser for atomic_flag objects (section 7.17.8), giving the clear state.
 * NOTE(review): this expands to a compound literal, so strictly-conforming
 * file-scope initialisation (e.g. `static atomic_flag f = ATOMIC_FLAG_INIT;`)
 * relies on the compiler accepting a compound literal in a static initialiser
 * - confirm Norcroft permits this; a plain `{0}` would otherwise be needed.
 */
#define ATOMIC_FLAG_INIT (atomic_flag) {0}

/*
 * _Bool atomic_flag_test_and_set(volatile atomic_flag *obj);
 * _Bool atomic_flag_test_and_set_explicit(volatile atomic_flag *obj, memory_order order);
 * Atomically sets the flag, returning its previous state (section 7.17.8.1:
 * true if it was already set).
 */
extern _Bool _kernel_atomic_flag_test_and_set_explicit(volatile atomic_flag *obj, memory_order order);
#define atomic_flag_test_and_set(obj) atomic_flag_test_and_set_explicit(obj,memory_order_seq_cst)
#define atomic_flag_test_and_set_explicit _kernel_atomic_flag_test_and_set_explicit

/*
 * void atomic_flag_clear(volatile atomic_flag *obj);
 * void atomic_flag_clear_explicit(volatile atomic_flag *obj, memory_order order);
 * Atomically returns the flag to the clear state (section 7.17.8.2).
 */
extern void _kernel_atomic_flag_clear_explicit(volatile atomic_flag *obj, memory_order order);
#define atomic_flag_clear(obj) atomic_flag_clear_explicit(obj,memory_order_seq_cst)
#define atomic_flag_clear_explicit _kernel_atomic_flag_clear_explicit

#ifdef __cplusplus
}
#endif

#endif
/* end of stdatomic.h */
