/*
 * Copyright (c) 2019     The University of Tennessee and The University
 *                        of Tennessee Research Foundation. All rights
 *                        reserved.
 */
#include "stencil_internal.h"

/**
 * @brief stencil_1D init operator
 *
 * Fills one tile, stored column by column with descA->mb elements
 * between the starts of two consecutive columns:
 *   - the R first and R last columns (ghost region) are set to 0;
 *   - every other element is set to i + j, where i and j are the row
 *     and column indices local to the tile.
 *
 * Only columns [0, descA->nb) are ever written: a radius of at least
 * half the tile width zeroes the whole tile, and a negative radius
 * leaves no ghost column.
 *
 * @param [in] es: execution stream (unused)
 * @param [in] descA: tiled matrix data descriptor; only mb and nb are read
 * @param [out] _A: tile data, overwritten
 * @param [in] uplo: matrix shape (unused)
 * @param [in] m: tile row index (unused)
 * @param [in] n: tile column index (unused)
 * @param [in] args: pointer to an int array whose first element is R,
 *                   the radius of the ghost region
 * @return always 0
 */
int stencil_1D_init_ops(parsec_execution_stream_t *es,
                        const parsec_tiled_matrix_dc_t *descA,
                        void *_A, enum matrix_uplo uplo,
                        int m, int n, void *args)
{
    DTYPE *col = (DTYPE *)_A;
    const int R = ((int *)args)[0];
    const int mb = descA->mb;
    const int nb = descA->nb;

    /* Walk the tile one column at a time; advancing a pointer avoids
     * forming j * mb in int arithmetic. */
    for (int j = 0; j < nb; j++, col += mb) {
        /* Column j is a ghost column when it is among the R first or the
         * R last ones. "nb - j <= R" is "j >= nb - R" written so that the
         * subtraction cannot overflow. */
        const int ghost = (j < R) || (nb - j <= R);

        for (int i = 0; i < mb; i++) {
            if (ghost) {
                col[i] = (DTYPE)0.0;
            } else {
                col[i] = (DTYPE)1.0 * i + (DTYPE)1.0 * j;
            }
        }
    }

    (void)es; (void)uplo; (void)m; (void)n;
    return 0;
}

/**
 * @brief CORE Kernel of Stencil 1D
 *
 * Loops over every row i in [0, mb) and every column j in [R, nb-R).
 * Without LOOPGEN, each such point is computed as
 *   OUT_2D(i, j) = sum for jj in [-R, R] of WEIGHT_1D(jj) * IN_2D(i, j+jj)
 * so the stencil only combines neighbours along the column index, and
 * columns [0, R) and [nb-R, nb) of the output are left untouched.
 * With LOOPGEN, the loop body comes from the generated file
 * loop_body_1D.in instead.
 *
 * OUT_2D, IN_2D and WEIGHT_1D are macros defined outside this file;
 * NOTE(review): they presumably address _OUT, _IN and weight_1D and use
 * lda as the column stride -- confirm against stencil_internal.h.
 *
 * @param [out] _OUT: output data
 * @param [in] _IN: input data
 * @param [in] weight_1D: weight
 * @param [in] mb: row tile size
 * @param [in] nb: column tile size
 * @param [in] lda: lda (not referenced directly in this function;
 *                  presumably consumed by the OUT_2D / IN_2D macros)
 * @param [in] R: radius of ghost region
 * @see modified based on https://github.com/ParRes/Kernels/
 */
void CORE_stencil_1D(DTYPE *restrict _OUT, const DTYPE *restrict _IN,
                     const DTYPE *restrict weight_1D, const int mb,
                     const int nb, const int lda, const int R) {
    /* Only the interior columns are visited: R columns are skipped on
     * each side so that j-R and j+R stay within [0, nb). */
    for(int j = R; j < nb-R; j++) {
        for(int i = 0; i < mb; i++) {
#if LOOPGEN
                /* This file is generated by ./loop_gen_1D R.
                 * It is textually included inside the loop nest, so it
                 * sees the loop indices i and j declared above. */
                #include "loop_body_1D.in"
#else
                /* The first term (offset -R) initializes the output
                 * point, overwriting whatever it held before ... */
                OUT_2D(i, j) = WEIGHT_1D(-R) * IN_2D(i, j-R);
                /* ... then the remaining offsets -R+1 .. R are
                 * accumulated in increasing order. */
                for (int jj = -R+1; jj <= R; jj++) {
                    OUT_2D(i, j) += WEIGHT_1D(jj) * IN_2D(i, j+jj);
                }
#endif
        }
    }
}
