/*
 * Copyright 2011-2024   NVIDIA Corporation.  All rights reserved.
 *
 * NOTICE TO LICENSEE:
 *
 * This source code and/or documentation ("Licensed Deliverables") are
 * subject to NVIDIA intellectual property rights under U.S. and
 * international Copyright laws.
 *
 * These Licensed Deliverables contained herein is PROPRIETARY and
 * CONFIDENTIAL to NVIDIA and is being provided under the terms and
 * conditions of a form of NVIDIA software license agreement by and
 * between NVIDIA and Licensee ("License Agreement") or electronically
 * accepted by Licensee.  Notwithstanding any terms or conditions to
 * the contrary in the License Agreement, reproduction or disclosure
 * of the Licensed Deliverables to any third party without the express
 * written consent of NVIDIA is prohibited.
 *
 * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
 * LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
 * SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE.  IT IS
 * PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
 * NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
 * DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
 * NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
 * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
 * LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
 * SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
 * DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
 * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
 * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
 * OF THESE LICENSED DELIVERABLES.
 *
 * U.S. Government End Users.  These Licensed Deliverables are a
 * "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
 * 1995), consisting of "commercial computer software" and "commercial
 * computer software documentation" as such terms are used in 48
 * C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
 * only as a commercial end item.  Consistent with 48 C.F.R.12.212 and
 * 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
 * U.S. Government End Users acquire the Licensed Deliverables with
 * only those rights set forth herein.
 *
 * Any use of the Licensed Deliverables in individual and commercial
 * software must include, in the user documentation and internal
 * comments to the code, the above Disclaimer and U.S. Government End
 * Users Notice.
 */

#if !defined(_CUPTI_METRIC_H_)
#define _CUPTI_METRIC_H_

#include <cuda.h>
#include <string.h>
#include <cuda_stdint.h>
#include <cupti_result.h>

#ifndef CUPTIAPI
#ifdef _WIN32
#define CUPTIAPI __stdcall
#else
#define CUPTIAPI
#endif
#endif

#if defined(__cplusplus)
extern "C" {
#endif

#if defined(__GNUC__) && defined(CUPTI_LIB)
    #pragma GCC visibility push(default)
#endif

/**
 * \defgroup CUPTI_METRIC_API CUPTI Metric API
 * Functions, types, and enums that implement the CUPTI Metric API.
 *
 * \note The CUPTI Metric API declared in the header cupti_metrics.h is not supported on
 * devices with compute capability 7.5 and higher (i.e. Turing and later GPU architectures).
 * This API is deprecated as of the CUDA 12.8 release and will be removed in a future CUDA
 * release. It is replaced by the host profiling API in the header cupti_profiler_host.h and
 * the target profiling API in the header cupti_range_profiler.h, which are supported on
 * devices with compute capability 7.0 and higher (i.e. Volta and later GPU architectures).
 *
 * @{
 */

/**
 * \brief ID for a metric.
 *
 * A metric provides a measure of some aspect of the device. Metric
 * IDs are returned by \ref cuptiEnumMetrics, \ref
 * cuptiDeviceEnumMetrics and \ref cuptiMetricGetIdFromName, and are
 * passed to the other cuptiMetric* functions to identify the metric
 * being queried or calculated.
 */
typedef uint32_t CUpti_MetricID;

/**
 * \brief A metric category.
 *
 * The general type of a metric. Each metric is assigned to a
 * category, which is read by calling \ref cuptiMetricGetAttribute
 * with the CUPTI_METRIC_ATTR_CATEGORY attribute.
 */
typedef enum {
  /** A memory related metric. */
  CUPTI_METRIC_CATEGORY_MEMORY         = 0,
  /** An instruction related metric. */
  CUPTI_METRIC_CATEGORY_INSTRUCTION    = 1,
  /** A multiprocessor related metric. */
  CUPTI_METRIC_CATEGORY_MULTIPROCESSOR = 2,
  /** A cache related metric. */
  CUPTI_METRIC_CATEGORY_CACHE          = 3,
  /** A texture related metric. */
  CUPTI_METRIC_CATEGORY_TEXTURE        = 4,
  /** An NVLink related metric. */
  CUPTI_METRIC_CATEGORY_NVLINK         = 5,
  /** A PCIe related metric. */
  CUPTI_METRIC_CATEGORY_PCIE           = 6,
  CUPTI_METRIC_CATEGORY_FORCE_INT      = 0x7fffffff,
} CUpti_MetricCategory;

/**
 * \brief A metric evaluation mode.
 *
 * A metric can be evaluated per hardware instance, which shows how
 * load is balanced across the instances of a domain, or in aggregate
 * mode, which applies when the events involved in the metric
 * evaluation come from different event domains. Some metrics may
 * support both modes for convenience, so the enumerators below are
 * bit flags. A metric's evaluation mode is read by calling \ref
 * cuptiMetricGetAttribute with the CUPTI_METRIC_ATTR_EVALUATION_MODE
 * attribute.
 */
typedef enum {
  /**
   * When set, the metric can be profiled for each instance of the
   * domain: the event values passed to \ref cuptiMetricGetValue can
   * hold the values for a single domain instance, and \ref
   * cuptiMetricGetValue can be called once per instance.
   */
  CUPTI_METRIC_EVALUATION_MODE_PER_INSTANCE = 1 << 0,
  /**
   * When set, the metric can be profiled over all instances: the
   * event values passed to \ref cuptiMetricGetValue can be the
   * aggregated values of the events for all instances of the domain.
   */
  CUPTI_METRIC_EVALUATION_MODE_AGGREGATE    = 1 << 1,
  CUPTI_METRIC_EVALUATION_MODE_FORCE_INT    = 0x7fffffff,
} CUpti_MetricEvaluationMode;

/**
 * \brief Kinds of metric values.
 *
 * Identifies which member of the CUpti_MetricValue union holds the
 * value of a metric. The value returned by \ref cuptiMetricGetValue
 * should be read through the union member that corresponds to the
 * metric's value kind.
 */
typedef enum {
  /** The metric value is a 64-bit double. */
  CUPTI_METRIC_VALUE_KIND_DOUBLE            = 0,
  /** The metric value is a 64-bit unsigned integer. */
  CUPTI_METRIC_VALUE_KIND_UINT64            = 1,
  /**
   * The metric value is a percentage stored as a 64-bit double.
   * For example, 57.5% is stored as the value 57.5.
   */
  CUPTI_METRIC_VALUE_KIND_PERCENT           = 2,
  /**
   * The metric value is a throughput stored as a 64-bit integer.
   * Throughput values are expressed in bytes/second.
   */
  CUPTI_METRIC_VALUE_KIND_THROUGHPUT        = 3,
  /** The metric value is a 64-bit signed integer. */
  CUPTI_METRIC_VALUE_KIND_INT64             = 4,
  /**
   * The metric value is a utilization level, expressed as a
   * CUpti_MetricValueUtilizationLevel.
   */
  CUPTI_METRIC_VALUE_KIND_UTILIZATION_LEVEL = 5,
  CUPTI_METRIC_VALUE_KIND_FORCE_INT         = 0x7fffffff
} CUpti_MetricValueKind;

/**
 * \brief Utilization levels for metric values of kind
 * CUPTI_METRIC_VALUE_KIND_UTILIZATION_LEVEL.
 *
 * A utilization value can be anything from IDLE (0) to MAX (10);
 * only a few of those values are given a name here, so a value that
 * falls between two named levels is still valid.
 */
typedef enum {
  /** Idle: the lowest utilization value. */
  CUPTI_METRIC_VALUE_UTILIZATION_IDLE      = 0,
  /** Low utilization. */
  CUPTI_METRIC_VALUE_UTILIZATION_LOW       = 2,
  /** Medium utilization. */
  CUPTI_METRIC_VALUE_UTILIZATION_MID       = 5,
  /** High utilization. */
  CUPTI_METRIC_VALUE_UTILIZATION_HIGH      = 8,
  /** Maximum: the highest utilization value. */
  CUPTI_METRIC_VALUE_UTILIZATION_MAX       = 10,
  CUPTI_METRIC_VALUE_UTILIZATION_FORCE_INT = 0x7fffffff
} CUpti_MetricValueUtilizationLevel;

/**
 * \brief Metric attributes.
 *
 * Properties of a metric that can be queried with \ref
 * cuptiMetricGetAttribute. Each enumerator documents the type of the
 * value written to the caller's buffer for that attribute.
 */
typedef enum {
  /** Metric name. Value is a null terminated const c-string. */
  CUPTI_METRIC_ATTR_NAME              = 0,
  /**
   * Short description of metric. Value is a null terminated const
   * c-string.
   */
  CUPTI_METRIC_ATTR_SHORT_DESCRIPTION = 1,
  /**
   * Long description of metric. Value is a null terminated const
   * c-string.
   */
  CUPTI_METRIC_ATTR_LONG_DESCRIPTION  = 2,
  /** Category of the metric. Value is of type CUpti_MetricCategory. */
  CUPTI_METRIC_ATTR_CATEGORY          = 3,
  /**
   * Value type of the metric. Value is of type
   * CUpti_MetricValueKind.
   */
  CUPTI_METRIC_ATTR_VALUE_KIND        = 4,
  /**
   * Metric evaluation mode. Value is of type
   * CUpti_MetricEvaluationMode.
   */
  CUPTI_METRIC_ATTR_EVALUATION_MODE   = 5,
  CUPTI_METRIC_ATTR_FORCE_INT         = 0x7fffffff,
} CUpti_MetricAttribute;

/**
 * \brief A metric value.
 *
 * Metric values can be one of several different kinds. Corresponding
 * to each kind is a member of the CUpti_MetricValue union. The metric
 * value returned by \ref cuptiMetricGetValue should be accessed using
 * the appropriate member of that union based on its value kind. The
 * value kind of a metric (a CUpti_MetricValueKind) is read with \ref
 * cuptiMetricGetAttribute and the CUPTI_METRIC_ATTR_VALUE_KIND
 * attribute.
 */
typedef union {
  /**
   * Value for CUPTI_METRIC_VALUE_KIND_DOUBLE.
   */
  double metricValueDouble;
  /**
   * Value for CUPTI_METRIC_VALUE_KIND_UINT64.
   */
  uint64_t metricValueUint64;
  /**
   * Value for CUPTI_METRIC_VALUE_KIND_INT64.
   */
  int64_t metricValueInt64;
  /**
   * Value for CUPTI_METRIC_VALUE_KIND_PERCENT. For example, 57.5% is
   * represented by the value 57.5.
   */
  double metricValuePercent;
  /**
   * Value for CUPTI_METRIC_VALUE_KIND_THROUGHPUT.  The unit for
   * throughput values is bytes/second.
   */
  uint64_t metricValueThroughput;
  /**
   * Value for CUPTI_METRIC_VALUE_KIND_UTILIZATION_LEVEL. Values range
   * from CUPTI_METRIC_VALUE_UTILIZATION_IDLE (0) to
   * CUPTI_METRIC_VALUE_UTILIZATION_MAX (10); not every value in that
   * range has a named enumerator.
   */
  CUpti_MetricValueUtilizationLevel metricValueUtilizationLevel;
} CUpti_MetricValue;

/**
 * \brief Device class.
 *
 * The values that the metric property
 * CUPTI_METRIC_PROPERTY_DEVICE_CLASS can take.
 */
typedef enum {
  CUPTI_METRIC_PROPERTY_DEVICE_CLASS_TESLA   = 0,
  CUPTI_METRIC_PROPERTY_DEVICE_CLASS_QUADRO  = 1,
  CUPTI_METRIC_PROPERTY_DEVICE_CLASS_GEFORCE = 2,
  CUPTI_METRIC_PROPERTY_DEVICE_CLASS_TEGRA   = 3,
} CUpti_MetricPropertyDeviceClass;

/**
 * \brief Metric device properties.
 *
 * Identifies the device properties that may be needed to calculate a
 * metric. Some of these properties can be collected using
 * cuDeviceGetAttribute; others come from cuptiDeviceGetAttribute or
 * from profiling, as noted on each enumerator. The numeric values
 * are written out explicitly and match the declaration order.
 */
typedef enum {
  /**
   * Number of multiprocessors on a device. This can be collected
   * using the value of CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT from
   * cuDeviceGetAttribute.
   */
  CUPTI_METRIC_PROPERTY_MULTIPROCESSOR_COUNT     = 0,
  /**
   * Maximum number of warps on a multiprocessor. This can be
   * collected as the ratio of the values of
   * CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_MULTIPROCESSOR and
   * CU_DEVICE_ATTRIBUTE_WARP_SIZE from cuDeviceGetAttribute.
   */
  CUPTI_METRIC_PROPERTY_WARPS_PER_MULTIPROCESSOR = 1,
  /**
   * GPU time for the kernel in ns. This should be profiled using the
   * CUPTI Activity API.
   */
  CUPTI_METRIC_PROPERTY_KERNEL_GPU_TIME          = 2,
  /**
   * Clock rate for the device in KHz. This should be collected using
   * the value of CU_DEVICE_ATTRIBUTE_CLOCK_RATE from
   * cuDeviceGetAttribute.
   */
  CUPTI_METRIC_PROPERTY_CLOCK_RATE               = 3,
  /**
   * Number of frame buffer units for the device. This should be
   * collected using the value of CUPTI_DEVICE_ATTR_MAX_FRAME_BUFFERS
   * from cuptiDeviceGetAttribute.
   */
  CUPTI_METRIC_PROPERTY_FRAME_BUFFER_COUNT       = 4,
  /**
   * Global memory bandwidth in KBytes/sec. This should be collected
   * using the value of CUPTI_DEVICE_ATTR_GLOBAL_MEMORY_BANDWIDTH from
   * cuptiDeviceGetAttribute.
   */
  CUPTI_METRIC_PROPERTY_GLOBAL_MEMORY_BANDWIDTH  = 5,
  /**
   * PCIe link rate in Mega bits/sec. This should be collected using
   * the value of CUPTI_DEVICE_ATTR_PCIE_LINK_RATE from
   * cuptiDeviceGetAttribute.
   */
  CUPTI_METRIC_PROPERTY_PCIE_LINK_RATE           = 6,
  /**
   * PCIe link width for the device. This should be collected using
   * the value of CUPTI_DEVICE_ATTR_PCIE_LINK_WIDTH from
   * cuptiDeviceGetAttribute.
   */
  CUPTI_METRIC_PROPERTY_PCIE_LINK_WIDTH          = 7,
  /**
   * PCIe generation for the device. This should be collected using
   * the value of CUPTI_DEVICE_ATTR_PCIE_GEN from
   * cuptiDeviceGetAttribute.
   */
  CUPTI_METRIC_PROPERTY_PCIE_GEN                 = 8,
  /**
   * The device class, one of the CUpti_MetricPropertyDeviceClass
   * values. This should be collected using the value of
   * CUPTI_DEVICE_ATTR_DEVICE_CLASS from cuptiDeviceGetAttribute.
   */
  CUPTI_METRIC_PROPERTY_DEVICE_CLASS             = 9,
  /**
   * Peak number of single precision floating point operations that
   * the device can perform in one cycle. This should be collected
   * using the value of CUPTI_DEVICE_ATTR_FLOP_SP_PER_CYCLE from
   * cuptiDeviceGetAttribute.
   */
  CUPTI_METRIC_PROPERTY_FLOP_SP_PER_CYCLE        = 10,
  /**
   * Peak number of double precision floating point operations that
   * the device can perform in one cycle. This should be collected
   * using the value of CUPTI_DEVICE_ATTR_FLOP_DP_PER_CYCLE from
   * cuptiDeviceGetAttribute.
   */
  CUPTI_METRIC_PROPERTY_FLOP_DP_PER_CYCLE        = 11,
  /**
   * Number of L2 units on a device. This can be collected using the
   * value of CUPTI_DEVICE_ATTR_MAX_L2_UNITS from
   * cuptiDeviceGetAttribute.
   */
  CUPTI_METRIC_PROPERTY_L2_UNITS                 = 12,
  /**
   * Whether ECC support is enabled on the device. This can be
   * collected using the value of CU_DEVICE_ATTRIBUTE_ECC_ENABLED from
   * cuDeviceGetAttribute.
   */
  CUPTI_METRIC_PROPERTY_ECC_ENABLED              = 13,
  /**
   * Peak number of half precision floating point operations that
   * the device can perform in one cycle. This should be collected
   * using the value of CUPTI_DEVICE_ATTR_FLOP_HP_PER_CYCLE from
   * cuptiDeviceGetAttribute.
   */
  CUPTI_METRIC_PROPERTY_FLOP_HP_PER_CYCLE        = 14,
  /**
   * NVLink bandwidth for the device. This should be collected using
   * the value of CUPTI_DEVICE_ATTR_GPU_CPU_NVLINK_BW from
   * cuptiDeviceGetAttribute.
   */
  CUPTI_METRIC_PROPERTY_GPU_CPU_NVLINK_BANDWIDTH = 15,
} CUpti_MetricPropertyID;

/**
 * \brief Get the total number of metrics available on any device.
 *
 * Returns in \p *numMetrics the total number of metrics available
 * across all CUDA-capable devices. Use \ref cuptiDeviceGetNumMetrics
 * for the number of metrics available on one specific device.
 *
 * \param numMetrics Returns the number of metrics
 *
 * \retval CUPTI_SUCCESS
 * \retval CUPTI_ERROR_INVALID_PARAMETER if \p numMetrics is NULL
 */
CUptiResult CUPTIAPI cuptiGetNumMetrics(uint32_t *numMetrics);

/**
 * \brief Get all the metrics available on any device.
 *
 * Returns the metric IDs in \p metricArray for all CUDA-capable
 * devices.  On input, \p *arraySizeBytes gives the size of the \p
 * metricArray buffer in bytes. The buffer must hold at least
 * numMetrics * sizeof(CUpti_MetricID) bytes, where numMetrics is the
 * count reported by \ref cuptiGetNumMetrics; otherwise not all metric
 * IDs will be returned. On output, \p *arraySizeBytes contains the
 * number of bytes returned in \p metricArray.
 *
 * \param arraySizeBytes The size of \p metricArray in bytes, and
 * returns the number of bytes written to \p metricArray
 * \param metricArray Returns the IDs of the metrics
 *
 * \retval CUPTI_SUCCESS
 * \retval CUPTI_ERROR_INVALID_PARAMETER if \p arraySizeBytes or
 * \p metricArray are NULL
 */
CUptiResult CUPTIAPI cuptiEnumMetrics(size_t *arraySizeBytes,
                                      CUpti_MetricID *metricArray);

/**
 * \brief Get the number of metrics for a device.
 *
 * Returns in \p *numMetrics the number of metrics available for
 * \p device. The result can be used to size the buffer passed to
 * \ref cuptiDeviceEnumMetrics.
 *
 * \param device The CUDA device
 * \param numMetrics Returns the number of metrics available for the
 * device
 *
 * \retval CUPTI_SUCCESS
 * \retval CUPTI_ERROR_NOT_INITIALIZED
 * \retval CUPTI_ERROR_INVALID_DEVICE
 * \retval CUPTI_ERROR_INVALID_PARAMETER if \p numMetrics is NULL
 */
CUptiResult CUPTIAPI cuptiDeviceGetNumMetrics(CUdevice device,
                                              uint32_t *numMetrics);

/**
 * \brief Get the metrics for a device.
 *
 * Returns the metric IDs in \p metricArray for a device.  On input,
 * \p *arraySizeBytes gives the size of the \p metricArray buffer in
 * bytes. The buffer must hold at least numMetrics *
 * sizeof(CUpti_MetricID) bytes, where numMetrics is the count
 * reported by \ref cuptiDeviceGetNumMetrics for the same device;
 * otherwise not all metric IDs will be returned. On output, \p
 * *arraySizeBytes contains the number of bytes returned in \p
 * metricArray.
 *
 * \param device The CUDA device
 * \param arraySizeBytes The size of \p metricArray in bytes, and
 * returns the number of bytes written to \p metricArray
 * \param metricArray Returns the IDs of the metrics for the device
 *
 * \retval CUPTI_SUCCESS
 * \retval CUPTI_ERROR_NOT_INITIALIZED
 * \retval CUPTI_ERROR_INVALID_DEVICE
 * \retval CUPTI_ERROR_INVALID_PARAMETER if \p arraySizeBytes or
 * \p metricArray are NULL
 */
CUptiResult CUPTIAPI cuptiDeviceEnumMetrics(CUdevice device,
                                            size_t *arraySizeBytes,
                                            CUpti_MetricID *metricArray);

/**
 * \brief Get a metric attribute.
 *
 * Returns a metric attribute in \p *value. On input, \p *valueSize
 * gives the size of the \p value buffer in bytes; on output it
 * contains the number of bytes returned in \p value. The type of the
 * returned value depends on \p attrib and is documented on each
 * CUpti_MetricAttribute enumerator.
 *
 * If the attribute value is a c-string that is longer than \p
 * *valueSize, then only the first \p *valueSize characters will be
 * returned and there will be no terminating null byte. Callers that
 * pass a possibly undersized buffer must therefore not assume the
 * result is null terminated.
 *
 * \param metric ID of the metric
 * \param attrib The metric attribute to read
 * \param valueSize The size of the \p value buffer in bytes, and
 * returns the number of bytes written to \p value
 * \param value Returns the attribute's value
 *
 * \retval CUPTI_SUCCESS
 * \retval CUPTI_ERROR_NOT_INITIALIZED
 * \retval CUPTI_ERROR_INVALID_METRIC_ID
 * \retval CUPTI_ERROR_INVALID_PARAMETER if \p valueSize or \p value
 * is NULL, or if \p attrib is not a metric attribute
 * \retval CUPTI_ERROR_PARAMETER_SIZE_NOT_SUFFICIENT For non-c-string
 * attribute values, indicates that the \p value buffer is too small
 * to hold the attribute value.
 */
CUptiResult CUPTIAPI cuptiMetricGetAttribute(CUpti_MetricID metric,
                                             CUpti_MetricAttribute attrib,
                                             size_t *valueSize,
                                             void *value);

/**
 * \brief Find a metric by name.
 *
 * Finds the metric named \p metricName on \p device and returns its
 * metric ID in \p *metric.
 *
 * \param device The CUDA device
 * \param metricName The name of the metric to find
 * \param metric Returns the ID of the found metric, or is left
 * undefined if the metric cannot be found
 *
 * \retval CUPTI_SUCCESS
 * \retval CUPTI_ERROR_NOT_INITIALIZED
 * \retval CUPTI_ERROR_INVALID_DEVICE
 * \retval CUPTI_ERROR_INVALID_METRIC_NAME if unable to find a metric
 * with name \p metricName. In this case \p *metric is undefined
 * \retval CUPTI_ERROR_INVALID_PARAMETER if \p metricName or \p
 * metric are NULL.
 */
CUptiResult CUPTIAPI cuptiMetricGetIdFromName(CUdevice device,
                                              const char *metricName,
                                              CUpti_MetricID *metric);

/**
 * \brief Get number of events required to calculate a metric.
 *
 * Returns in \p *numEvents the number of events that are required to
 * calculate a metric. The result can be used to size the buffer
 * passed to \ref cuptiMetricEnumEvents.
 *
 * \param metric ID of the metric
 * \param numEvents Returns the number of events required for the metric
 *
 * \retval CUPTI_SUCCESS
 * \retval CUPTI_ERROR_NOT_INITIALIZED
 * \retval CUPTI_ERROR_INVALID_METRIC_ID
 * \retval CUPTI_ERROR_INVALID_PARAMETER if \p numEvents is NULL
 */
CUptiResult CUPTIAPI cuptiMetricGetNumEvents(CUpti_MetricID metric,
                                             uint32_t *numEvents);

/**
 * \brief Get the events required to calculate a metric.
 *
 * Gets the event IDs in \p eventIdArray required to calculate a \p
 * metric. On input, \p *eventIdArraySizeBytes gives the size of the
 * \p eventIdArray buffer in bytes. The buffer must hold at least
 * numEvents * sizeof(CUpti_EventID) bytes, where numEvents is the
 * count reported by \ref cuptiMetricGetNumEvents; otherwise not all
 * events will be returned. On output, \p *eventIdArraySizeBytes
 * contains the number of bytes returned in \p eventIdArray.
 *
 * \param metric ID of the metric
 * \param eventIdArraySizeBytes The size of \p eventIdArray in bytes,
 * and returns the number of bytes written to \p eventIdArray
 * \param eventIdArray Returns the IDs of the events required to
 * calculate \p metric
 *
 * \retval CUPTI_SUCCESS
 * \retval CUPTI_ERROR_NOT_INITIALIZED
 * \retval CUPTI_ERROR_INVALID_METRIC_ID
 * \retval CUPTI_ERROR_INVALID_PARAMETER if \p eventIdArraySizeBytes or \p
 * eventIdArray are NULL.
 */
CUptiResult CUPTIAPI cuptiMetricEnumEvents(CUpti_MetricID metric,
                                           size_t *eventIdArraySizeBytes,
                                           CUpti_EventID *eventIdArray);

/**
 * \brief Get number of properties required to calculate a metric.
 *
 * Returns in \p *numProp the number of properties that are required
 * to calculate a metric. The result can be used to size the buffer
 * passed to \ref cuptiMetricEnumProperties.
 *
 * \param metric ID of the metric
 * \param numProp Returns the number of properties required for the
 * metric
 *
 * \retval CUPTI_SUCCESS
 * \retval CUPTI_ERROR_NOT_INITIALIZED
 * \retval CUPTI_ERROR_INVALID_METRIC_ID
 * \retval CUPTI_ERROR_INVALID_PARAMETER if \p numProp is NULL
 */
CUptiResult CUPTIAPI cuptiMetricGetNumProperties(CUpti_MetricID metric,
                                                 uint32_t *numProp);

/**
 * \brief Get the properties required to calculate a metric.
 *
 * Gets the property IDs in \p propIdArray required to calculate a \p
 * metric. On input, \p *propIdArraySizeBytes gives the size of the
 * \p propIdArray buffer in bytes. The buffer must hold at least
 * numProp * sizeof(CUpti_MetricPropertyID) bytes, where numProp is
 * the count reported by \ref cuptiMetricGetNumProperties; otherwise
 * not all properties will be returned. On output, \p
 * *propIdArraySizeBytes contains the number of bytes returned in \p
 * propIdArray.
 *
 * \param metric ID of the metric
 * \param propIdArraySizeBytes The size of \p propIdArray in bytes,
 * and returns the number of bytes written to \p propIdArray
 * \param propIdArray Returns the IDs of the properties required to
 * calculate \p metric
 *
 * \retval CUPTI_SUCCESS
 * \retval CUPTI_ERROR_NOT_INITIALIZED
 * \retval CUPTI_ERROR_INVALID_METRIC_ID
 * \retval CUPTI_ERROR_INVALID_PARAMETER if \p propIdArraySizeBytes or \p
 * propIdArray are NULL.
 */
CUptiResult CUPTIAPI cuptiMetricEnumProperties(CUpti_MetricID metric,
                                               size_t *propIdArraySizeBytes,
                                               CUpti_MetricPropertyID *propIdArray);


/**
 * \brief For a metric get the groups of events that must be collected
 * in the same pass.
 *
 * For a metric get the groups of events that must be collected in the
 * same pass to ensure that the metric is calculated correctly. If the
 * events are not collected as specified then the metric value may be
 * inaccurate.
 *
 * If a metric does not have any required event group, NULL is
 * returned in \p *eventGroupSets. In this case the events needed for
 * the metric can be grouped in any manner for collection.
 *
 * \param context The context for event collection
 * \param metric The metric ID
 * \param eventGroupSets Returns a CUpti_EventGroupSets object that
 * indicates the events that must be collected in the same pass to
 * ensure the metric is calculated correctly.  Returns NULL if no
 * grouping is required for metric
 *
 * \retval CUPTI_SUCCESS
 * \retval CUPTI_ERROR_NOT_INITIALIZED
 * \retval CUPTI_ERROR_INVALID_METRIC_ID
 */
CUptiResult CUPTIAPI cuptiMetricGetRequiredEventGroupSets(CUcontext context,
                                                          CUpti_MetricID metric,
                                                          CUpti_EventGroupSets **eventGroupSets);

/**
 * \brief For a set of metrics, get the grouping that indicates the
 * number of passes and the event groups necessary to collect the
 * events required for those metrics.
 *
 * Given the metric IDs in \p metricIdArray, returns in \p
 * *eventGroupPasses the grouping that indicates how many passes are
 * needed, and which event groups must be collected on each pass, to
 * gather every event required by those metrics.
 *
 * \see cuptiEventGroupSetsCreate for details on event group set
 * creation.
 *
 * \param context The context for event collection
 * \param metricIdArraySizeBytes Size of the metricIdArray in bytes
 * \param metricIdArray Array of metric IDs
 * \param eventGroupPasses Returns a CUpti_EventGroupSets object that
 * indicates the number of passes required to collect the events and
 * the events to collect on each pass
 *
 * \retval CUPTI_SUCCESS
 * \retval CUPTI_ERROR_NOT_INITIALIZED
 * \retval CUPTI_ERROR_INVALID_CONTEXT
 * \retval CUPTI_ERROR_INVALID_METRIC_ID
 * \retval CUPTI_ERROR_INVALID_PARAMETER if \p metricIdArray or
 * \p eventGroupPasses is NULL
 */
CUptiResult CUPTIAPI cuptiMetricCreateEventGroupSets(CUcontext context,
                                                     size_t metricIdArraySizeBytes,
                                                     CUpti_MetricID *metricIdArray,
                                                     CUpti_EventGroupSets **eventGroupPasses);

/**
 * \brief Calculate the value for a metric.
 *
 * Use the events collected for a metric to calculate the metric
 * value. Metric value evaluation depends on the evaluation mode
 * \ref CUpti_MetricEvaluationMode that the metric supports.
 * If a metric has evaluation mode CUPTI_METRIC_EVALUATION_MODE_PER_INSTANCE,
 * then it assumes that the input event value is for one domain instance.
 * If a metric has evaluation mode CUPTI_METRIC_EVALUATION_MODE_AGGREGATE,
 * it assumes that input event values are
 * normalized to represent all domain instances on a device. For the
 * most accurate metric collection, the events required for the metric
 * should be collected for all profiled domain instances. For example,
 * to collect all instances of an event, set the
 * CUPTI_EVENT_GROUP_ATTR_PROFILE_ALL_DOMAIN_INSTANCES attribute on
 * the group containing the event to 1. The normalized value for the
 * event is then: (\p sum_event_values * \p totalInstanceCount) / \p
 * instanceCount, where \p sum_event_values is the summation of the
 * event values across all profiled domain instances, \p
 * totalInstanceCount is obtained from querying
 * CUPTI_EVENT_DOMAIN_ATTR_TOTAL_INSTANCE_COUNT and \p instanceCount
 * is obtained from querying CUPTI_EVENT_GROUP_ATTR_INSTANCE_COUNT (or
 * CUPTI_EVENT_DOMAIN_ATTR_INSTANCE_COUNT).
 *
 * The result in \p *metricValue should be read through the
 * CUpti_MetricValue member that matches the metric's value kind.
 *
 * \param device The CUDA device that the metric is being calculated for
 * \param metric The metric ID
 * \param eventIdArraySizeBytes The size of \p eventIdArray in bytes
 * \param eventIdArray The event IDs required to calculate \p metric
 * \param eventValueArraySizeBytes The size of \p eventValueArray in bytes
 * \param eventValueArray The normalized event values required to
 * calculate \p metric. The values must be ordered to match the order
 * of events in \p eventIdArray
 * \param timeDuration The duration over which the events were
 * collected, in ns
 * \param metricValue Returns the value for the metric
 *
 * \retval CUPTI_SUCCESS
 * \retval CUPTI_ERROR_NOT_INITIALIZED
 * \retval CUPTI_ERROR_INVALID_METRIC_ID
 * \retval CUPTI_ERROR_INVALID_OPERATION
 * \retval CUPTI_ERROR_PARAMETER_SIZE_NOT_SUFFICIENT if the
 * eventIdArray does not contain all the events needed for metric
 * \retval CUPTI_ERROR_INVALID_EVENT_VALUE if any of the
 * event values required for the metric is CUPTI_EVENT_OVERFLOW
 * \retval CUPTI_ERROR_INVALID_METRIC_VALUE if the computed metric value
 * cannot be represented in the metric's value type. For example,
 * if the metric value type is unsigned and the computed metric value is negative
 * \retval CUPTI_ERROR_INVALID_PARAMETER if \p metricValue,
 * \p eventIdArray or \p eventValueArray is NULL
 */
CUptiResult CUPTIAPI cuptiMetricGetValue(CUdevice device,
                                         CUpti_MetricID metric,
                                         size_t eventIdArraySizeBytes,
                                         CUpti_EventID *eventIdArray,
                                         size_t eventValueArraySizeBytes,
                                         uint64_t *eventValueArray,
                                         uint64_t timeDuration,
                                         CUpti_MetricValue *metricValue);

/*
 * NOTE(review): the "cannot be represented" condition is documented
 * below as CUPTI_ERROR_NOT_COMPATIBLE, whereas cuptiMetricGetValue
 * documents the same condition as CUPTI_ERROR_INVALID_METRIC_VALUE.
 * Confirm against the implementation which code each function
 * actually returns.
 */
/**
 * \brief Calculate the value for a metric from events and metric
 * properties.
 *
 * Use the events and properties collected for a metric to calculate
 * the metric value. Unlike \ref cuptiMetricGetValue, this function
 * takes no device or time duration; the caller supplies the metric
 * property values explicitly through \p propIdArray and \p
 * propValueArray. Metric value evaluation depends on the evaluation
 * mode \ref CUpti_MetricEvaluationMode that the metric supports.  If
 * a metric has evaluation mode
 * CUPTI_METRIC_EVALUATION_MODE_PER_INSTANCE, then it assumes that the
 * input event value is for one domain instance.  If a metric has
 * evaluation mode CUPTI_METRIC_EVALUATION_MODE_AGGREGATE, it
 * assumes that input event values are normalized to represent all
 * domain instances on a device. For the most accurate metric
 * collection, the events required for the metric should be collected
 * for all profiled domain instances. For example, to collect all
 * instances of an event, set the
 * CUPTI_EVENT_GROUP_ATTR_PROFILE_ALL_DOMAIN_INSTANCES attribute on
 * the group containing the event to 1. The normalized value for the
 * event is then: (\p sum_event_values * \p totalInstanceCount) / \p
 * instanceCount, where \p sum_event_values is the summation of the
 * event values across all profiled domain instances, \p
 * totalInstanceCount is obtained from querying
 * CUPTI_EVENT_DOMAIN_ATTR_TOTAL_INSTANCE_COUNT and \p instanceCount
 * is obtained from querying CUPTI_EVENT_GROUP_ATTR_INSTANCE_COUNT (or
 * CUPTI_EVENT_DOMAIN_ATTR_INSTANCE_COUNT).
 *
 * \param metric The metric ID
 * \param eventIdArraySizeBytes The size of \p eventIdArray in bytes
 * \param eventIdArray The event IDs required to calculate \p metric
 * \param eventValueArraySizeBytes The size of \p eventValueArray in bytes
 * \param eventValueArray The normalized event values required to
 * calculate \p metric. The values must be ordered to match the order
 * of events in \p eventIdArray
 * \param propIdArraySizeBytes The size of \p propIdArray in bytes
 * \param propIdArray The metric property IDs required to calculate \p metric
 * \param propValueArraySizeBytes The size of \p propValueArray in bytes
 * \param propValueArray The metric property values required to
 * calculate \p metric. The values must be ordered to match the order
 * of metric properties in \p propIdArray
 * \param metricValue Returns the value for the metric
 *
 * \retval CUPTI_SUCCESS
 * \retval CUPTI_ERROR_NOT_INITIALIZED
 * \retval CUPTI_ERROR_INVALID_METRIC_ID
 * \retval CUPTI_ERROR_INVALID_OPERATION
 * \retval CUPTI_ERROR_PARAMETER_SIZE_NOT_SUFFICIENT if the
 * eventIdArray does not contain all the events needed for metric
 * \retval CUPTI_ERROR_INVALID_EVENT_VALUE if any of the
 * event values required for the metric is CUPTI_EVENT_OVERFLOW
 * \retval CUPTI_ERROR_NOT_COMPATIBLE if the computed metric value
 * cannot be represented in the metric's value type. For example,
 * if the metric value type is unsigned and the computed metric value is negative
 * \retval CUPTI_ERROR_INVALID_PARAMETER if \p metricValue,
 * \p eventIdArray or \p eventValueArray is NULL
 */
CUptiResult CUPTIAPI cuptiMetricGetValue2(CUpti_MetricID metric,
                                          size_t eventIdArraySizeBytes,
                                          CUpti_EventID *eventIdArray,
                                          size_t eventValueArraySizeBytes,
                                          uint64_t *eventValueArray,
                                          size_t propIdArraySizeBytes,
                                          CUpti_MetricPropertyID *propIdArray,
                                          size_t propValueArraySizeBytes,
                                          uint64_t *propValueArray,
                                          CUpti_MetricValue *metricValue);

/** @} */ /* END CUPTI_METRIC_API */

#if defined(__GNUC__) && defined(CUPTI_LIB)
    #pragma GCC visibility pop
#endif

#if defined(__cplusplus)
}
#endif

#endif /*_CUPTI_METRIC_H_*/


